├── data ├── DEX_YCB │ └── empty ├── YCB_Video │ ├── debug.txt │ ├── classes.txt │ ├── extents.txt │ └── models.txt ├── YCB_Self_Supervision │ ├── train_block_big_sim.txt │ ├── train_block_small_sim.txt │ ├── train_block_median_sim.txt │ ├── train_block_azure.txt │ ├── train_block_median.txt │ ├── debug.txt │ ├── train_block_median_azure.txt │ ├── train_block.txt │ ├── train_table.txt │ ├── extents.txt │ ├── train_1.txt │ ├── train_2.txt │ ├── stats.txt │ ├── train_3.txt │ ├── train_4.txt │ ├── test.txt │ └── train_5.txt ├── sunrgbd.pkl ├── pics │ ├── intro.png │ ├── deepim.gif │ └── deepim.png └── demo │ ├── 000000-color.png │ ├── 000000-depth.png │ ├── 000001-color.png │ ├── 000001-depth.png │ ├── 000002-color.png │ ├── 000002-depth.png │ ├── 000003-color.png │ ├── 000003-depth.png │ ├── meta.yml │ ├── posecnn_results │ ├── 000000-color.png.mat │ ├── 000001-color.png.mat │ ├── 000002-color.png.mat │ ├── 000003-color.png.mat │ ├── 000000-color.png_render.jpg │ ├── 000001-color.png_render.jpg │ ├── 000002-color.png_render.jpg │ ├── 000003-color.png_render.jpg │ ├── 000000-color.png_render_refined.jpg │ ├── 000001-color.png_render_refined.jpg │ ├── 000002-color.png_render_refined.jpg │ └── 000003-color.png_render_refined.jpg │ ├── deepim_results_COLOR │ ├── 000000-color.png.mat │ ├── 000001-color.png.mat │ ├── 000002-color.png.mat │ ├── 000003-color.png.mat │ ├── 000000-color.png_render.jpg │ ├── 000001-color.png_render.jpg │ ├── 000002-color.png_render.jpg │ └── 000003-color.png_render.jpg │ └── deepim_results_RGBD │ ├── 000000-color.png.mat │ ├── 000001-color.png.mat │ ├── 000002-color.png.mat │ ├── 000003-color.png.mat │ ├── 000000-color.png_render.jpg │ ├── 000001-color.png_render.jpg │ ├── 000002-color.png_render.jpg │ └── 000003-color.png_render.jpg ├── ycb_render ├── glutils │ ├── __init__.py │ ├── utils.py │ ├── glcontext.py │ ├── meshutil.py │ ├── glrenderer.py │ └── trackball.py ├── misc │ ├── fps.png │ └── husky.jpg ├── __init__.py ├── shaders │ ├── vert_simple.shader │ ├── frag_simple.shader │ ├── frag.shader │ ├── vert.shader │ ├── vert_mat.shader │ ├── vert_textureless.shader │ ├── vert_blinnphong.shader │ ├── frag_mat.shader │ ├── frag_textureless.shader │ └── frag_blinnphong.shader ├── CMakeLists.txt ├── get_available_devices.py ├── cpp │ ├── query_devices.cpp │ └── test_device.cpp ├── setup.py ├── visualize_sim.py └── glad │ └── EGL │ └── eglplatform.h ├── .gitmodules ├── lib ├── fcn │ ├── __init__.py │ └── multiscaleloss.py ├── utils │ ├── __init__.py │ ├── timer.py │ ├── nms.py │ ├── bbox.pyx │ ├── show_flows.py │ ├── se3.py │ ├── zoom_in.py │ ├── setup.py │ ├── pose_error.py │ └── blob.py ├── point_matching_loss │ ├── __init__.py │ ├── setup.py │ ├── PMLoss.py │ └── point_matching_loss.cpp ├── networks │ └── __init__.py └── datasets │ ├── __init__.py │ ├── factory.py │ └── imdb.py ├── requirement.txt ├── experiments ├── scripts │ ├── dex_ycb_flow_train_s0.sh │ ├── dex_ycb_flow_train_s1.sh │ ├── dex_ycb_flow_train_s2.sh │ ├── dex_ycb_flow_train_s3.sh │ ├── ycb_video_flow_train.sh │ ├── ycb_object_flow_train.sh │ ├── dex_ycb_flow_rgbd_train_s0.sh │ ├── dex_ycb_flow_rgbd_train_s1.sh │ ├── dex_ycb_flow_rgbd_train_s2.sh │ ├── dex_ycb_flow_rgbd_train_s3.sh │ ├── ycb_object_flow_rgbd_train.sh │ ├── ycb_video_flow_rgbd_train.sh │ ├── dex_ycb_flow_test_s0.sh │ ├── dex_ycb_flow_test_s1.sh │ ├── dex_ycb_flow_test_s2.sh │ ├── dex_ycb_flow_test_s3.sh │ ├── ycb_video_flow_test.sh │ ├── dex_ycb_flow_rgbd_test_s0.sh │ ├── dex_ycb_flow_rgbd_test_s1.sh │ ├── dex_ycb_flow_rgbd_test_s2.sh │ 
├── dex_ycb_flow_rgbd_test_s3.sh │ ├── ycb_object_flow_test.sh │ ├── ycb_object_flow_train_self_supervision.sh │ ├── ycb_video_flow_rgbd_test.sh │ ├── ros_ycb_object_test.sh │ ├── ros_ycb_object_rgbd_test.sh │ ├── ycb_object_flow_rgbd_test.sh │ ├── ycb_object_flow_rgbd_train_self_supervision.sh │ ├── demo.sh │ └── demo_rgbd.sh └── cfgs │ ├── dex_ycb_flow.yml │ ├── ycb_video_flow.yml │ ├── ycb_object_flow.yml │ ├── dex_ycb_flow_rgbd.yml │ ├── ycb_video_flow_rgbd.yml │ ├── ycb_object_flow_rgbd.yml │ ├── ycb_object_flow_self_supervision.yml │ └── ycb_object_flow_rgbd_self_supervision.yml ├── .gitignore ├── ros ├── _init_paths.py └── test_images.py ├── tools ├── _init_paths.py ├── sunrgbd_list.py ├── test_net.py └── train_net.py ├── package.xml ├── LICENSE.md └── CMakeLists.txt /data/DEX_YCB/empty: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /ycb_render/glutils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/YCB_Video/debug.txt: -------------------------------------------------------------------------------- 1 | 0058/000737 2 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_big_sim.txt: -------------------------------------------------------------------------------- 1 | blocks_big/scene_00 2 | -------------------------------------------------------------------------------- /ycb_render/glutils/utils.py: -------------------------------------------------------------------------------- 1 | colormap = [[1,0,0], [0,1,0], [0,0,1]] 2 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_small_sim.txt: -------------------------------------------------------------------------------- 1 | blocks_small/scene_00 2 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_median_sim.txt: -------------------------------------------------------------------------------- 1 | blocks_median/scene_00 2 | -------------------------------------------------------------------------------- /data/sunrgbd.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/sunrgbd.pkl -------------------------------------------------------------------------------- /data/pics/intro.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/pics/intro.png -------------------------------------------------------------------------------- /data/pics/deepim.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/pics/deepim.gif -------------------------------------------------------------------------------- /data/pics/deepim.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/pics/deepim.png -------------------------------------------------------------------------------- /ycb_render/misc/fps.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/ycb_render/misc/fps.png -------------------------------------------------------------------------------- /data/demo/000000-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000000-color.png -------------------------------------------------------------------------------- /data/demo/000000-depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000000-depth.png -------------------------------------------------------------------------------- /data/demo/000001-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000001-color.png -------------------------------------------------------------------------------- /data/demo/000001-depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000001-depth.png -------------------------------------------------------------------------------- /data/demo/000002-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000002-color.png -------------------------------------------------------------------------------- /data/demo/000002-depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000002-depth.png -------------------------------------------------------------------------------- /data/demo/000003-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000003-color.png -------------------------------------------------------------------------------- /data/demo/000003-depth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/000003-depth.png -------------------------------------------------------------------------------- /ycb_render/misc/husky.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/ycb_render/misc/husky.jpg -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "ycb_render/pybind11"] 2 | path = ycb_render/pybind11 3 | url = https://github.com/pybind/pybind11.git 4 | -------------------------------------------------------------------------------- /data/demo/meta.yml: -------------------------------------------------------------------------------- 1 | INTRINSICS: [618.0172729492188, 0.0, 312.376953125, 0.0, 618.0033569335938, 232.37530517578125, 2 | 0.0, 0.0, 1.0] 3 | -------------------------------------------------------------------------------- /data/demo/posecnn_results/000000-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000000-color.png.mat 
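Note: the INTRINSICS entry in data/demo/meta.yml above is the 3x3 camera matrix K flattened row-major (fx, 0, cx, 0, fy, cy, 0, 0, 1). A minimal sketch, not repo code, for recovering K from it:

import numpy as np
import yaml

# Assumption: INTRINSICS is the row-major flattening of the pinhole matrix K.
with open('data/demo/meta.yml') as f:
    meta = yaml.safe_load(f)
K = np.array(meta['INTRINSICS']).reshape(3, 3)
fx, fy = K[0, 0], K[1, 1]   # focal lengths in pixels
cx, cy = K[0, 2], K[1, 2]   # principal point
print(K)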
-------------------------------------------------------------------------------- /data/demo/posecnn_results/000001-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000001-color.png.mat -------------------------------------------------------------------------------- /data/demo/posecnn_results/000002-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000002-color.png.mat -------------------------------------------------------------------------------- /data/demo/posecnn_results/000003-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000003-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000000-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000000-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000001-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000001-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000002-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000002-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000003-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000003-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000000-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000000-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000001-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000001-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000002-color.png.mat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000002-color.png.mat -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000003-color.png.mat: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000003-color.png.mat -------------------------------------------------------------------------------- /data/demo/posecnn_results/000000-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000000-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000001-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000001-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000002-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000002-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000003-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000003-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000000-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000000-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000001-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000001-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000002-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000002-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_RGBD/000003-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_RGBD/000003-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000000-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000000-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000001-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000001-color.png_render.jpg 
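Note: the .mat files under data/demo/posecnn_results and data/demo/deepim_results_COLOR / deepim_results_RGBD store the per-frame pose estimates used by the demo (PoseCNN initializations and DeepIM refinements, respectively). Their internal variable names are not visible in this listing, so the sketch below, which is not repo code, only opens one and enumerates its contents rather than assuming a layout:

import scipy.io

mat = scipy.io.loadmat('data/demo/posecnn_results/000000-color.png.mat')
for key, value in mat.items():
    if not key.startswith('__'):  # skip MATLAB header entries
        print(key, getattr(value, 'shape', type(value)))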
-------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000002-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000002-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/deepim_results_COLOR/000003-color.png_render.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/deepim_results_COLOR/000003-color.png_render.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000000-color.png_render_refined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000000-color.png_render_refined.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000001-color.png_render_refined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000001-color.png_render_refined.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000002-color.png_render_refined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000002-color.png_render_refined.jpg -------------------------------------------------------------------------------- /data/demo/posecnn_results/000003-color.png_render_refined.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVlabs/DeepIM-PyTorch/master/data/demo/posecnn_results/000003-color.png_render_refined.jpg -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_azure.txt: -------------------------------------------------------------------------------- 1 | 1002T195153/scene_00 2 | 1002T201056/scene_00 3 | 1002T201353/scene_00 4 | 1002T202430/scene_00 5 | 1002T202726/scene_00 6 | 1002T203041/scene_00 7 | -------------------------------------------------------------------------------- /lib/fcn/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | -------------------------------------------------------------------------------- /lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | -------------------------------------------------------------------------------- /ycb_render/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 
2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | -------------------------------------------------------------------------------- /lib/point_matching_loss/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | -------------------------------------------------------------------------------- /requirement.txt: -------------------------------------------------------------------------------- 1 | pyassimp == 4.1.3 2 | progressbar2 3 | pyopengl >= 3.1.0 4 | opencv-python == 4.2.0.34 5 | transforms3d 6 | pillow 7 | IPython 8 | matplotlib 9 | easydict 10 | pyyaml 11 | future 12 | scipy 13 | Cython 14 | cupy 15 | -------------------------------------------------------------------------------- /lib/networks/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | from .FlowNetS import * 6 | -------------------------------------------------------------------------------- /ycb_render/shaders/vert_simple.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform mat4 V; 3 | uniform mat4 P; 4 | 5 | layout (location=0) in vec3 position; 6 | layout (location=1) in vec3 normal; 7 | layout (location=2) in vec2 texCoords; 8 | 9 | void main() { 10 | gl_Position = P * V * vec4(position,1); 11 | } -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_median.txt: -------------------------------------------------------------------------------- 1 | 1104T153032/scene_00 2 | 1104T153454/scene_00 3 | 1104T154829/scene_00 4 | 1107T182722/scene_00 5 | 1107T183624/scene_00 6 | 1107T184113/scene_00 7 | 1107T184439/scene_00 8 | 1110T160057/scene_00 9 | 1110T161747/scene_00 10 | 1110T163042/scene_00 11 | 1110T164151/scene_00 12 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/debug.txt: -------------------------------------------------------------------------------- 1 | 0217T160323/scene_00 2 | 0218T121356/scene_00 3 | 0218T131441/scene_00 4 | 0218T131825/scene_00 5 | 0218T132034/scene_00 6 | 0218T132704/scene_00 7 | 0218T133103/scene_00 8 | 0218T133804/scene_00 9 | 0218T134316/scene_00 10 | 0218T135116/scene_00 11 | 0218T140401/scene_00 12 | 0218T140943/scene_00 13 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_train_s0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s0_train \ 10 | --cfg experiments/cfgs/dex_ycb_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_train_s1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 
| 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s1_train \ 10 | --cfg experiments/cfgs/dex_ycb_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_train_s2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s2_train \ 10 | --cfg experiments/cfgs/dex_ycb_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_train_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s3_train \ 10 | --cfg experiments/cfgs/dex_ycb_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_video_flow_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset ycb_video_train \ 10 | --cfg experiments/cfgs/ycb_video_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset ycb_object_train \ 10 | --cfg experiments/cfgs/ycb_object_flow.yml \ 11 | --solver sgd \ 12 | --epochs 20 13 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_train_s0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s0_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_train_s1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s1_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_train_s2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 
| set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s2_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_train_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset dex_ycb_s3_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_rgbd_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset ycb_object_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/ycb_object_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_video_flow_rgbd_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained data/checkpoints/flownets_EPE1.951.pth \ 9 | --dataset ycb_video_train \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/ycb_video_flow_rgbd.yml \ 12 | --solver sgd \ 13 | --epochs 20 14 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block_median_azure.txt: -------------------------------------------------------------------------------- 1 | 1104T225304/scene_00 2 | 1104T225719/scene_00 3 | 1104T225915/scene_00 4 | 1104T230215/scene_00 5 | 1104T230451/scene_00 6 | 1104T231204/scene_00 7 | 1110T160048/scene_00 8 | 1110T161739/scene_00 9 | 1110T163037/scene_00 10 | 1110T164153/scene_00 11 | 1110T165415/scene_00 12 | 1110T170503/scene_00 13 | 1110T171334/scene_00 14 | 1110T171948/scene_00 15 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_test_s0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained output/dex_ycb/dex_ycb_s0_train/flownets_dex_ycb_all_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s0_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_test_s1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained 
output/dex_ycb/dex_ycb_s1_train/flownets_dex_ycb_all_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s1_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_test_s2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained output/dex_ycb/dex_ycb_s2_train/flownets_dex_ycb_all_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s2_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_test_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained output/dex_ycb/dex_ycb_s3_train/flownets_dex_ycb_all_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s3_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_video_flow_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu 0 \ 10 | --network flownets \ 11 | --pretrained output/ycb_video/ycb_video_train/flownets_ycb_video_all_epoch_$2.checkpoint.pth \ 12 | --dataset ycb_video_keyframe \ 13 | --cfg experiments/cfgs/ycb_video_flow.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_test_s0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained output/dex_ycb/dex_ycb_s0_train/flownets_dex_ycb_all_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s0_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_test_s1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained output/dex_ycb/dex_ycb_s1_train/flownets_dex_ycb_all_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s1_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_test_s2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained 
output/dex_ycb/dex_ycb_s2_train/flownets_dex_ycb_all_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s2_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/dex_ycb_flow_rgbd_test_s3.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained output/dex_ycb/dex_ycb_s3_train/flownets_dex_ycb_all_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset dex_ycb_s3_test \ 13 | --cfg experiments/cfgs/dex_ycb_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained output/ycb_object/ycb_object_train/flownets_ycb_object_20objects_color_epoch_$2.checkpoint.pth \ 12 | --dataset ycb_object_test \ 13 | --cfg experiments/cfgs/ycb_object_flow.yml 14 | -------------------------------------------------------------------------------- /ycb_render/shaders/frag_simple.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | layout (location = 0) out vec4 outputColour; 3 | layout (location = 1) out vec4 NormalColour; 4 | layout (location = 2) out vec4 InstanceColour; 5 | layout (location = 3) out vec4 PCColour; 6 | void main() { 7 | outputColour = vec4(0.1, 0.1, 0.1, 1.0); 8 | NormalColour = vec4(0,0,0,0); 9 | InstanceColour = vec4(0,0,0,0); 10 | PCColour = vec4(0,0,0,0); 11 | 12 | } -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_train_self_supervision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets \ 8 | --pretrained output/ycb_object/ycb_object_train/flownets_ycb_object_20objects_color_epoch_20.checkpoint.pth \ 9 | --dataset ycb_self_supervision_all_ycb \ 10 | --cfg experiments/cfgs/ycb_object_flow_self_supervision.yml \ 11 | --solver sgd \ 12 | --epochs 10 13 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_video_flow_rgbd_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu 0 \ 10 | --network flownets_rgbd \ 11 | --pretrained output/ycb_video/ycb_video_train/flownets_ycb_video_all_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset ycb_video_keyframe \ 13 | --cfg experiments/cfgs/ycb_video_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.mex* 2 | *.pyc 3 | *.tgz 4 | *.so 5 | *.o 6 | output* 7 | lib/synthesize/build/* 8 | lib/utils/bbox.c 9 | data/ 10 | ngc/ 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | 
downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | -------------------------------------------------------------------------------- /experiments/scripts/ros_ycb_object_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./ros/test_images.py --gpu $1 \ 10 | --network flownets \ 11 | --pretrained data/checkpoints/ycb_object/flownets_ycb_object_20objects_color_self_supervision_epoch_10.checkpoint.pth \ 12 | --dataset ycb_object_train \ 13 | --cfg experiments/cfgs/ycb_object_flow.yml 14 | -------------------------------------------------------------------------------- /data/YCB_Video/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /experiments/scripts/ros_ycb_object_rgbd_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./ros/test_images.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained data/checkpoints/ycb_object/flownets_ycb_object_20objects_rgbd_self_supervision_epoch_10.checkpoint.pth \ 12 | --dataset ycb_object_train \ 13 | --cfg experiments/cfgs/ycb_object_flow_rgbd.yml 14 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_block.txt: -------------------------------------------------------------------------------- 1 | 0916T000000/blue 2 | 0916T000000/green 3 | 0916T000000/red 4 | 0916T000000/yellow 5 | 0926T132202/scene_01 6 | 0926T132202/scene_02 7 | 0926T132202/scene_03 8 | 0926T133705/scene_01 9 | 0926T133705/scene_02 10 | 0926T133705/scene_03 11 | 0926T142330/scene_01 12 | 0926T142330/scene_03 13 | 0926T142330/scene_05 14 | 0926T143104/scene_01 15 | 0926T143104/scene_03 16 | 0926T143104/scene_04 17 | 1006T145231/scene_00 18 | 1006T200659/scene_00 19 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_rgbd_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=$1 8 | 9 | time ./tools/test_net.py --gpu $1 \ 10 | --network flownets_rgbd \ 11 | --pretrained output/ycb_object/ycb_object_train/flownets_ycb_object_20objects_rgbd_epoch_$2.checkpoint.pth \ 12 | --dataset ycb_object_test \ 13 | --dataset_background background_sunrgbd \ 14 | --cfg experiments/cfgs/ycb_object_flow_rgbd.yml 15 | -------------------------------------------------------------------------------- /experiments/scripts/ycb_object_flow_rgbd_train_self_supervision.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | time ./tools/train_net.py \ 7 | --network flownets_rgbd \ 8 | --pretrained output/ycb_object/ycb_object_train/flownets_ycb_object_20objects_rgbd_epoch_20.checkpoint.pth \ 9 | --dataset ycb_self_supervision_all_ycb \ 10 | --dataset_background background_sunrgbd \ 11 | --cfg experiments/cfgs/ycb_object_flow_rgbd_self_supervision.yml \ 12 | --solver sgd \ 13 | --epochs 10 14 | -------------------------------------------------------------------------------- /experiments/scripts/demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | time ./tools/test_images.py --gpu 0 \ 10 | --imgdir data/demo/ \ 11 | --meta data/demo/meta.yml \ 12 | --color *color.png \ 13 | --network flownets \ 14 | --pretrained data/checkpoints/ycb_object/flownets_ycb_object_20objects_color_self_supervision_epoch_10.checkpoint.pth \ 15 | --dataset ycb_object_test \ 16 | --cfg experiments/cfgs/ycb_object_flow.yml 17 | -------------------------------------------------------------------------------- /experiments/scripts/demo_rgbd.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | time ./tools/test_images.py --gpu 0 \ 10 | --imgdir data/demo/ \ 11 | --meta data/demo/meta.yml \ 12 | --color *color.png \ 13 | --network flownets_rgbd \ 14 | --pretrained data/checkpoints/ycb_object/flownets_ycb_object_20objects_rgbd_self_supervision_epoch_10.checkpoint.pth \ 15 | --dataset ycb_object_test \ 16 | --cfg experiments/cfgs/ycb_object_flow_rgbd.yml 17 | -------------------------------------------------------------------------------- /lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | from .imdb import imdb 6 | from .ycb_video import YCBVideo 7 | from .ycb_object import YCBObject 8 | from .ycb_self_supervision import YCBSelfSupervision 9 | from .background import BackgroundDataset 10 | from .dex_ycb import DexYCBDataset 11 | import os.path as osp 12 | ROOT_DIR = osp.join(osp.dirname(__file__), '..', '..') 13 | -------------------------------------------------------------------------------- /lib/point_matching_loss/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | from setuptools import setup 6 | from torch.utils.cpp_extension import BuildExtension, CUDAExtension 7 | 8 | setup( 9 | name='pml', 10 | ext_modules=[ 11 | CUDAExtension('pml_cuda', [ 12 | 'point_matching_loss.cpp', 13 | 'point_matching_loss_kernel.cu', 14 | ]) 15 | ], 16 | cmdclass={ 17 | 'build_ext': BuildExtension 18 | }) 19 | -------------------------------------------------------------------------------- /ros/_init_paths.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. 
All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | """Set up paths for DeepIM.""" 6 | 7 | import os.path as osp 8 | import sys 9 | 10 | def add_path(path): 11 | if path not in sys.path: 12 | sys.path.insert(0, path) 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | # Add lib to PYTHONPATH 17 | lib_path = osp.join(this_dir, '..', 'ycb_render') 18 | add_path(lib_path) 19 | 20 | lib_path = osp.join(this_dir, '..', 'lib') 21 | add_path(lib_path) 22 | -------------------------------------------------------------------------------- /tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | """Set up paths for DeepIM""" 6 | 7 | import os.path as osp 8 | import sys 9 | 10 | def add_path(path): 11 | if path not in sys.path: 12 | sys.path.insert(0, path) 13 | 14 | this_dir = osp.dirname(__file__) 15 | 16 | # Add lib to PYTHONPATH 17 | lib_path = osp.join(this_dir, '..', 'lib') 18 | add_path(lib_path) 19 | 20 | lib_path = osp.join(this_dir, '..', 'ycb_render') 21 | add_path(lib_path) 22 | -------------------------------------------------------------------------------- /data/YCB_Video/extents.txt: -------------------------------------------------------------------------------- 1 | 0.105098 0.103336 0.147140 2 | 0.072948 0.167432 0.223122 3 | 0.051228 0.097062 0.184740 4 | 0.068346 0.070898 0.118506 5 | 0.099712 0.071530 0.215002 6 | 0.085656 0.085848 0.041788 7 | 0.140458 0.136312 0.044982 8 | 0.092226 0.102030 0.037278 9 | 0.106770 0.061462 0.099400 10 | 0.146328 0.202874 0.039542 11 | 0.159810 0.157306 0.293620 12 | 0.112422 0.072590 0.277178 13 | 0.161696 0.163252 0.060978 14 | 0.133400 0.094318 0.084588 15 | 0.202122 0.229442 0.061552 16 | 0.106668 0.108480 0.240242 17 | 0.110210 0.257878 0.015808 18 | 0.021110 0.125212 0.019532 19 | 0.140818 0.174792 0.040068 20 | 0.210450 0.185262 0.036514 21 | 0.052900 0.077960 0.067918 22 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_table.txt: -------------------------------------------------------------------------------- 1 | 20200420-ft/002_master_chef_can 2 | 20200420-ft/003_cracker_box 3 | 20200420-ft/004_sugar_box 4 | 20200420-ft/005_tomato_soup_can 5 | 20200420-ft/006_mustard_bottle 6 | 20200420-ft/007_tuna_fish_can 7 | 20200420-ft/008_pudding_box 8 | 20200420-ft/009_gelatin_box 9 | 20200420-ft/010_potted_meat_can 10 | 20200420-ft/011_banana 11 | 20200420-ft/019_pitcher_base 12 | 20200420-ft/021_bleach_cleanser 13 | 20200420-ft/024_bowl 14 | 20200420-ft/025_mug 15 | 20200420-ft/035_power_drill 16 | 20200420-ft/036_wood_block 17 | 20200420-ft/037_scissors 18 | 20200420-ft/040_large_marker 19 | 20200420-ft/052_extra_large_clamp 20 | 20200420-ft/061_foam_brick 21 | --------------------------------------------------------------------------------
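Note: each row of data/YCB_Video/extents.txt gives the x/y/z extent of the object on the corresponding line of data/YCB_Video/classes.txt (both files have 21 entries; the values are presumably in meters). A minimal pairing sketch, not part of the repo:

import numpy as np

# Assumption: row i of extents.txt corresponds to line i of classes.txt.
with open('data/YCB_Video/classes.txt') as f:
    classes = [line.strip() for line in f if line.strip()]
extents = np.loadtxt('data/YCB_Video/extents.txt')  # shape (21, 3)
assert len(classes) == extents.shape[0]
for name, (ex, ey, ez) in zip(classes, extents):
    print('%-24s %.3f x %.3f x %.3f' % (name, ex, ey, ez))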
/ycb_render/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.12) 2 | project(CppYCBRenderer) 3 | 4 | find_package(CUDA REQUIRED) 5 | set(CUDA_LIBRARIES PUBLIC ${CUDA_LIBRARIES}) 6 | 7 | include_directories(glad) 8 | 9 | add_subdirectory(pybind11) 10 | 11 | cuda_add_library(CppYCBRenderer MODULE glad/egl.c glad/gl.c cpp/ycb_renderer.cpp) 12 | 13 | target_link_libraries(CppYCBRenderer PRIVATE pybind11::module dl pthread) 14 | set_target_properties(CppYCBRenderer PROPERTIES PREFIX "${PYTHON_MODULE_PREFIX}" 15 | SUFFIX "${PYTHON_MODULE_EXTENSION}") 16 | 17 | add_executable(query_devices glad/egl.c glad/gl.c cpp/query_devices.cpp) 18 | add_executable(test_device glad/egl.c glad/gl.c cpp/test_device.cpp) 19 | 20 | target_link_libraries(query_devices dl pthread) 21 | target_link_libraries(test_device dl pthread) 22 | -------------------------------------------------------------------------------- /ycb_render/get_available_devices.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | import subprocess 6 | import os 7 | 8 | 9 | def get_available_devices(): 10 | executable_path = os.path.join(os.path.dirname(__file__), 'build') 11 | 12 | num_devices = int(subprocess.check_output( 13 | ["{}/query_devices".format(executable_path)])) 14 | 15 | available_devices = [] 16 | for i in range(num_devices): 17 | try: 18 | if b"NVIDIA" in subprocess.check_output(["{}/test_device".format(executable_path), str(i)]): 19 | available_devices.append(i) 20 | except subprocess.CalledProcessError as e: 21 | print(e) 22 | return available_devices 23 | 24 | 25 | if __name__ == '__main__': 26 | print(get_available_devices()) 27 | -------------------------------------------------------------------------------- /experiments/cfgs/dex_ycb_flow.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: dex_ycb 2 | INPUT: COLOR 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.0001 7 | MILESTONES: !!python/tuple [10] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 8 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20] 16 | SNAPSHOT_INFIX: dex_ycb_all 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: False 24 | SYN_TNEAR: 0.6 25 | SYN_TFAR: 1.2 26 | SYN_STD_ROTATION: 15 27 | SYN_STD_TRANSLATION: 0.01 28 | SYN_RATIO: 5 29 | TEST: 30 | SINGLE_FRAME: True 31 | IMS_PER_BATCH: 1 32 | ITERNUM: 4 33 | SCALES_BASE: !!python/tuple [1.0] 34 | VISUALIZE: False 35 | SYNTHESIZE: False 36 | ROS_CAMERA: 'camera' 37 | --------------------------------------------------------------------------------
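Note: the experiment cfgs above use !!python/tuple tags, which yaml.safe_load rejects. A loading sketch, not the repo's own config code; it assumes PyYAML's FullLoader (which accepts the python/tuple tag) and the easydict package listed in requirement.txt:

import yaml
from easydict import EasyDict

with open('experiments/cfgs/dex_ycb_flow.yml') as f:
    cfg = EasyDict(yaml.load(f, Loader=yaml.FullLoader))
print(cfg.EXP_DIR)             # 'dex_ycb'
print(cfg.TRAIN.MILESTONES)    # (10,)
print(len(cfg.TRAIN.CLASSES))  # 20 class indices; index 18 is held out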
/tools/sunrgbd_list.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | import os, sys 6 | import glob 7 | try: 8 | import cPickle # Use cPickle on Python 2.7 9 | except ImportError: 10 | import pickle as cPickle 11 | 12 | comotion = '../data/SUNRGBD' # root of the SUN RGB-D download 13 | filename = os.path.join(comotion, '**/depth') 14 | print('list files in %s' % (filename)) 15 | files = glob.glob(filename, recursive=True) 16 | 17 | f = open('../data/sunrgbd.txt', 'w') 18 | filenames = [] 19 | for i in range(len(files)): 20 | filename = files[i] 21 | f.write('%s\n' % (filename[8:])) # [8:] strips the leading '../data/' 22 | filenames.append(filename[8:]) 23 | f.close() 24 | 25 | cache_file = '../data/sunrgbd.pkl' 26 | with open(cache_file, 'wb') as fid: 27 | cPickle.dump(filenames, fid, cPickle.HIGHEST_PROTOCOL) 28 | print('wrote filenames to {}'.format(cache_file)) 29 | --------------------------------------------------------------------------------
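Note: a read-back sketch, not in the repo, for the data/sunrgbd.pkl cache that tools/sunrgbd_list.py writes (and which the background_sunrgbd dataset presumably consumes):

import pickle

with open('data/sunrgbd.pkl', 'rb') as f:
    filenames = pickle.load(f)
print('%d SUN RGB-D depth directories' % len(filenames))
print(filenames[0])  # paths are stored relative to data/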
/experiments/cfgs/ycb_video_flow.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: ycb_video 2 | INPUT: COLOR 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.0001 7 | MILESTONES: !!python/tuple [10] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 16 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 16 | SNAPSHOT_INFIX: ycb_video_all 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: False 24 | SYN_TNEAR: 0.6 25 | SYN_TFAR: 1.2 26 | SYN_STD_ROTATION: 15 27 | SYN_STD_TRANSLATION: 0.01 28 | SYN_RATIO: 5 29 | TEST: 30 | SINGLE_FRAME: True 31 | IMS_PER_BATCH: 1 32 | ITERNUM: 4 33 | SCALES_BASE: !!python/tuple [1.0] 34 | VISUALIZE: False 35 | SYNTHESIZE: False 36 | ROS_CAMERA: 'camera' 37 | -------------------------------------------------------------------------------- /experiments/cfgs/ycb_object_flow.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: ycb_object 2 | INPUT: COLOR 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.0001 7 | MILESTONES: !!python/tuple [10] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 16 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20] 16 | SNAPSHOT_INFIX: ycb_object_20objects_color 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: True 24 | SYNNUM: 80000 25 | SYN_TNEAR: 0.4 26 | SYN_TFAR: 1.5 27 | SYN_STD_ROTATION: 15 28 | SYN_STD_TRANSLATION: 0.01 29 | SYN_RATIO: 5 30 | TEST: 31 | SINGLE_FRAME: True 32 | IMS_PER_BATCH: 4 33 | ITERNUM: 4 34 | SCALES_BASE: !!python/tuple [1.0] 35 | VISUALIZE: True 36 | SYNTHESIZE: True 37 | ROS_CAMERA: D435 38 | -------------------------------------------------------------------------------- /experiments/cfgs/dex_ycb_flow_rgbd.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: dex_ycb 2 | INPUT: RGBD 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.0001 7 | MILESTONES: !!python/tuple [10] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 8 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20] 16 | SNAPSHOT_INFIX: dex_ycb_all_rgbd 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: False 24 | SYN_TNEAR: 0.6 25 | SYN_TFAR: 1.2 26 | SYN_STD_ROTATION: 15 27 | SYN_STD_TRANSLATION: 0.01 28 | SYN_RATIO: 5 29 | SYN_BACKGROUND_SPECIFIC: True 30 | TEST: 31 | SINGLE_FRAME: True 32 | IMS_PER_BATCH: 1 33 | ITERNUM: 4 34 | SCALES_BASE: !!python/tuple [1.0] 35 | VISUALIZE: False 36 | SYNTHESIZE: False 37 | ROS_CAMERA: 'camera' 38 | -------------------------------------------------------------------------------- /lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | import time 6 | 7 | class Timer(object): 8 | """A simple timer.""" 9 | def __init__(self): 10 | self.total_time = 0. 11 | self.calls = 0 12 | self.start_time = 0. 13 | self.diff = 0. 14 | self.average_time = 0. 15 | 16 | def tic(self): 17 | # using time.time instead of time.clock because time.clock 18 | # does not normalize for multithreading 19 | self.start_time = time.time() 20 | 21 | def toc(self, average=True): 22 | self.diff = time.time() - self.start_time 23 | self.total_time += self.diff 24 | self.calls += 1 25 | self.average_time = self.total_time / self.calls 26 | if average: 27 | return self.average_time 28 | else: 29 | return self.diff 30 | --------------------------------------------------------------------------------
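Note: a usage sketch for the Timer class above; it is not part of lib/utils/timer.py and assumes lib/ is on sys.path, as tools/_init_paths.py arranges:

import time
from utils.timer import Timer

timer = Timer()
for _ in range(3):
    timer.tic()
    time.sleep(0.1)  # stand-in for the operation being timed
    timer.toc()
print('average %.4f s over %d calls' % (timer.average_time, timer.calls))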
False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: True 24 | SYNNUM: 80000 25 | SYN_TNEAR: 0.4 26 | SYN_TFAR: 1.5 27 | SYN_STD_ROTATION: 15 28 | SYN_STD_TRANSLATION: 0.01 29 | SYN_RATIO: 5 30 | SYN_BACKGROUND_SPECIFIC: True 31 | TEST: 32 | SINGLE_FRAME: True 33 | IMS_PER_BATCH: 4 34 | ITERNUM: 4 35 | SCALES_BASE: !!python/tuple [1.0] 36 | VISUALIZE: True 37 | SYNTHESIZE: True 38 | ROS_CAMERA: D435 39 | -------------------------------------------------------------------------------- /experiments/cfgs/ycb_object_flow_self_supervision.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: ycb_object 2 | INPUT: COLOR 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.00001 7 | MILESTONES: !!python/tuple [10000] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 16 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20] 16 | SNAPSHOT_INFIX: ycb_object_20objects_color_self_supervision 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: True 24 | SYNNUM: 80000 25 | SYN_TNEAR: 0.4 26 | SYN_TFAR: 1.5 27 | SYN_STD_ROTATION: 15 28 | SYN_STD_TRANSLATION: 0.01 29 | SYN_RATIO: 5 30 | TEST: 31 | SINGLE_FRAME: True 32 | IMS_PER_BATCH: 4 33 | ITERNUM: 4 34 | SCALES_BASE: !!python/tuple [1.0] 35 | VISUALIZE: True 36 | SYNTHESIZE: True 37 | ROS_CAMERA: D435 38 | -------------------------------------------------------------------------------- /experiments/cfgs/ycb_object_flow_rgbd_self_supervision.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: ycb_object 2 | INPUT: RGBD 3 | TRAIN: 4 | TRAINABLE: True 5 | WEIGHT_DECAY: 0.0001 6 | LEARNING_RATE: 0.00001 7 | MILESTONES: !!python/tuple [10000] 8 | HEATUP: 1 9 | ITERNUM: 4 10 | MOMENTUM: 0.9 11 | BETA: 0.999 12 | GAMMA: 0.1 13 | SCALES_BASE: !!python/tuple [1.0] 14 | IMS_PER_BATCH: 16 15 | CLASSES: !!python/tuple [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 19, 20] 16 | SNAPSHOT_INFIX: ycb_object_20objects_rgbd_self_supervision 17 | SNAPSHOT_PREFIX: flownets 18 | SNAPSHOT_EPOCHS: 1 19 | USE_FLIPPED: False 20 | CHROMATIC: True 21 | ADD_NOISE: True 22 | VISUALIZE: False 23 | SYNTHESIZE: True 24 | SYNNUM: 80000 25 | SYN_TNEAR: 0.4 26 | SYN_TFAR: 1.5 27 | SYN_STD_ROTATION: 15 28 | SYN_STD_TRANSLATION: 0.01 29 | SYN_RATIO: 5 30 | SYN_BACKGROUND_SPECIFIC: True 31 | TEST: 32 | SINGLE_FRAME: True 33 | IMS_PER_BATCH: 4 34 | ITERNUM: 4 35 | SCALES_BASE: !!python/tuple [1.0] 36 | VISUALIZE: True 37 | SYNTHESIZE: True 38 | ROS_CAMERA: D435 39 | -------------------------------------------------------------------------------- /ycb_render/shaders/frag.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform sampler2D texUnit; 3 | in vec2 theCoords; 4 | in vec3 Normal; 5 | in vec3 Normal_cam; 6 | in vec3 FragPos; 7 | in vec3 Instance_color; 8 | in vec3 Pos_cam; 9 | in vec3 Pos_obj; 10 | 11 | layout (location = 0) out vec4 outputColour; 12 | layout (location = 1) out vec4 NormalColour; 13 | layout (location = 2) out vec4 InstanceColour; 14 | layout (location = 3) out vec4 PCObject; 15 | layout (location = 4) out vec4 PCColour; 16 | 17 | uniform vec3 light_position; // in world coordinate 18 | uniform vec3 
light_color; // light color 19 | 20 | void main() { 21 | float ambientStrength = 0.2; 22 | vec3 ambient = ambientStrength * light_color; 23 | vec3 lightDir = normalize(light_position - FragPos); 24 | float diff = max(dot(Normal, lightDir), 0.0); 25 | vec3 diffuse = diff * light_color; 26 | 27 | outputColour = texture(texUnit, theCoords) * vec4(diffuse + ambient, 1); 28 | NormalColour = vec4((Normal_cam + 1) / 2,1); 29 | InstanceColour = vec4(Instance_color,1); 30 | PCObject = vec4(Pos_obj,1); 31 | PCColour = vec4(Pos_cam,1); 32 | } -------------------------------------------------------------------------------- /ycb_render/shaders/vert.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform mat4 V; 3 | uniform mat4 P; 4 | uniform mat4 pose_rot; 5 | uniform mat4 pose_trans; 6 | uniform vec3 instance_color; 7 | 8 | layout (location=0) in vec3 position; 9 | layout (location=1) in vec3 normal; 10 | layout (location=2) in vec2 texCoords; 11 | out vec2 theCoords; 12 | out vec3 Normal; 13 | out vec3 FragPos; 14 | out vec3 Normal_cam; 15 | out vec3 Instance_color; 16 | out vec3 Pos_cam; 17 | out vec3 Pos_obj; 18 | void main() { 19 | gl_Position = P * V * pose_trans * pose_rot * vec4(position, 1); 20 | vec4 world_position4 = pose_trans * pose_rot * vec4(position, 1); 21 | FragPos = vec3(world_position4.xyz / world_position4.w); // in world coordinate 22 | Normal = normalize(mat3(pose_rot) * normal); // in world coordinate 23 | Normal_cam = normalize(mat3(V) * mat3(pose_rot) * normal); // in camera coordinate 24 | 25 | vec4 pos_cam4 = V * pose_trans * pose_rot * vec4(position, 1); 26 | Pos_cam = pos_cam4.xyz / pos_cam4.w; 27 | Pos_obj = position; 28 | 29 | theCoords = texCoords; 30 | Instance_color = instance_color; 31 | } -------------------------------------------------------------------------------- /ycb_render/shaders/vert_mat.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform mat4 V; 3 | uniform mat4 P; 4 | uniform mat4 pose_rot; 5 | uniform mat4 pose_trans; 6 | uniform vec3 instance_color; 7 | 8 | layout (location=0) in vec3 position; 9 | layout (location=1) in vec3 normal; 10 | out vec3 Normal; 11 | out vec3 FragPos; 12 | out vec3 Normal_cam; 13 | out vec3 Instance_color; 14 | out vec3 Pos_cam; 15 | out vec3 Pos_obj; 16 | out float inverse_normal; 17 | void main() { 18 | gl_Position = P * V * pose_trans * pose_rot * vec4(position, 1); 19 | vec4 world_position4 = pose_trans * pose_rot * vec4(position, 1); 20 | FragPos = vec3(world_position4.xyz / world_position4.w); // in world coordinate 21 | Normal = normalize(mat3(pose_rot) * normal); // in world coordinate 22 | Normal_cam = normalize(mat3(V) * mat3(pose_rot) * normal); // in camera coordinate 23 | 24 | vec4 pos_cam4 = V * pose_trans * pose_rot * vec4(position, 1); 25 | Pos_cam = pos_cam4.xyz / pos_cam4.w; 26 | float normalDir = dot(Normal_cam, Pos_cam); 27 | Pos_obj = position; 28 | Instance_color = instance_color; 29 | inverse_normal = normalDir; 30 | } -------------------------------------------------------------------------------- /lib/utils/nms.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import numpy as np 6 | 7 | def nms(dets, thresh): 8 | cls = dets[:, 1] 9 | x1 = dets[:, 2] 10 | y1 = dets[:, 3] 11 | x2 = dets[:, 4] 12 | y2 = dets[:, 5] 13 | scores = dets[:, 6] 14 | 15 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 16 | order = scores.argsort()[::-1] 17 | 18 | keep = [] 19 | while order.size > 0: 20 | i = order[0] 21 | keep.append(i) 22 | xx1 = np.maximum(x1[i], x1[order[1:]]) 23 | yy1 = np.maximum(y1[i], y1[order[1:]]) 24 | xx2 = np.minimum(x2[i], x2[order[1:]]) 25 | yy2 = np.minimum(y2[i], y2[order[1:]]) 26 | 27 | w = np.maximum(0.0, xx2 - xx1 + 1) 28 | h = np.maximum(0.0, yy2 - yy1 + 1) 29 | inter = w * h 30 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 31 | 32 | inds = np.where(~((ovr > thresh) & (cls[order[1:]] == cls[i])))[0] 33 | #inds = np.where(ovr <= thresh)[0] 34 | order = order[inds + 1] 35 | 36 | return keep 37 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/extents.txt: -------------------------------------------------------------------------------- 1 | 0.105098 0.103336 0.147140 2 | 0.072948 0.167432 0.223122 3 | 0.051228 0.097062 0.184740 4 | 0.068346 0.070898 0.118506 5 | 0.099712 0.071530 0.215002 6 | 0.085656 0.085848 0.041788 7 | 0.140458 0.136312 0.044982 8 | 0.092226 0.102030 0.037278 9 | 0.106770 0.061462 0.099400 10 | 0.146328 0.202874 0.039542 11 | 0.159810 0.157306 0.293620 12 | 0.112422 0.072590 0.277178 13 | 0.161696 0.163252 0.060978 14 | 0.133400 0.094318 0.084588 15 | 0.202122 0.229442 0.061552 16 | 0.106668 0.108480 0.240242 17 | 0.110210 0.257878 0.015808 18 | 0.021110 0.125212 0.019532 19 | 0.140818 0.174792 0.040068 20 | 0.210450 0.185262 0.036514 21 | 0.052900 0.077960 0.067918 22 | 0.088622 0.087677 0.190435 23 | 0.088622 0.087677 0.190435 24 | 0.082827 0.140092 0.104835 25 | 0.084566 0.088989 0.246768 26 | 0.063000 0.063000 0.063000 27 | 0.063000 0.063000 0.063000 28 | 0.063000 0.063000 0.063000 29 | 0.063000 0.063000 0.063000 30 | 0.038000 0.038000 0.038000 31 | 0.038000 0.038000 0.038000 32 | 0.038000 0.038000 0.038000 33 | 0.038000 0.038000 0.038000 34 | 0.050000 0.050000 0.050000 35 | 0.050000 0.050000 0.050000 36 | 0.050000 0.050000 0.050000 37 | 0.050000 0.050000 0.050000 38 | 0.204848 0.235324 0.213590 39 | 0.012642 0.243882 0.056368 40 | -------------------------------------------------------------------------------- /ycb_render/shaders/vert_textureless.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform mat4 V; 3 | uniform mat4 P; 4 | uniform mat4 pose_rot; 5 | uniform mat4 pose_trans; 6 | uniform vec3 instance_color; 7 | 8 | layout (location=0) in vec3 position; 9 | layout (location=1) in vec3 normal; 10 | layout (location=2) in vec3 color; 11 | out vec3 theColor; 12 | out vec3 Normal; 13 | out vec3 FragPos; 14 | out vec3 Normal_cam; 15 | out vec3 Instance_color; 16 | out vec3 Pos_cam; 17 | out vec3 Pos_obj; 18 | out float inverse_normal; 19 | void main() { 20 | gl_Position = P * V * pose_trans * pose_rot * vec4(position, 1); 21 | vec4 world_position4 = pose_trans * pose_rot * vec4(position, 1); 22 | FragPos = vec3(world_position4.xyz / world_position4.w); // in world coordinate 23 | Normal = normalize(mat3(pose_rot) * normal); // in world coordinate 24 | Normal_cam = normalize(mat3(V) * mat3(pose_rot) * normal); // in camera coordinate 25 | 26 | vec4 pos_cam4 = V * pose_trans * pose_rot * vec4(position, 1); 27 | Pos_cam = pos_cam4.xyz / 
pos_cam4.w; 28 | Pos_obj = position; 29 | float normalDir = dot(Normal_cam, Pos_cam); 30 | theColor = color; 31 | Instance_color = instance_color; 32 | inverse_normal = normalDir; 33 | } -------------------------------------------------------------------------------- /ycb_render/shaders/vert_blinnphong.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform mat4 V; 3 | uniform mat4 P; 4 | uniform mat4 pose_rot; 5 | uniform mat4 pose_trans; 6 | uniform vec3 instance_color; 7 | 8 | layout (location=0) in vec3 position; 9 | layout (location=1) in vec3 normal; 10 | layout (location=2) in vec2 texCoords; 11 | out vec2 theCoords; 12 | out vec3 Normal; 13 | out vec3 FragPos; 14 | out vec3 Normal_cam; 15 | out vec3 Instance_color; 16 | out vec3 Pos_cam; 17 | out vec3 Pos_obj; 18 | out float inverse_normal; 19 | void main() { 20 | gl_Position = P * V * pose_trans * pose_rot * vec4(position, 1); 21 | vec4 world_position4 = pose_trans * pose_rot * vec4(position, 1); 22 | FragPos = vec3(world_position4.xyz / world_position4.w); // in world coordinate 23 | Normal = normalize(mat3(pose_rot) * normal); // in world coordinate 24 | Normal_cam = normalize(mat3(V) * mat3(pose_rot) * normal); // in camera coordinate 25 | 26 | vec4 pos_cam4 = V * pose_trans * pose_rot * vec4(position, 1); 27 | Pos_cam = pos_cam4.xyz / pos_cam4.w; 28 | Pos_obj = position; 29 | float normalDir = dot(Normal_cam, Pos_cam); 30 | inverse_normal = normalDir; 31 | theCoords = texCoords; 32 | Instance_color = instance_color; 33 | } -------------------------------------------------------------------------------- /data/YCB_Video/models.txt: -------------------------------------------------------------------------------- 1 | data/YCB_Video/models/002_master_chef_can/textured_simple.obj 2 | data/YCB_Video/models/003_cracker_box/textured_simple.obj 3 | data/YCB_Video/models/004_sugar_box/textured_simple.obj 4 | data/YCB_Video/models/005_tomato_soup_can/textured_simple.obj 5 | data/YCB_Video/models/006_mustard_bottle/textured_simple.obj 6 | data/YCB_Video/models/007_tuna_fish_can/textured_simple.obj 7 | data/YCB_Video/models/008_pudding_box/textured_simple.obj 8 | data/YCB_Video/models/009_gelatin_box/textured_simple.obj 9 | data/YCB_Video/models/010_potted_meat_can/textured_simple.obj 10 | data/YCB_Video/models/011_banana/textured_simple.obj 11 | data/YCB_Video/models/019_pitcher_base/textured_simple.obj 12 | data/YCB_Video/models/021_bleach_cleanser/textured_simple.obj 13 | data/YCB_Video/models/024_bowl/textured_simple.obj 14 | data/YCB_Video/models/025_mug/textured_simple.obj 15 | data/YCB_Video/models/035_power_drill/textured_simple.obj 16 | data/YCB_Video/models/036_wood_block/textured_simple.obj 17 | data/YCB_Video/models/037_scissors/textured_simple.obj 18 | data/YCB_Video/models/040_large_marker/textured_simple.obj 19 | data/YCB_Video/models/051_large_clamp/textured_simple.obj 20 | data/YCB_Video/models/052_extra_large_clamp/textured_simple.obj 21 | data/YCB_Video/models/061_foam_brick/textured_simple.obj 22 | -------------------------------------------------------------------------------- /lib/point_matching_loss/PMLoss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import math 6 | from torch import nn 7 | from torch.autograd import Function 8 | import torch 9 | import pml_cuda 10 | 11 | 12 | class PMLossFunction(Function): 13 | @staticmethod 14 | def forward(ctx, rotations, translations, poses_src, poses_tgt, extents, points): 15 | outputs = pml_cuda.forward(rotations, translations, poses_src, poses_tgt, extents, points) 16 | loss = outputs[0] 17 | variables = outputs[1:] 18 | ctx.save_for_backward(*variables) 19 | 20 | return loss 21 | 22 | @staticmethod 23 | def backward(ctx, grad_loss): 24 | outputs = pml_cuda.backward(grad_loss.contiguous(), *ctx.saved_variables) 25 | d_rotation, d_translation = outputs 26 | 27 | return d_rotation, d_translation, None, None, None, None 28 | 29 | 30 | class PMLoss(nn.Module): 31 | def __init__(self): 32 | super(PMLoss, self).__init__() 33 | 34 | def forward(self, rotations, translations, poses_src, poses_tgt, extents, points): 35 | return PMLossFunction.apply(rotations, translations, poses_src, poses_tgt, extents, points) 36 | -------------------------------------------------------------------------------- /ycb_render/shaders/frag_mat.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | in vec3 Normal; 3 | in vec3 Normal_cam; 4 | in vec3 FragPos; 5 | in vec3 Instance_color; 6 | in vec3 Pos_cam; 7 | in vec3 Pos_obj; 8 | in float inverse_normal; 9 | uniform vec3 mat_ambient; 10 | uniform vec3 mat_diffuse; 11 | uniform vec3 mat_specular; 12 | uniform float mat_shininess; 13 | 14 | layout (location = 0) out vec4 outputColour; 15 | layout (location = 1) out vec4 NormalColour; 16 | layout (location = 2) out vec4 InstanceColour; 17 | layout (location = 3) out vec4 PCObject; 18 | layout (location = 4) out vec4 PCColour; 19 | 20 | uniform vec3 light_position; // in world coordinate 21 | uniform vec3 light_color; // light color 22 | void main() { 23 | if (inverse_normal > 0) discard; // discard the wrong pixel 24 | vec3 norm = normalize(Normal); 25 | vec3 ambient = mat_ambient * light_color; 26 | vec3 lightDir = normalize(light_position - FragPos); 27 | float diff = max(dot(norm, lightDir), 0.0); 28 | vec3 diffuse = diff * light_color * mat_diffuse; 29 | vec3 viewDir = normalize(Pos_cam - FragPos); 30 | vec3 reflectDir = reflect(-lightDir, norm); 31 | float spec = pow(max(dot(viewDir, reflectDir), 0.0), mat_shininess); 32 | vec3 specular = light_color * (spec * mat_specular); 33 | outputColour = vec4(ambient + diffuse + specular, 1); 34 | 35 | //NormalColour = vec4((Normal_cam + 1) / 2,1); 36 | NormalColour = vec4(Normal_cam,1); 37 | InstanceColour = vec4(Instance_color,1); 38 | PCObject = vec4(Pos_obj,1); 39 | PCColour = vec4(Pos_cam,1); 40 | } -------------------------------------------------------------------------------- /ycb_render/shaders/frag_textureless.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | in vec3 theColor; 3 | in vec3 Normal; 4 | in vec3 Normal_cam; 5 | in vec3 FragPos; 6 | in vec3 Instance_color; 7 | in vec3 Pos_cam; 8 | in vec3 Pos_obj; 9 | in float inverse_normal; 10 | 11 | layout (location = 0) out vec4 outputColour; 12 | layout (location = 1) out vec4 NormalColour; 13 | layout (location = 2) out vec4 InstanceColour; 14 | layout (location = 3) out vec4 PCObject; 15 | layout (location = 4) out vec4 PCColour; 16 | 17 | uniform vec3 light_position; // in world coordinate 18 | uniform vec3 light_color; // light color 19 | uniform vec3 
mat_ambient; 20 | uniform vec3 mat_diffuse; 21 | uniform vec3 mat_specular; 22 | uniform float mat_shininess; 23 | void main() { 24 | //float ambientStrength = 0.2; 25 | //vec3 ambient = ambientStrength * light_color; 26 | //vec3 lightDir = normalize(light_position - FragPos); 27 | //float diff = max(dot(Normal, lightDir), 0.0); 28 | //vec3 diffuse = diff * light_color; 29 | if (inverse_normal > 0) discard; // discard the wrong pixel 30 | vec3 norm = normalize(Normal); 31 | vec3 ambient = mat_ambient * light_color; 32 | vec3 lightDir = normalize(light_position - FragPos); 33 | float diff = max(dot(norm, lightDir), 0.0); 34 | vec3 diffuse = diff * light_color * mat_diffuse; 35 | vec3 viewDir = normalize(Pos_cam - FragPos); 36 | vec3 reflectDir = reflect(-lightDir, norm); 37 | float spec = pow(max(dot(viewDir, reflectDir), 0.0), mat_shininess); 38 | vec3 specular = light_color * (spec * mat_specular); 39 | 40 | outputColour = vec4(theColor, 1) * vec4(diffuse + ambient + specular, 1); 41 | NormalColour = vec4((Normal_cam + 1) / 2,1); 42 | InstanceColour = vec4(Instance_color,1); 43 | PCObject = vec4(Pos_obj,1); 44 | PCColour = vec4(Pos_cam,1); 45 | } -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_1.txt: -------------------------------------------------------------------------------- 1 | 0907T162817/scene_01 2 | 0907T163317/scene_01 3 | 0907T164137/scene_06 4 | 0907T171024/scene_01 5 | 0907T172157/scene_04 6 | 0907T172157/scene_18 7 | 0907T180254/scene_06 8 | 0907T181442/scene_01 9 | 0907T191150/scene_02 10 | 0907T192325/scene_01 11 | 0907T194155/scene_02 12 | 0907T215136/scene_01 13 | 0907T215805/scene_01 14 | 0907T221844/scene_01 15 | 0907T222106/scene_03 16 | 0907T222555/scene_02 17 | 0907T222933/scene_02 18 | 0907T223820/scene_02 19 | 0907T225112/scene_03 20 | 0907T231131/scene_05 21 | 0907T231131/scene_09 22 | 0907T231131/scene_12 23 | 0908T000608/scene_01 24 | 0908T000608/scene_03 25 | 0908T003015/scene_02 26 | 0910T015831/scene_01 27 | 0910T020707/scene_06 28 | 0910T020707/scene_10 29 | 0910T022211/scene_08 30 | 0910T022211/scene_09 31 | 0910T025409/scene_02 32 | 0910T025409/scene_12 33 | 0910T033945/scene_07 34 | 0910T033945/scene_09 35 | 0910T040255/scene_06 36 | 0910T042817/scene_02 37 | 0910T042817/scene_06 38 | 0910T044956/scene_02 39 | 0910T044956/scene_27 40 | 0910T053353/scene_08 41 | 0910T054747/scene_01 42 | 0910T060213/scene_12 43 | 0912T112032/scene_04 44 | 0912T112032/scene_05 45 | 0912T114842/scene_03 46 | 0912T114842/scene_19 47 | 0912T114842/scene_21 48 | 0912T122347/scene_09 49 | 0912T122347/scene_10 50 | 0912T124551/scene_03 51 | 0912T124551/scene_11 52 | 0912T192512/scene_01 53 | 0912T193313/scene_30 54 | 0912T193313/scene_31 55 | 0912T193313/scene_33 56 | 0912T202829/scene_06 57 | 0912T210233/scene_01 58 | 0912T211535/scene_01 59 | 0912T212212/scene_07 60 | 0912T212212/scene_08 61 | 0912T212212/scene_15 62 | 0912T230419/scene_03 63 | 0912T230419/scene_09 64 | 0912T230419/scene_20 65 | 0912T230419/scene_22 66 | 0913T000123/scene_13 67 | 0913T000123/scene_16 68 | 0913T002954/scene_06 69 | 0913T004159/scene_08 70 | 0913T010525/scene_03 71 | 0913T010525/scene_15 72 | 0913T013716/scene_05 73 | 0913T013716/scene_07 74 | 0913T020922/scene_14 75 | 0913T020922/scene_19 76 | 0913T024454/scene_05 77 | 0913T025555/scene_12 78 | 0913T025555/scene_18 79 | 0913T025555/scene_21 80 | -------------------------------------------------------------------------------- /lib/point_matching_loss/point_matching_loss.cpp: 
-------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | // This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | // text can be found in LICENSE.md 4 | 5 | #include <torch/extension.h> 6 | 7 | #include <vector> 8 | 9 | // CUDA forward declarations 10 | // point matching loss (pml) 11 | 12 | std::vector<at::Tensor> pml_cuda_forward( 13 | at::Tensor bottom_rotations, 14 | at::Tensor bottom_translations, 15 | at::Tensor poses_src, 16 | at::Tensor poses_tgt, 17 | at::Tensor extents, 18 | at::Tensor points); 19 | 20 | std::vector<at::Tensor> pml_cuda_backward( 21 | at::Tensor grad_loss, 22 | at::Tensor bottom_diff_rotation, 23 | at::Tensor bottom_diff_translation); 24 | 25 | // C++ interface 26 | 27 | #define CHECK_CUDA(x) AT_ASSERT(x.type().is_cuda()) 28 | #define CHECK_CONTIGUOUS(x) AT_ASSERT(x.is_contiguous()) 29 | #define CHECK_INPUT(x) CHECK_CUDA(x); CHECK_CONTIGUOUS(x) 30 | 31 | std::vector<at::Tensor> pml_forward( 32 | at::Tensor rotations, 33 | at::Tensor translations, 34 | at::Tensor poses_src, 35 | at::Tensor poses_tgt, 36 | at::Tensor extents, 37 | at::Tensor points) { 38 | CHECK_INPUT(rotations); 39 | CHECK_INPUT(translations); 40 | CHECK_INPUT(poses_src); 41 | CHECK_INPUT(poses_tgt); 42 | CHECK_INPUT(extents); 43 | CHECK_INPUT(points); 44 | 45 | return pml_cuda_forward(rotations, translations, poses_src, poses_tgt, extents, points); 46 | } 47 | 48 | std::vector<at::Tensor> pml_backward( 49 | at::Tensor grad_loss, 50 | at::Tensor bottom_diff_rotation, 51 | at::Tensor bottom_diff_translation) { 52 | CHECK_INPUT(grad_loss); 53 | CHECK_INPUT(bottom_diff_rotation); 54 | CHECK_INPUT(bottom_diff_translation); 55 | 56 | return pml_cuda_backward(grad_loss, bottom_diff_rotation, bottom_diff_translation); 57 | } 58 | 59 | PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { 60 | m.def("forward", &pml_forward, "pml forward (CUDA)"); 61 | m.def("backward", &pml_backward, "pml backward (CUDA)"); 62 | } 63 | -------------------------------------------------------------------------------- /lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | cimport cython 6 | import numpy as np 7 | cimport numpy as np 8 | 9 | DTYPE = np.float 10 | ctypedef np.float_t DTYPE_t 11 | 12 | def bbox_overlaps( 13 | np.ndarray[DTYPE_t, ndim=2] boxes, 14 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 15 | """ 16 | Parameters 17 | ---------- 18 | boxes: (N, 5) ndarray of float (batch_id, x1, y1, x2, y2) 19 | query_boxes: (K, 5) ndarray of float (batch_id, x1, y1, x2, y2) 20 | Returns 21 | ------- 22 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 23 | """ 24 | cdef unsigned int N = boxes.shape[0] 25 | cdef unsigned int K = query_boxes.shape[0] 26 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 27 | cdef DTYPE_t iw, ih, box_area 28 | cdef DTYPE_t ua 29 | cdef unsigned int k, n 30 | for k in range(K): 31 | box_area = ( 32 | (query_boxes[k, 2+1] - query_boxes[k, 0+1] + 1) * 33 | (query_boxes[k, 3+1] - query_boxes[k, 1+1] + 1) 34 | ) 35 | for n in range(N): 36 | if query_boxes[k, 0] == boxes[n, 0]: 37 | iw = ( 38 | min(boxes[n, 2+1], query_boxes[k, 2+1]) - 39 | max(boxes[n, 0+1], query_boxes[k, 0+1]) + 1 40 | ) 41 | if iw > 0: 42 | ih = ( 43 | min(boxes[n, 3+1], query_boxes[k, 3+1]) - 44 | max(boxes[n, 1+1], query_boxes[k, 1+1]) + 1 45 | ) 46 | if ih > 0: 47 | ua = float( 48 | (boxes[n, 2+1] - boxes[n, 0+1] + 1) * 49 | (boxes[n, 3+1] - boxes[n, 1+1] + 1) + 50 | box_area - iw * ih 51 | ) 52 | overlaps[n, k] = iw * ih / ua 53 | return overlaps 54 | -------------------------------------------------------------------------------- /lib/fcn/multiscaleloss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import torch 6 | import torch.nn as nn 7 | 8 | 9 | def EPE(input_flow, target_flow, sparse=False, mean=True): 10 | EPE_map = torch.norm(target_flow-input_flow,2,1) 11 | batch_size = EPE_map.size(0) 12 | if sparse: 13 | # invalid flow is defined with both flow coordinates to be exactly 0 14 | mask = (target_flow[:,0] == 0) & (target_flow[:,1] == 0) 15 | 16 | EPE_map = EPE_map[~mask.data] 17 | if mean: 18 | return EPE_map.mean() 19 | else: 20 | return EPE_map.sum()/batch_size 21 | 22 | 23 | def sparse_max_pool(input, size): 24 | positive = (input > 0).float() 25 | negative = (input < 0).float() 26 | output = nn.functional.adaptive_max_pool2d(input * positive, size) - nn.functional.adaptive_max_pool2d(-input * negative, size) 27 | return output 28 | 29 | 30 | def multiscaleEPE(network_output, target_flow, weights=None, sparse=False): 31 | def one_scale(output, target, sparse): 32 | 33 | b, _, h, w = output.size() 34 | 35 | if sparse: 36 | target_scaled = sparse_max_pool(target, (h, w)) 37 | else: 38 | target_scaled = nn.functional.adaptive_avg_pool2d(target, (h, w)) 39 | return EPE(output, target_scaled, sparse, mean=True) 40 | 41 | if type(network_output) not in [tuple, list]: 42 | network_output = [network_output] 43 | if weights is None: 44 | weights = [0.005, 0.01, 0.02, 0.08, 0.32] # as in original article 45 | assert(len(weights) == len(network_output)) 46 | 47 | loss = 0 48 | for output, weight in zip(network_output, weights): 49 | loss += weight * one_scale(output, target_flow, sparse) 50 | return loss 51 | 52 | 53 | def realEPE(output, target, sparse=False): 54 | b, _, h, w = target.size() 55 | upsampled_output = nn.functional.interpolate(output, size=(h,w), mode='bilinear') 56 | return EPE(upsampled_output, target, sparse, mean=True) 57 | -------------------------------------------------------------------------------- /lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | """Factory method for easily getting imdbs by name.""" 6 | 7 | __sets = {} 8 | 9 | import datasets.ycb_video 10 | import datasets.ycb_object 11 | import datasets.ycb_self_supervision 12 | import datasets.background 13 | import datasets.dex_ycb 14 | import numpy as np 15 | 16 | # ycb video dataset 17 | for split in ['train', 'val', 'keyframe', 'trainval', 'debug']: 18 | name = 'ycb_video_{}'.format(split) 19 | #print name 20 | __sets[name] = (lambda split=split: 21 | datasets.YCBVideo(split)) 22 | 23 | # ycb object dataset 24 | for split in ['train', 'test']: 25 | name = 'ycb_object_{}'.format(split) 26 | print(name) 27 | __sets[name] = (lambda split=split: 28 | datasets.YCBObject(split)) 29 | 30 | # ycb self supervision dataset 31 | for split in ['train_1', 'train_2', 'train_3', 'train_4', 'train_5', 'test', 'all', 'train_block_median', 'train_block_median_azure', 'train_block_median_demo', 'train_block_median_azure_demo', 'train_table', 'all_ycb', 32 | 'debug', 'train_block', 'train_block_azure', 'train_block_big_sim', 'train_block_median_sim', 'train_block_small_sim']: 33 | name = 'ycb_self_supervision_{}'.format(split) 34 | print(name) 35 | __sets[name] = (lambda split=split: 36 | datasets.YCBSelfSupervision(split)) 37 | 38 | # background dataset 39 | for split in ['coco', 'rgbd', 'nvidia', 'table', 'isaac', 'texture', 'sunrgbd']: 40 | name = 'background_{}'.format(split) 41 | print(name) 42 | __sets[name] = (lambda split=split: 43 | datasets.BackgroundDataset(split)) 44 | 45 | # DEX YCB dataset 46 | for setup in ('s0', 's1', 's2', 's3'): 47 | for split in ('train', 'val', 'test'): 48 | name = 'dex_ycb_{}_{}'.format(setup, split) 49 | __sets[name] = (lambda setup=setup, split=split: datasets.DexYCBDataset(setup, split)) 50 | 51 | def get_dataset(name): 52 | """Get an imdb (image database) by name.""" 53 | if name not in __sets: 54 | raise KeyError('Unknown dataset: {}'.format(name)) 55 | return __sets[name]() 56 | 57 | def list_datasets(): 58 | """List all registered imdbs.""" 59 | return __sets.keys() 60 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | <?xml version="1.0"?> 2 | <package> 3 | <name>deepim_pytorch</name> 4 | <version>0.0.0</version> 5 | <description>The deepim_pytorch package</description> 10 | <maintainer>dieter</maintainer> 16 | <license>TODO</license> 51 | <buildtool_depend>catkin</buildtool_depend> 60 | </package> -------------------------------------------------------------------------------- /ycb_render/cpp/query_devices.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | // This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | // text can be found in LICENSE.md 4 | 5 | //g++ glad/egl.c glad/gl.c egl.cpp -I glad -lpthread -ldl 6 | #include <stdio.h> 7 | 8 | #include <stdlib.h> 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include <glad/egl.h> 15 | #include <glad/gl.h> 16 | struct EGLInternalData2 { 17 | bool m_isInitialized; 18 | 19 | int m_windowWidth; 20 | int m_windowHeight; 21 | int m_renderDevice; 22 | 23 | 24 | EGLBoolean success; 25 | EGLint num_configs; 26 | EGLConfig egl_config; 27 | EGLSurface egl_surface; 28 | EGLContext egl_context; 29 | EGLDisplay egl_display; 30 | 31 | EGLInternalData2() 32 | : m_isInitialized(false), 33 | m_windowWidth(0), 34 | m_windowHeight(0) {} 35 | }; 36 | 37 | int main(){ 38 | 39 | 40 | int m_windowWidth; 41 | int m_windowHeight; 42 | int m_renderDevice; 43 | 44 | EGLBoolean success; 45 | EGLint num_configs; 46 | EGLConfig egl_config; 47 | EGLSurface egl_surface; 48 | EGLContext egl_context; 49 | EGLDisplay egl_display; 50 | 51 | m_windowWidth = 256; 52 | m_windowHeight = 256; 53 | m_renderDevice = -1; 54 | 55 | EGLint egl_config_attribs[] = {EGL_RED_SIZE, 56 | 8, 57 | EGL_GREEN_SIZE, 58 | 8, 59 | EGL_BLUE_SIZE, 60 | 8, 61 | EGL_DEPTH_SIZE, 62 | 8, 63 | EGL_SURFACE_TYPE, 64 | EGL_PBUFFER_BIT, 65 | EGL_RENDERABLE_TYPE, 66 | EGL_OPENGL_BIT, 67 | EGL_NONE}; 68 | 69 | EGLint egl_pbuffer_attribs[] = { 70 | EGL_WIDTH, m_windowWidth, EGL_HEIGHT, m_windowHeight, 71 | EGL_NONE, 72 | }; 73 | 74 | EGLInternalData2* m_data = new EGLInternalData2(); 75 | 76 | // Load EGL functions 77 | int egl_version = gladLoaderLoadEGL(NULL); 78 | if(!egl_version) { 79 | fprintf(stderr, "failed to load EGL with glad.\n"); 80 | exit(EXIT_FAILURE); 81 | }; 82 | 83 | // Query EGL Devices 84 | const int max_devices = 32; 85 | EGLDeviceEXT egl_devices[max_devices]; 86 | EGLint num_devices = 0; 87 | EGLint egl_error = eglGetError(); 88 | if (!eglQueryDevicesEXT(max_devices, egl_devices, &num_devices) || 89 | egl_error != EGL_SUCCESS) { 90 | printf("eglQueryDevicesEXT Failed.\n"); 91 | m_data->egl_display = EGL_NO_DISPLAY; 92 | } 93 | 94 | printf("%d", num_devices); 95 | 96 | return 0; 97 | } 98 | 99 | 100 | -------------------------------------------------------------------------------- /lib/utils/show_flows.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | from __future__ import print_function, division 6 | import numpy as np 7 | 8 | 9 | MAXCOLS = 60 10 | ncols = 0 11 | colorwheel = None 12 | 13 | 14 | def setcols(r, g, b, k): 15 | colorwheel[k][0] = r 16 | colorwheel[k][1] = g 17 | colorwheel[k][2] = b 18 | 19 | 20 | def makecolorwheel(): 21 | # relative lengths of color transitions: 22 | # these are chosen based on perceptual similarity 23 | # (e.g. 
one can distinguish more shades between red and yellow 24 | # than between yellow and green) 25 | RY = 15 26 | YG = 6 27 | GC = 4 28 | CB = 11 29 | BM = 13 30 | MR = 6 31 | global ncols, colorwheel 32 | ncols = RY + YG + GC + CB + BM + MR 33 | colorwheel = np.zeros([ncols, 3], dtype=np.float32) 34 | #print("ncols = %d\n" % ncols) 35 | if (ncols > MAXCOLS): 36 | raise EnvironmentError("something went wrong?") 37 | k = 0 38 | for i in range(RY): 39 | setcols(1, 1.0*float(i)/RY, 0, k) 40 | k += 1 41 | for i in range(YG): 42 | setcols(1.0-float(i)/YG, 1, 0, k) 43 | k += 1 44 | for i in range(GC): 45 | setcols(0, 1, float(i)/GC, k) 46 | k += 1 47 | for i in range(CB): 48 | setcols(0, 1-float(i)/CB, 1, k) 49 | k += 1 50 | for i in range(BM): 51 | setcols(float(i)/BM, 0, 1, k) 52 | k += 1 53 | for i in range(MR): 54 | setcols(1, 0, 1-float(i)/MR, k) 55 | k += 1 56 | makecolorwheel() 57 | 58 | 59 | 60 | 61 | def sintel_compute_color(data_interlaced): 62 | # type: (np.ndarray) -> np.ndarray 63 | data_u_in, data_v_in = np.split(data_interlaced, 2, axis=2) 64 | data_u_in = np.squeeze(data_u_in) 65 | data_v_in = np.squeeze(data_v_in) 66 | # pre-normalize 67 | max_rad = np.max(np.sqrt(np.power(data_u_in, 2) + np.power(data_v_in, 2))) + 1E-10 68 | fx = data_u_in / max_rad 69 | fy = data_v_in / max_rad 70 | 71 | # now do the stuff done in computeColor() 72 | rad = np.sqrt(np.power(fx, 2) + np.power(fy, 2)) 73 | a = np.nan_to_num(np.arctan2(-fy, -fx) / np.pi) 74 | fk = (a + 1.0) / 2.0 * (ncols-1) 75 | k0 = fk.astype(np.int32) 76 | k1 = ((k0 + 1) % ncols).astype(np.int32) 77 | f = fk - k0 78 | h, w = k0.shape 79 | col0 = colorwheel[k0.reshape(-1)].reshape([h, w, 3]) 80 | col1 = colorwheel[k1.reshape(-1)].reshape([h, w, 3]) 81 | col = (1 - f[..., np.newaxis]) * col0 + f[..., np.newaxis] * col1 82 | # col = col0 83 | 84 | col = 1 - rad[..., np.newaxis] * (1 - col) # increase saturation with radius 85 | return col 86 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_2.txt: -------------------------------------------------------------------------------- 1 | 0907T162817/scene_01 2 | 0907T163317/scene_01 3 | 0907T164137/scene_04 4 | 0907T164137/scene_06 5 | 0907T171024/scene_01 6 | 0907T172157/scene_04 7 | 0907T172157/scene_18 8 | 0907T172157/scene_19 9 | 0907T180254/scene_02 10 | 0907T180254/scene_06 11 | 0907T181442/scene_01 12 | 0907T191150/scene_02 13 | 0907T192325/scene_01 14 | 0907T194155/scene_02 15 | 0907T215136/scene_01 16 | 0907T215805/scene_01 17 | 0907T221844/scene_01 18 | 0907T222106/scene_03 19 | 0907T222555/scene_02 20 | 0907T222933/scene_02 21 | 0907T223820/scene_02 22 | 0907T225112/scene_03 23 | 0907T225112/scene_05 24 | 0907T231131/scene_05 25 | 0907T231131/scene_09 26 | 0907T231131/scene_12 27 | 0907T231131/scene_15 28 | 0907T231131/scene_25 29 | 0908T000608/scene_01 30 | 0908T000608/scene_03 31 | 0908T000608/scene_05 32 | 0908T003015/scene_02 33 | 0908T003015/scene_10 34 | 0910T015831/scene_01 35 | 0910T020707/scene_06 36 | 0910T020707/scene_08 37 | 0910T020707/scene_10 38 | 0910T022211/scene_02 39 | 0910T022211/scene_08 40 | 0910T022211/scene_09 41 | 0910T025409/scene_02 42 | 0910T025409/scene_04 43 | 0910T025409/scene_12 44 | 0910T033945/scene_07 45 | 0910T033945/scene_09 46 | 0910T033945/scene_12 47 | 0910T040255/scene_06 48 | 0910T040255/scene_07 49 | 0910T042817/scene_02 50 | 0910T042817/scene_06 51 | 0910T042817/scene_09 52 | 0910T044956/scene_02 53 | 0910T044956/scene_11 54 | 0910T044956/scene_23 55 | 
0910T044956/scene_27 56 | 0910T053353/scene_02 57 | 0910T053353/scene_08 58 | 0910T054747/scene_01 59 | 0910T060213/scene_01 60 | 0910T060213/scene_12 61 | 0912T112032/scene_04 62 | 0912T112032/scene_05 63 | 0912T112032/scene_07 64 | 0912T114842/scene_03 65 | 0912T114842/scene_09 66 | 0912T114842/scene_19 67 | 0912T114842/scene_21 68 | 0912T114842/scene_24 69 | 0912T122347/scene_05 70 | 0912T122347/scene_09 71 | 0912T122347/scene_10 72 | 0912T124551/scene_03 73 | 0912T124551/scene_06 74 | 0912T124551/scene_11 75 | 0912T192512/scene_01 76 | 0912T193313/scene_08 77 | 0912T193313/scene_23 78 | 0912T193313/scene_24 79 | 0912T193313/scene_30 80 | 0912T193313/scene_31 81 | 0912T193313/scene_33 82 | 0912T202829/scene_01 83 | 0912T202829/scene_06 84 | 0912T210233/scene_01 85 | 0912T211535/scene_01 86 | 0912T212212/scene_03 87 | 0912T212212/scene_07 88 | 0912T212212/scene_08 89 | 0912T212212/scene_15 90 | 0912T212212/scene_18 91 | 0912T230419/scene_03 92 | 0912T230419/scene_09 93 | 0912T230419/scene_11 94 | 0912T230419/scene_14 95 | 0912T230419/scene_20 96 | 0912T230419/scene_22 97 | 0912T230419/scene_33 98 | 0913T000123/scene_13 99 | 0913T000123/scene_14 100 | 0913T000123/scene_16 101 | 0913T002954/scene_06 102 | 0913T004159/scene_04 103 | 0913T004159/scene_08 104 | 0913T010525/scene_03 105 | 0913T010525/scene_07 106 | 0913T010525/scene_15 107 | 0913T010525/scene_18 108 | 0913T013716/scene_05 109 | 0913T013716/scene_07 110 | 0913T013716/scene_15 111 | 0913T020922/scene_14 112 | 0913T020922/scene_16 113 | 0913T020922/scene_19 114 | 0913T020922/scene_25 115 | 0913T024454/scene_04 116 | 0913T024454/scene_05 117 | 0913T025555/scene_09 118 | 0913T025555/scene_12 119 | 0913T025555/scene_16 120 | 0913T025555/scene_18 121 | 0913T025555/scene_19 122 | 0913T025555/scene_21 123 | -------------------------------------------------------------------------------- /ycb_render/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import os 6 | import re 7 | import sys 8 | import platform 9 | import subprocess 10 | 11 | from setuptools import setup, Extension 12 | from setuptools.command.build_ext import build_ext 13 | from distutils.version import LooseVersion 14 | 15 | 16 | class CMakeExtension(Extension): 17 | def __init__(self, name, sourcedir=''): 18 | Extension.__init__(self, name, sources=[]) 19 | self.sourcedir = os.path.abspath(sourcedir) 20 | 21 | 22 | class CMakeBuild(build_ext): 23 | def run(self): 24 | try: 25 | out = subprocess.check_output(['cmake', '--version']) 26 | except OSError: 27 | raise RuntimeError("CMake must be installed to build the following extensions: " + 28 | ", ".join(e.name for e in self.extensions)) 29 | 30 | if platform.system() == "Windows": 31 | cmake_version = LooseVersion( 32 | re.search(r'version\s*([\d.]+)', out.decode()).group(1)) 33 | if cmake_version < '3.1.0': 34 | raise RuntimeError("CMake >= 3.1.0 is required on Windows") 35 | 36 | for ext in self.extensions: 37 | self.build_extension(ext) 38 | 39 | def build_extension(self, ext): 40 | extdir = os.path.abspath(os.path.dirname( 41 | self.get_ext_fullpath(ext.name))) 42 | cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, 43 | '-DCMAKE_RUNTIME_OUTPUT_DIRECTORY=' + 44 | os.path.join(extdir, 'build'), 45 | '-DPYTHON_EXECUTABLE=' + sys.executable] 46 | 47 | cfg = 'Debug' if self.debug else 'Release' 48 | build_args = ['--config', cfg] 49 | 50 | if platform.system() == "Windows": 51 | cmake_args += [ 52 | '-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] 53 | if sys.maxsize > 2**32: 54 | cmake_args += ['-A', 'x64'] 55 | build_args += ['--', '/m'] 56 | else: 57 | cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] 58 | build_args += ['--', '-j2'] 59 | 60 | env = os.environ.copy() 61 | env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), 62 | self.distribution.get_version()) 63 | if not os.path.exists(self.build_temp): 64 | os.makedirs(self.build_temp) 65 | subprocess.check_call(['cmake', ext.sourcedir] + 66 | cmake_args, cwd=self.build_temp, env=env) 67 | subprocess.check_call(['cmake', '--build', '.'] + 68 | build_args, cwd=self.build_temp) 69 | 70 | 71 | setup( 72 | name='CppYCBRenderer', 73 | version='0.0.1', 74 | author='Fei Xia', 75 | author_email='xf1280@gmail.com', 76 | description='A test project using pybind11 and CMake', 77 | long_description='', 78 | ext_modules=[CMakeExtension('CppYCBRenderer')], 79 | cmdclass=dict(build_ext=CMakeBuild), 80 | zip_safe=False, 81 | ) 82 | -------------------------------------------------------------------------------- /ycb_render/shaders/frag_blinnphong.shader: -------------------------------------------------------------------------------- 1 | #version 460 2 | uniform sampler2D texUnit; 3 | in vec2 theCoords; 4 | in vec3 Normal; 5 | in vec3 Normal_cam; 6 | in vec3 FragPos; 7 | in vec3 Instance_color; 8 | in vec3 Pos_cam; 9 | in vec3 Pos_obj; 10 | in float inverse_normal; 11 | layout (location = 0) out vec4 outputColour; 12 | layout (location = 1) out vec4 NormalColour; 13 | layout (location = 2) out vec4 InstanceColour; 14 | layout (location = 3) out vec4 PCObject; 15 | layout (location = 4) out vec4 PCColour; 16 | 17 | uniform vec3 light_position; // in world coordinate 18 | uniform vec3 light_color; // light color 19 | uniform vec3 world_light_pos1; 20 | uniform vec3 world_light_pos2; 21 | uniform vec3 mat_ambient; 22 | uniform vec3 mat_diffuse; 23 | uniform vec3 mat_specular; 24 | 
uniform float mat_shininess; 25 | 26 | void main() { 27 | if (inverse_normal > 0) discard; 28 | vec4 texColor = texture(texUnit, theCoords); 29 | if(texColor.a < 0.1) discard; 30 | 31 | // attenuation 32 | float a = 1.0f; 33 | float b = 0.5f; 34 | float c = 0.25f; 35 | float r = length(light_position - Pos_obj); 36 | float scalar = (a + b*r + c*r*r); 37 | if(scalar < 0.00000001) 38 | scalar = 0.0; 39 | else 40 | scalar = 1.0/scalar; 41 | 42 | vec3 norm = normalize(Normal); 43 | vec3 ambient = mat_ambient * light_color; 44 | vec3 lightDir = normalize(light_position - FragPos); 45 | float diff = max(dot(norm, lightDir), 0.0); 46 | vec3 diffuse = diff * light_color * mat_diffuse; 47 | vec3 viewDir = normalize(Pos_cam - FragPos); 48 | vec3 reflectDir = reflect(-lightDir, norm); 49 | float spec = pow(max(dot(viewDir, reflectDir), 0.0), mat_shininess); 50 | vec3 specular = light_color * (spec * mat_specular); 51 | 52 | // gamma correction 53 | vec3 linearColour = ambient + scalar*(diffuse + specular); 54 | vec3 gamma = vec3(1.0/2.2); 55 | outputColour = texColor * vec4(pow(linearColour, gamma), 1); 56 | 57 | // add few more lights 58 | // lightDir = normalize(world_light_pos1 - FragPos); 59 | // diff = max(dot(norm, lightDir), 0.0); 60 | // diffuse = diff * light_color * mat_diffuse; 61 | // viewDir = normalize(Pos_cam - FragPos); 62 | // reflectDir = reflect(-lightDir, norm); 63 | // spec = pow(max(dot(viewDir, reflectDir), 0.0), mat_shininess); 64 | // specular = light_color * (spec * mat_specular); 65 | // outputColour += texture(texUnit, theCoords) * vec4(diffuse + ambient + specular, 1); 66 | // lightDir = normalize(world_light_pos2 - FragPos); 67 | // diff = max(dot(norm, lightDir), 0.0); 68 | // diffuse = diff * light_color * mat_diffuse; 69 | // viewDir = normalize(Pos_cam - FragPos); 70 | // reflectDir = reflect(-lightDir, norm); 71 | // spec = pow(max(dot(viewDir, reflectDir), 0.0), mat_shininess); 72 | // specular = light_color * (spec * mat_specular); 73 | // outputColour += texture(texUnit, theCoords) * vec4(ambient + scalar*(diffuse + specular), 1); 74 | 75 | //NormalColour = vec4((Normal_cam + 1) / 2,1); 76 | NormalColour = vec4(Normal_cam,1); 77 | InstanceColour = vec4(Instance_color,1); 78 | PCObject = vec4(Pos_obj,1); 79 | PCColour = vec4(Pos_cam,1); 80 | } -------------------------------------------------------------------------------- /lib/utils/se3.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import numpy as np 6 | import numpy.matlib as npm 7 | from transforms3d.quaternions import mat2quat, quat2mat 8 | import scipy.stats as sci_stats 9 | 10 | # RT is a 3x4 matrix 11 | def se3_inverse(RT): 12 | R = RT[0:3, 0:3] 13 | T = RT[0:3, 3].reshape((3,1)) 14 | RT_new = np.zeros((3, 4), dtype=np.float32) 15 | RT_new[0:3, 0:3] = R.transpose() 16 | RT_new[0:3, 3] = -1 * np.dot(R.transpose(), T).reshape((3)) 17 | return RT_new 18 | 19 | def se3_mul(RT1, RT2): 20 | R1 = RT1[0:3, 0:3] 21 | T1 = RT1[0:3, 3].reshape((3,1)) 22 | 23 | R2 = RT2[0:3, 0:3] 24 | T2 = RT2[0:3, 3].reshape((3,1)) 25 | 26 | RT_new = np.zeros((3, 4), dtype=np.float32) 27 | RT_new[0:3, 0:3] = np.dot(R1, R2) 28 | T_new = np.dot(R1, T2) + T1 29 | RT_new[0:3, 3] = T_new.reshape((3)) 30 | return RT_new 31 | 32 | 33 | def T_inv_transform(T_src, T_tgt): 34 | ''' 35 | :param T_src: 36 | :param T_tgt: 37 | :return: T_delta: delta in pixel 38 | ''' 39 | T_delta = np.zeros((3, ), dtype=np.float32) 40 | 41 | T_delta[0] = T_tgt[0] / T_tgt[2] - T_src[0] / T_src[2] 42 | T_delta[1] = T_tgt[1] / T_tgt[2] - T_src[1] / T_src[2] 43 | T_delta[2] = np.log(T_src[2] / T_tgt[2]) 44 | 45 | return T_delta 46 | 47 | 48 | def rotation_x(theta): 49 | t = theta * np.pi / 180.0 50 | R = np.zeros((3, 3), dtype=np.float32) 51 | R[0, 0] = 1 52 | R[1, 1] = np.cos(t) 53 | R[1, 2] = -np.sin(t) 54 | R[2, 1] = np.sin(t) 55 | R[2, 2] = np.cos(t) 56 | return R 57 | 58 | def rotation_y(theta): 59 | t = theta * np.pi / 180.0 60 | R = np.zeros((3, 3), dtype=np.float32) 61 | R[0, 0] = np.cos(t) 62 | R[0, 2] = np.sin(t) 63 | R[1, 1] = 1 64 | R[2, 0] = -np.sin(t) 65 | R[2, 2] = np.cos(t) 66 | return R 67 | 68 | def rotation_z(theta): 69 | t = theta * np.pi / 180.0 70 | R = np.zeros((3, 3), dtype=np.float32) 71 | R[0, 0] = np.cos(t) 72 | R[0, 1] = -np.sin(t) 73 | R[1, 0] = np.sin(t) 74 | R[1, 1] = np.cos(t) 75 | R[2, 2] = 1 76 | return R 77 | 78 | 79 | # Q is a Nx4 numpy matrix and contains the quaternions to average in the rows. 80 | # The quaternions are arranged as (w,x,y,z), with w being the scalar 81 | # The result will be the average quaternion of the input. Note that the signs 82 | # of the output quaternion can be reversed, since q and -q describe the same orientation 83 | def averageQuaternions(Q): 84 | # Number of quaternions to average 85 | M = Q.shape[0] 86 | A = npm.zeros(shape=(4,4)) 87 | 88 | for i in range(0,M): 89 | q = Q[i,:] 90 | # multiply q with its transposed version q' and add A 91 | A = np.outer(q,q) + A 92 | 93 | # scale 94 | A = (1.0/M)*A 95 | # compute eigenvalues and -vectors 96 | eigenValues, eigenVectors = np.linalg.eig(A) 97 | # Sort by largest eigenvalue 98 | eigenVectors = eigenVectors[:,eigenValues.argsort()[::-1]] 99 | # return the real part of the largest eigenvector (has only real part) 100 | return np.real(eigenVectors[:,0].A1) 101 | -------------------------------------------------------------------------------- /lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import os 6 | import os.path as osp 7 | import numpy as np 8 | import datasets 9 | try: 10 | import cPickle # Use cPickle on Python 2.7 11 | except ImportError: 12 | import pickle as cPickle 13 | import glob 14 | from fcn.config import cfg 15 | 16 | class imdb(object): 17 | """Image database.""" 18 | 19 | def __init__(self): 20 | self._name = '' 21 | self._num_classes = 0 22 | self._classes = [] 23 | 24 | @property 25 | def name(self): 26 | return self._name 27 | 28 | @property 29 | def num_classes(self): 30 | return len(self._classes) 31 | 32 | @property 33 | def classes(self): 34 | return self._classes 35 | 36 | @property 37 | def cache_path(self): 38 | cache_path = osp.abspath(osp.join(datasets.ROOT_DIR, 'data', 'cache')) 39 | if not os.path.exists(cache_path): 40 | os.makedirs(cache_path) 41 | return cache_path 42 | 43 | 44 | # backproject pixels into 3D points in camera's coordinate system 45 | def backproject(self, depth_cv, intrinsic_matrix, factor): 46 | 47 | depth = depth_cv.astype(np.float32, copy=True) / factor 48 | 49 | # get intrinsic matrix 50 | K = intrinsic_matrix 51 | Kinv = np.linalg.inv(K) 52 | 53 | # compute the 3D points 54 | width = depth.shape[1] 55 | height = depth.shape[0] 56 | 57 | # construct the 2D points matrix 58 | x, y = np.meshgrid(np.arange(width), np.arange(height)) 59 | ones = np.ones((height, width), dtype=np.float32) 60 | x2d = np.stack((x, y, ones), axis=2).reshape(width*height, 3) 61 | 62 | # backprojection 63 | R = np.dot(Kinv, x2d.transpose()) 64 | 65 | # compute the 3D points 66 | X = np.multiply(np.tile(depth.reshape(1, width*height), (3, 1)), R) 67 | return np.array(X).transpose().reshape((height, width, 3)) 68 | 69 | 70 | def _build_uniform_poses(self): 71 | 72 | self.eulers = [] 73 | for roll in range(0, 360, 15): 74 | for pitch in range(0, 360, 15): 75 | for yaw in range(0, 360, 15): 76 | self.eulers.append([roll, pitch, yaw]) 77 | 78 | # sample indexes 79 | num_poses = len(self.eulers) 80 | num_classes = len(self._classes_all) 81 | self.pose_indexes = np.zeros((num_classes, ), dtype=np.int32) 82 | self.pose_lists = [] 83 | for i in range(num_classes): 84 | self.pose_lists.append(np.random.permutation(np.arange(num_poses))) 85 | 86 | 87 | """ Compute ordered point cloud from depth image and camera parameters. 88 | 89 | If focal lengths fx,fy are stored in the camera_params dictionary, use that. 90 | Else, assume camera_params contains parameters used to generate synthetic data (e.g. 
fov, near, far, etc) 91 | 92 | @param depth_img: a [H x W] numpy array of depth values in meters 93 | @param camera_params: a dictionary with parameters of the camera used 94 | """ 95 | def compute_xyz(self, depth_img, fx, fy, px, py): 96 | height = depth_img.shape[0] 97 | width = depth_img.shape[1] 98 | indices = np.indices((height, width), dtype=np.float32).transpose(1,2,0) 99 | z_e = depth_img 100 | x_e = (indices[..., 1] - px) * z_e / fx 101 | y_e = (indices[..., 0] - py) * z_e / fy 102 | xyz_img = np.stack([x_e, y_e, z_e], axis=-1) # Shape: [H x W x 3] 103 | return xyz_img 104 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/stats.txt: -------------------------------------------------------------------------------- 1 | num of scenes: 497 2 | num of images: 6541 3 | =============training 0.20================= 4 | num of scenes: 79 5 | num of images: 1203 6 | 002_master_chef_can: 152 7 | 003_cracker_box: 354 8 | 004_sugar_box: 196 9 | 005_tomato_soup_can: 222 10 | 006_mustard_bottle: 233 11 | 007_tuna_fish_can: 169 12 | 008_pudding_box: 384 13 | 009_gelatin_box: 295 14 | 010_potted_meat_can: 179 15 | 011_banana: 195 16 | 019_pitcher_base: 168 17 | 021_bleach_cleanser: 220 18 | 024_bowl: 178 19 | 025_mug: 181 20 | 035_power_drill: 151 21 | 036_wood_block: 145 22 | 037_scissors: 188 23 | 040_large_marker: 176 24 | 052_extra_large_clamp: 201 25 | 061_foam_brick: 328 26 | ============================== 27 | =============training 0.40================= 28 | num of scenes: 122 29 | num of images: 1770 30 | 002_master_chef_can: 236 31 | 003_cracker_box: 458 32 | 004_sugar_box: 292 33 | 005_tomato_soup_can: 331 34 | 006_mustard_bottle: 380 35 | 007_tuna_fish_can: 284 36 | 008_pudding_box: 450 37 | 009_gelatin_box: 360 38 | 010_potted_meat_can: 296 39 | 011_banana: 292 40 | 019_pitcher_base: 264 41 | 021_bleach_cleanser: 303 42 | 024_bowl: 311 43 | 025_mug: 262 44 | 035_power_drill: 281 45 | 036_wood_block: 253 46 | 037_scissors: 303 47 | 040_large_marker: 259 48 | 052_extra_large_clamp: 292 49 | 061_foam_brick: 387 50 | ============================== 51 | =============training 0.60================= 52 | num of scenes: 169 53 | num of images: 2403 54 | 002_master_chef_can: 340 55 | 003_cracker_box: 523 56 | 004_sugar_box: 418 57 | 005_tomato_soup_can: 460 58 | 006_mustard_bottle: 551 59 | 007_tuna_fish_can: 405 60 | 008_pudding_box: 548 61 | 009_gelatin_box: 431 62 | 010_potted_meat_can: 422 63 | 011_banana: 420 64 | 019_pitcher_base: 387 65 | 021_bleach_cleanser: 375 66 | 024_bowl: 410 67 | 025_mug: 368 68 | 035_power_drill: 372 69 | 036_wood_block: 363 70 | 037_scissors: 430 71 | 040_large_marker: 373 72 | 052_extra_large_clamp: 433 73 | 061_foam_brick: 471 74 | ============================== 75 | =============training 0.80================= 76 | num of scenes: 212 77 | num of images: 2898 78 | 002_master_chef_can: 441 79 | 003_cracker_box: 592 80 | 004_sugar_box: 493 81 | 005_tomato_soup_can: 553 82 | 006_mustard_bottle: 668 83 | 007_tuna_fish_can: 472 84 | 008_pudding_box: 620 85 | 009_gelatin_box: 474 86 | 010_potted_meat_can: 510 87 | 011_banana: 517 88 | 019_pitcher_base: 485 89 | 021_bleach_cleanser: 459 90 | 024_bowl: 521 91 | 025_mug: 457 92 | 035_power_drill: 482 93 | 036_wood_block: 450 94 | 037_scissors: 546 95 | 040_large_marker: 446 96 | 052_extra_large_clamp: 519 97 | 061_foam_brick: 528 98 | ============================== 99 | =============training 1.00================= 100 | num of scenes: 265 101 | num of images: 3590 
102 | 002_master_chef_can: 550 103 | 003_cracker_box: 710 104 | 004_sugar_box: 539 105 | 005_tomato_soup_can: 709 106 | 006_mustard_bottle: 761 107 | 007_tuna_fish_can: 598 108 | 008_pudding_box: 752 109 | 009_gelatin_box: 595 110 | 010_potted_meat_can: 610 111 | 011_banana: 570 112 | 019_pitcher_base: 541 113 | 021_bleach_cleanser: 591 114 | 024_bowl: 614 115 | 025_mug: 644 116 | 035_power_drill: 581 117 | 036_wood_block: 584 118 | 037_scissors: 721 119 | 040_large_marker: 580 120 | 052_extra_large_clamp: 624 121 | 061_foam_brick: 688 122 | ============================== 123 | =============testing================= 124 | num of scenes: 232 125 | num of images: 2951 126 | 002_master_chef_can: 438 127 | 003_cracker_box: 496 128 | 004_sugar_box: 517 129 | 005_tomato_soup_can: 615 130 | 006_mustard_bottle: 798 131 | 007_tuna_fish_can: 505 132 | 008_pudding_box: 509 133 | 009_gelatin_box: 403 134 | 010_potted_meat_can: 548 135 | 011_banana: 503 136 | 019_pitcher_base: 475 137 | 021_bleach_cleanser: 459 138 | 024_bowl: 532 139 | 025_mug: 534 140 | 035_power_drill: 492 141 | 036_wood_block: 462 142 | 037_scissors: 634 143 | 040_large_marker: 453 144 | 052_extra_large_clamp: 445 145 | 061_foam_brick: 471 146 | ============================== 147 | -------------------------------------------------------------------------------- /ros/test_images.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 4 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 5 | # text can be found in LICENSE.md 6 | 7 | """Test a DeepIM on images""" 8 | 9 | import torch 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.utils.data 13 | 14 | import argparse 15 | import pprint 16 | import time, os, sys 17 | import os.path as osp 18 | import numpy as np 19 | 20 | import _init_paths 21 | from fcn.train_test import test 22 | from fcn.config import cfg, cfg_from_file, get_output_dir 23 | from datasets.factory import get_dataset 24 | import networks 25 | import rospy 26 | from listener import ImageListener 27 | 28 | def parse_args(): 29 | """ 30 | Parse input arguments 31 | """ 32 | parser = argparse.ArgumentParser(description='Test a DeepIM network') 33 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 34 | default=0, type=int) 35 | parser.add_argument('--instance', dest='instance_id', help='DeepIM instance id to use', 36 | default=0, type=int) 37 | parser.add_argument('--pretrained', dest='pretrained', 38 | help='initialize with pretrained checkpoint', 39 | default=None, type=str) 40 | parser.add_argument('--cfg', dest='cfg_file', 41 | help='optional config file', default=None, type=str) 42 | parser.add_argument('--dataset', dest='dataset_name', 43 | help='dataset to train on', 44 | default='shapenet_scene_train', type=str) 45 | parser.add_argument('--rand', dest='randomize', 46 | help='randomize (do not use a fixed seed)', 47 | action='store_true') 48 | parser.add_argument('--network', dest='network_name', 49 | help='name of the network', 50 | default=None, type=str) 51 | parser.add_argument('--background', dest='background_name', 52 | help='name of the background file', 53 | default=None, type=str) 54 | 55 | if len(sys.argv) == 1: 56 | parser.print_help() 57 | sys.exit(1) 58 | 59 | args = parser.parse_args() 60 | return args 61 | 62 | 63 | if __name__ == '__main__': 64 | args = parse_args() 65 | 66 | 
print('Called with args:') 67 | print(args) 68 | 69 | if args.cfg_file is not None: 70 | cfg_from_file(args.cfg_file) 71 | 72 | print('Using config:') 73 | pprint.pprint(cfg) 74 | 75 | if not args.randomize: 76 | # fix the random seed (numpy) for reproducibility 77 | np.random.seed(cfg.RNG_SEED) 78 | 79 | # device 80 | cfg.device = torch.device('cuda:{:d}'.format(0)) 81 | print('GPU device {:d}'.format(args.gpu_id)) 82 | cfg.gpu_id = args.gpu_id 83 | cfg.instance_id = args.instance_id 84 | 85 | # dataset 86 | cfg.classes = cfg.TEST.CLASSES 87 | cfg.MODE = 'TEST' 88 | cfg.TEST.SYNTHESIZE = False 89 | cfg.TEST.VISUALIZE = False 90 | dataset = get_dataset(args.dataset_name) 91 | 92 | # prepare network 93 | if args.pretrained: 94 | network_data = torch.load(args.pretrained) 95 | print("=> using pre-trained network '{}'".format(args.pretrained)) 96 | else: 97 | network_data = None 98 | print("no pretrained network specified") 99 | sys.exit() 100 | 101 | network = networks.__dict__[args.network_name](dataset.num_classes, network_data).cuda(device=cfg.device) 102 | network = torch.nn.DataParallel(network, device_ids=[0]).cuda(device=cfg.device) 103 | cudnn.benchmark = True 104 | 105 | # image listener 106 | network.eval() 107 | listener = ImageListener(network, dataset) 108 | 109 | while not rospy.is_shutdown(): 110 | listener.run_network() 111 | -------------------------------------------------------------------------------- /lib/utils/zoom_in.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | import numpy as np 6 | import cv2 7 | from fcn.config import cfg 8 | 9 | def zoom_images(image_blob, image_real, image_imgn, image_flow, pose_src, intrinsic_matrix): 10 | 11 | batch_size = image_real.shape[0] 12 | height = image_real.shape[1] 13 | width = image_real.shape[2] 14 | ratio = float(height) / float(width) 15 | 16 | image_zoom = np.zeros((batch_size, height, width, 3), dtype=np.float32) 17 | image_real_zoom = np.zeros((batch_size, height, width, 3), dtype=np.float32) 18 | image_imgn_zoom = np.zeros((batch_size, height, width, 3), dtype=np.float32) 19 | flow_zoom = np.zeros((batch_size, height, width, 2), dtype=np.float32) 20 | mask_real = np.zeros((batch_size, height, width, 1), dtype=np.float32) 21 | mask_imgn = np.zeros((batch_size, height, width, 1), dtype=np.float32) 22 | zoom_factor = np.zeros((batch_size, 4), dtype=np.float32) 23 | 24 | for i in range(batch_size): 25 | 26 | # real image 27 | nz_y, nz_x = np.where(image_real[i, :, :, 0] > 0) 28 | obj_real_start_x = np.min(nz_x) 29 | obj_real_end_x = np.max(nz_x) 30 | obj_real_start_y = np.min(nz_y) 31 | obj_real_end_y = np.max(nz_y) 32 | obj_real_c_x = (obj_real_start_x + obj_real_end_x) * 0.5 33 | obj_real_c_y = (obj_real_start_y + obj_real_end_y) * 0.5 34 | 35 | # rendered image 36 | nz_y, nz_x = np.where(image_imgn[i, :, :, 0] > 0) 37 | obj_imgn_start_x = np.min(nz_x) 38 | obj_imgn_end_x = np.max(nz_x) 39 | obj_imgn_start_y = np.min(nz_y) 40 | obj_imgn_end_y = np.max(nz_y) 41 | obj_imgn_c = np.dot(intrinsic_matrix, pose_src[i, 6:]) 42 | zoom_c_x = obj_imgn_c[0] / obj_imgn_c[2] 43 | zoom_c_y = obj_imgn_c[1] / obj_imgn_c[2] 44 | 45 | # mask region 46 | left_dist = max(zoom_c_x - obj_imgn_start_x, zoom_c_x - obj_real_start_x) 47 | right_dist = max(obj_imgn_end_x - zoom_c_x, obj_real_end_x - zoom_c_x)
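# up_dist / down_dist below measure how far the union of the rendered and observed object masks extends above and below the projected object center (zoom_c_y), mirroring left_dist / right_dist for the horizontal extent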
48 | up_dist = max(zoom_c_y - obj_imgn_start_y, zoom_c_y - obj_real_start_y) 49 | down_dist = max(obj_real_end_y - zoom_c_y, obj_imgn_end_y - zoom_c_y) 50 | 51 | # crop_height = np.max([up_dist, down_dist]) 52 | # crop_width = np.max([left_dist, right_dist]) 53 | crop_height = np.max([ratio * right_dist, ratio * left_dist, up_dist, down_dist]) * 2 54 | crop_width = crop_height / ratio 55 | 56 | # affine transformation 57 | x1 = zoom_c_x - crop_width / 2 58 | x2 = zoom_c_x + crop_width / 2 59 | y1 = zoom_c_y - crop_height / 2 60 | y2 = zoom_c_y + crop_height / 2 61 | 62 | pts1 = np.float32([[x1, y1], [x1, y2], [x2, y1]]) 63 | pts2 = np.float32([[0, 0], [0, height], [width, 0]]) 64 | affine_matrix = cv2.getAffineTransform(pts1, pts2) 65 | 66 | idx = int(pose_src[i, 0]) 67 | image_zoom[i, :, :, :] = cv2.warpAffine(image_blob[idx, :, :, :], affine_matrix, (width, height)) 68 | image_real_zoom[i, :, :, :] = cv2.warpAffine(image_real[i, :, :, :], affine_matrix, (width, height)) 69 | image_imgn_zoom[i, :, :, :] = cv2.warpAffine(image_imgn[i, :, :, :], affine_matrix, (width, height)) 70 | flow_zoom[i, :, :, :] = cv2.warpAffine(image_flow[i, :, :, :], affine_matrix, (width, height)) 71 | flow_zoom[i, :, :, 0] *= affine_matrix[0, 0] 72 | flow_zoom[i, :, :, 1] *= affine_matrix[1, 1] 73 | 74 | # construct masks 75 | nz_y, nz_x = np.where(image_imgn_zoom[i, :, :, 0] > 0) 76 | x1 = int(np.min(nz_x)) 77 | x2 = int(np.max(nz_x)) 78 | y1 = int(np.min(nz_y)) 79 | y2 = int(np.max(nz_y)) 80 | mask_real[i, y1:y2, x1:x2, :] = 1.0 81 | mask_imgn[i, nz_y, nz_x, :] = 1.0 82 | 83 | # image_zoom[i, :, :, :] -= cfg.PIXEL_MEANS 84 | # image_real_zoom[i, :, :, :] -= cfg.PIXEL_MEANS 85 | # image_imgn_zoom[i, :, :, :] -= cfg.PIXEL_MEANS 86 | 87 | zoom_factor[i, 0] = affine_matrix[0, 0] 88 | zoom_factor[i, 1] = affine_matrix[1, 1] 89 | zoom_factor[i, 2] = affine_matrix[0, 2] 90 | zoom_factor[i, 3] = affine_matrix[1, 2] 91 | 92 | return image_zoom, image_real_zoom, image_imgn_zoom, flow_zoom, mask_real, mask_imgn, zoom_factor 93 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_3.txt: -------------------------------------------------------------------------------- 1 | 0907T162817/scene_01 2 | 0907T163317/scene_01 3 | 0907T164137/scene_04 4 | 0907T164137/scene_06 5 | 0907T164137/scene_07 6 | 0907T171024/scene_01 7 | 0907T171024/scene_03 8 | 0907T172157/scene_02 9 | 0907T172157/scene_04 10 | 0907T172157/scene_06 11 | 0907T172157/scene_18 12 | 0907T172157/scene_19 13 | 0907T180254/scene_02 14 | 0907T180254/scene_06 15 | 0907T181442/scene_01 16 | 0907T181442/scene_02 17 | 0907T191150/scene_02 18 | 0907T192325/scene_01 19 | 0907T194155/scene_02 20 | 0907T215136/scene_01 21 | 0907T215805/scene_01 22 | 0907T221844/scene_01 23 | 0907T222106/scene_03 24 | 0907T222555/scene_02 25 | 0907T222933/scene_02 26 | 0907T222933/scene_05 27 | 0907T223820/scene_02 28 | 0907T225112/scene_03 29 | 0907T225112/scene_05 30 | 0907T231131/scene_05 31 | 0907T231131/scene_09 32 | 0907T231131/scene_12 33 | 0907T231131/scene_15 34 | 0907T231131/scene_20 35 | 0907T231131/scene_24 36 | 0907T231131/scene_25 37 | 0907T231131/scene_28 38 | 0908T000608/scene_01 39 | 0908T000608/scene_03 40 | 0908T000608/scene_05 41 | 0908T000608/scene_09 42 | 0908T003015/scene_02 43 | 0908T003015/scene_10 44 | 0910T015831/scene_01 45 | 0910T015831/scene_05 46 | 0910T020707/scene_06 47 | 0910T020707/scene_08 48 | 0910T020707/scene_09 49 | 0910T020707/scene_10 50 | 0910T022211/scene_02 51 | 0910T022211/scene_07 52 
| 0910T022211/scene_08 53 | 0910T022211/scene_09 54 | 0910T025409/scene_01 55 | 0910T025409/scene_02 56 | 0910T025409/scene_04 57 | 0910T025409/scene_12 58 | 0910T033945/scene_03 59 | 0910T033945/scene_07 60 | 0910T033945/scene_09 61 | 0910T033945/scene_12 62 | 0910T040255/scene_06 63 | 0910T040255/scene_07 64 | 0910T040255/scene_09 65 | 0910T042817/scene_01 66 | 0910T042817/scene_02 67 | 0910T042817/scene_06 68 | 0910T042817/scene_09 69 | 0910T044956/scene_02 70 | 0910T044956/scene_11 71 | 0910T044956/scene_13 72 | 0910T044956/scene_23 73 | 0910T044956/scene_27 74 | 0910T053353/scene_02 75 | 0910T053353/scene_08 76 | 0910T054747/scene_01 77 | 0910T054747/scene_05 78 | 0910T060213/scene_01 79 | 0910T060213/scene_05 80 | 0910T060213/scene_12 81 | 0912T112032/scene_04 82 | 0912T112032/scene_05 83 | 0912T112032/scene_07 84 | 0912T112032/scene_08 85 | 0912T112032/scene_16 86 | 0912T114842/scene_01 87 | 0912T114842/scene_03 88 | 0912T114842/scene_09 89 | 0912T114842/scene_17 90 | 0912T114842/scene_19 91 | 0912T114842/scene_21 92 | 0912T114842/scene_24 93 | 0912T122347/scene_03 94 | 0912T122347/scene_05 95 | 0912T122347/scene_09 96 | 0912T122347/scene_10 97 | 0912T124551/scene_01 98 | 0912T124551/scene_03 99 | 0912T124551/scene_06 100 | 0912T124551/scene_09 101 | 0912T124551/scene_11 102 | 0912T192512/scene_01 103 | 0912T192512/scene_04 104 | 0912T193313/scene_01 105 | 0912T193313/scene_08 106 | 0912T193313/scene_23 107 | 0912T193313/scene_24 108 | 0912T193313/scene_28 109 | 0912T193313/scene_30 110 | 0912T193313/scene_31 111 | 0912T193313/scene_33 112 | 0912T202829/scene_01 113 | 0912T202829/scene_05 114 | 0912T202829/scene_06 115 | 0912T210233/scene_01 116 | 0912T211535/scene_01 117 | 0912T212212/scene_03 118 | 0912T212212/scene_07 119 | 0912T212212/scene_08 120 | 0912T212212/scene_13 121 | 0912T212212/scene_15 122 | 0912T212212/scene_17 123 | 0912T212212/scene_18 124 | 0912T230419/scene_03 125 | 0912T230419/scene_07 126 | 0912T230419/scene_09 127 | 0912T230419/scene_11 128 | 0912T230419/scene_14 129 | 0912T230419/scene_19 130 | 0912T230419/scene_20 131 | 0912T230419/scene_22 132 | 0912T230419/scene_27 133 | 0912T230419/scene_33 134 | 0913T000123/scene_12 135 | 0913T000123/scene_13 136 | 0913T000123/scene_14 137 | 0913T000123/scene_16 138 | 0913T000123/scene_17 139 | 0913T002954/scene_03 140 | 0913T002954/scene_06 141 | 0913T004159/scene_04 142 | 0913T004159/scene_07 143 | 0913T004159/scene_08 144 | 0913T010525/scene_03 145 | 0913T010525/scene_07 146 | 0913T010525/scene_08 147 | 0913T010525/scene_15 148 | 0913T010525/scene_18 149 | 0913T013716/scene_05 150 | 0913T013716/scene_06 151 | 0913T013716/scene_07 152 | 0913T013716/scene_15 153 | 0913T020922/scene_01 154 | 0913T020922/scene_14 155 | 0913T020922/scene_16 156 | 0913T020922/scene_17 157 | 0913T020922/scene_19 158 | 0913T020922/scene_25 159 | 0913T024454/scene_04 160 | 0913T024454/scene_05 161 | 0913T025555/scene_01 162 | 0913T025555/scene_03 163 | 0913T025555/scene_07 164 | 0913T025555/scene_09 165 | 0913T025555/scene_12 166 | 0913T025555/scene_16 167 | 0913T025555/scene_18 168 | 0913T025555/scene_19 169 | 0913T025555/scene_21 170 | -------------------------------------------------------------------------------- /ycb_render/visualize_sim.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 3 | # text can be found in LICENSE.md 4 | 5 | import torch 6 | import cv2 7 | import numpy as np 8 | import glob 9 | from transforms3d.quaternions import mat2quat, quat2mat 10 | from ycb_renderer_sim import YCBRenderer 11 | 12 | if __name__ == '__main__': 13 | 14 | model_path = '.' 15 | width = 640 16 | height = 480 17 | files = glob.glob('data/*.npy') 18 | 19 | renderer = YCBRenderer(width=width, height=height, render_marker=True) 20 | models = ['003_cracker_box', '004_sugar_box', '005_tomato_soup_can', '006_mustard_bottle', '010_potted_meat_can'] 21 | colors = [[0, 1, 0], [0, 0, 1], [1, 1, 0], [1, 0, 1], [0.5, 0.5, 0]] 22 | 23 | # models = ['003_cracker_box'] 24 | # colors = [[0, 1, 0]] 25 | 26 | obj_paths = [ 27 | '{}/models_sim/{}/meshes/{}.obj'.format(model_path, item, item) for item in models] 28 | texture_paths = [ 29 | '{}/models_sim/{}/meshes/texture_map.png'.format(model_path, item) for item in models] 30 | renderer.load_objects(obj_paths, texture_paths, colors) 31 | 32 | renderer.set_fov(60) 33 | renderer.set_light_pos([0, 0, 0]) 34 | renderer.set_camera([0, 0, 0], [1, 0, 0], [0, 0, 1]) 35 | 36 | image_tensor = torch.cuda.FloatTensor(height, width, 4).detach() 37 | seg_tensor = torch.cuda.FloatTensor(height, width, 4).detach() 38 | RT_object = np.zeros((3, 4), dtype=np.float32) 39 | RT_camera = np.zeros((3, 4), dtype=np.float32) 40 | 41 | for file_path in files[1:]: 42 | 43 | print(file_path) 44 | 45 | data = np.load(file_path, allow_pickle=True).item() 46 | cls_indexes = [] 47 | poses = [] 48 | 49 | print('object_labels', data['object_labels']) 50 | print('fov', data['horizontal_fov']) 51 | 52 | for i, object_name in enumerate(data['object_labels']): 53 | 54 | cls_index = -1 55 | for j in range(len(models)): 56 | if object_name in models[j]: 57 | cls_index = j 58 | break 59 | 60 | if cls_index >= 0: 61 | cls_indexes.append(cls_index) 62 | 63 | RT = np.zeros((3, 4), dtype=np.float32) 64 | 65 | w = data['relative_poses'][i][0] 66 | x = data['relative_poses'][i][1] 67 | y = data['relative_poses'][i][2] 68 | z = data['relative_poses'][i][3] 69 | RT[:3, :3] = quat2mat([w, x, y, z]) 70 | 71 | x = data['relative_poses'][i][4] 72 | y = data['relative_poses'][i][5] 73 | z = data['relative_poses'][i][6] 74 | RT[:, 3] = [x, y, z] 75 | print(RT) 76 | 77 | qt = np.zeros((7, ), dtype=np.float32) 78 | qt[3:] = mat2quat(RT[:3, :3]) 79 | qt[:3] = RT[:, 3] 80 | print(qt) 81 | 82 | poses.append(qt) 83 | 84 | print('object_name: {}, relative_qt = {}, absolute_qt = {}'.format(data['object_labels'][i], data['relative_poses'][i], data['absolute_poses'][i])) 85 | 86 | renderer.set_poses(poses) 87 | 88 | renderer.render(cls_indexes, image_tensor, seg_tensor) 89 | image_tensor = image_tensor.flip(0) 90 | seg_tensor = seg_tensor.flip(0) 91 | 92 | # RGB to BGR order 93 | im = image_tensor.cpu().numpy() 94 | im = np.clip(im, 0, 1) 95 | im = im[:, :, (2, 1, 0)] * 255 96 | im = im.astype(np.uint8) 97 | 98 | im_label = seg_tensor.cpu().numpy() 99 | im_label = im_label[:, :, (2, 1, 0)] * 255 100 | im_label = np.round(im_label).astype(np.uint8) 101 | im_label = np.clip(im_label, 0, 255) 102 | 103 | import matplotlib.pyplot as plt 104 | fig = plt.figure() 105 | ax = fig.add_subplot(2, 2, 1) 106 | plt.imshow(data['rgb'][:, :, (2, 1, 0)]) 107 | 108 | ax = fig.add_subplot(2, 2, 2) 109 | mask = np.squeeze(data['segmentation'], -1).astype(np.uint8) 110 | mask *= 40 111 | plt.imshow(mask) 112 | 113 | ax = fig.add_subplot(2, 2, 3) 114 | plt.imshow(im[:, :, (2, 1, 0)]) 115 | 116 | ax = fig.add_subplot(2, 2, 4) 117 |
plt.imshow(im_label[:, :, (2, 1, 0)]) 118 | plt.show() 119 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | # NVIDIA Source Code License for DeepIM-PyTorch: A PyTorch Implementation of the DeepIM Framework for 6D Object Pose Estimation 2 | 3 | ## 1. Definitions 4 | 5 | “Licensor” means any person or entity that distributes its Work. 6 | 7 | “Software” means the original work of authorship made available under this License. 8 | “Work” means the Software and any additions to or derivative works of the Software that are made available under this License. 9 | 10 | “Nvidia Processors” means any central processing unit (CPU), graphics processing unit (GPU), field-programmable gate array (FPGA), application-specific integrated circuit (ASIC) or any combination thereof designed, made, sold, or provided by Nvidia or its affiliates. 11 | 12 | The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” have the meaning as provided under U.S. copyright law; provided, however, that for the purposes of this License, derivative works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work. 13 | 14 | Works, including the Software, are “made available” under this License by including in or with the Work either (a) a copyright notice referencing the applicability of this License to the Work, or (b) a copy of this License. 15 | 16 | ## 2. License Grants 17 | 18 | ### 2.1 Copyright Grant. 19 | Subject to the terms and conditions of this License, each Licensor grants to you a perpetual, worldwide, non-exclusive, royalty-free, copyright license to reproduce, prepare derivative works of, publicly display, publicly perform, sublicense and distribute its Work and any resulting derivative works in any form. 20 | 21 | ## 3. Limitations 22 | 23 | ### 3.1 Redistribution. 24 | You may reproduce or distribute the Work only if (a) you do so under this License, (b) you include a complete copy of this License with your distribution, and (c) you retain without modification any copyright, patent, trademark, or attribution notices that are present in the Work. 25 | 26 | ### 3.2 Derivative Works. 27 | You may specify that additional or different terms apply to the use, reproduction, and distribution of your derivative works of the Work (“Your Terms”) only if (a) Your Terms provide that the use limitation in Section 3.3 applies to your derivative works, and (b) you identify the specific derivative works that are subject to Your Terms. Notwithstanding Your Terms, this License (including the redistribution requirements in Section 3.1) will continue to apply to the Work itself. 28 | 29 | ### 3.3 Use Limitation. 30 | The Work and any derivative works thereof only may be used or intended for use non-commercially. The Work or derivative works thereof may be used or intended for use by Nvidia or its affiliates commercially or non-commercially. As used herein, “non-commercially” means for research or evaluation purposes only. 31 | 32 | ### 3.4 Patent Claims. 33 | If you bring or threaten to bring a patent claim against any Licensor (including any claim, cross-claim or counterclaim in a lawsuit) to enforce any patents that you allege are infringed by any Work, then your rights under this License from such Licensor (including the grants in Sections 2.1 and 2.2) will terminate immediately. 34 | 35 | ### 3.5 Trademarks. 
36 | This License does not grant any rights to use any Licensor’s or its affiliates’ names, logos, or trademarks, except as necessary to reproduce the notices described in this License. 37 | 38 | ### 3.6 Termination. 39 | If you violate any term of this License, then your rights under this License (including the grants in Sections 2.1 and 2.2) will terminate immediately. 40 | 41 | ## 4. Disclaimer of Warranty. 42 | 43 | THE WORK IS PROVIDED “AS IS” WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WARRANTIES OR CONDITIONS OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE OR NON-INFRINGEMENT. YOU BEAR THE RISK OF UNDERTAKING ANY ACTIVITIES UNDER THIS LICENSE. 44 | 45 | ## 5. Limitation of Liability. 46 | 47 | EXCEPT AS PROHIBITED BY APPLICABLE LAW, IN NO EVENT AND UNDER NO LEGAL THEORY, WHETHER IN TORT (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE SHALL ANY LICENSOR BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES ARISING OUT OF OR RELATED TO THIS LICENSE, THE USE OR INABILITY TO USE THE WORK (INCLUDING BUT NOT LIMITED TO LOSS OF GOODWILL, BUSINESS INTERRUPTION, LOST PROFITS OR DATA, COMPUTER FAILURE OR MALFUNCTION, OR ANY OTHER COMMERCIAL DAMAGES OR LOSSES), EVEN IF THE LICENSOR HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. 48 | 49 | -------------------------------------------------------------------------------- /lib/utils/setup.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | # text can be found in LICENSE.md 4 | 5 | import os 6 | from os.path import join as pjoin 7 | import numpy as np 8 | from distutils.core import setup 9 | from distutils.extension import Extension 10 | from Cython.Distutils import build_ext 11 | 12 | def find_in_path(name, path): 13 | "Find a file in a search path" 14 | #adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/ 15 | for dir in path.split(os.pathsep): 16 | binpath = pjoin(dir, name) 17 | if os.path.exists(binpath): 18 | return os.path.abspath(binpath) 19 | return None 20 | 21 | def locate_cuda(): 22 | """Locate the CUDA environment on the system 23 | 24 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64' 25 | and values giving the absolute path to each directory. 26 | 27 | Starts by looking for the CUDAHOME env variable. If not found, everything 28 | is based on finding 'nvcc' in the PATH. 29 | """ 30 | 31 | # first check if the CUDAHOME env variable is in use 32 | if 'CUDAHOME' in os.environ: 33 | home = os.environ['CUDAHOME'] 34 | nvcc = pjoin(home, 'bin', 'nvcc') 35 | else: 36 | # otherwise, search the PATH for NVCC 37 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 38 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 39 | if nvcc is None: 40 | raise EnvironmentError('The nvcc binary could not be ' 41 | 'located in your $PATH.
Either add it to your path, or set $CUDAHOME') 42 | home = os.path.dirname(os.path.dirname(nvcc)) 43 | 44 | cudaconfig = {'home':home, 'nvcc':nvcc, 45 | 'include': pjoin(home, 'include'), 46 | 'lib64': pjoin(home, 'lib64')} 47 | for k, v in cudaconfig.items(): 48 | if not os.path.exists(v): 49 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 50 | 51 | return cudaconfig 52 | CUDA = locate_cuda() 53 | 54 | # Obtain the numpy include directory. This logic works across numpy versions. 55 | try: 56 | numpy_include = np.get_include() 57 | except AttributeError: 58 | numpy_include = np.get_numpy_include() 59 | 60 | def customize_compiler_for_nvcc(self): 61 | """inject deep into distutils to customize how the dispatch 62 | to gcc/nvcc works. 63 | 64 | If you subclass UnixCCompiler, it's not trivial to get your subclass 65 | injected in, and still have the right customizations (i.e. 66 | distutils.sysconfig.customize_compiler) run on it. So instead of going 67 | the OO route, I have this. Note, it's kind of like a weird functional 68 | subclassing going on.""" 69 | 70 | # tell the compiler it can process .cu 71 | self.src_extensions.append('.cu') 72 | 73 | # save references to the default compiler_so and _compile methods 74 | default_compiler_so = self.compiler_so 75 | super = self._compile 76 | 77 | # now redefine the _compile method. This gets executed for each 78 | # object but distutils doesn't have the ability to change compilers 79 | # based on source extension: we add it. 80 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 81 | if os.path.splitext(src)[1] == '.cu': 82 | # use the cuda for .cu files 83 | self.set_executable('compiler_so', CUDA['nvcc']) 84 | # use only a subset of the extra_postargs, which are 1-1 translated 85 | # from the extra_compile_args in the Extension class 86 | postargs = extra_postargs['nvcc'] 87 | else: 88 | postargs = extra_postargs['gcc'] 89 | 90 | super(obj, src, ext, cc_args, postargs, pp_opts) 91 | # reset the default compiler_so, which we might have changed for cuda 92 | self.compiler_so = default_compiler_so 93 | 94 | # inject our redefined _compile method into the class 95 | self._compile = _compile 96 | 97 | 98 | # run the customize_compiler 99 | class custom_build_ext(build_ext): 100 | def build_extensions(self): 101 | customize_compiler_for_nvcc(self.compiler) 102 | build_ext.build_extensions(self) 103 | 104 | ext_modules = [ 105 | Extension( 106 | "cython_bbox", 107 | ["bbox.pyx"], 108 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 109 | include_dirs = [numpy_include] 110 | ) 111 | ] 112 | 113 | setup( 114 | name='fcn', 115 | ext_modules=ext_modules, 116 | # inject our custom trigger 117 | cmdclass={'build_ext': custom_build_ext}, 118 | ) 119 | -------------------------------------------------------------------------------- /ycb_render/glutils/glcontext.py: -------------------------------------------------------------------------------- 1 | """Headless GPU-accelerated OpenGL context creation on Google Colaboratory. 2 | 3 | Typical usage: 4 | 5 | # Optional PyOpenGL configuration can be done here. 6 | # import OpenGL 7 | # OpenGL.ERROR_CHECKING = True 8 | 9 | # 'glcontext' must be imported before any OpenGL.* API.
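# (importing glcontext also sets os.environ['PYOPENGL_PLATFORM'] = 'egl', so PyOpenGL picks the headless EGL backend; see the module body below)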
10 | from lucid.misc.gl.glcontext import create_opengl_context 11 | 12 | # Now it's safe to import OpenGL and EGL functions 13 | import OpenGL.GL as gl 14 | 15 | # create_opengl_context() creates a GL context that is attached to an 16 | # offscreen surface of the specified size. Note that rendering to buffers 17 | # of other sizes and formats is still possible with OpenGL Framebuffers. 18 | # 19 | # Users are expected to directly use the EGL API in case more advanced 20 | # context management is required. 21 | width, height = 640, 480 22 | create_opengl_context((width, height)) 23 | 24 | # OpenGL context is available here. 25 | 26 | """ 27 | 28 | from __future__ import print_function 29 | 30 | # pylint: disable=unused-import,g-import-not-at-top,g-statement-before-imports 31 | 32 | import os 33 | 34 | os.environ['PYOPENGL_PLATFORM'] = 'egl' 35 | 36 | try: 37 | import OpenGL 38 | except: 39 | print('This module depends on PyOpenGL.') 40 | print('Please run "\033[1m!pip install -q pyopengl\033[0m" ' 41 | 'prior to importing this module.') 42 | raise 43 | 44 | import ctypes 45 | from ctypes import pointer 46 | from ctypes import util 47 | from ctypes.util import find_library 48 | 49 | # OpenGL loading workaround. 50 | # 51 | # * PyOpenGL tries to load libGL, but we need libOpenGL, see [1,2]. 52 | # This could have been solved by a symlink libGL->libOpenGL, but: 53 | # 54 | # * Python 2.7 can't find libGL and libEGL due to a bug (see [3]) 55 | # in ctypes.util, that was only fixed in Python 3.6. 56 | # 57 | # So, the only solution I've found is to monkeypatch ctypes.util 58 | # [1] https://devblogs.nvidia.com/egl-eye-opengl-visualization-without-x-server/ 59 | # [2] https://devblogs.nvidia.com/linking-opengl-server-side-rendering/ 60 | # [3] https://bugs.python.org/issue9998 61 | _find_library_old = ctypes.util.find_library 62 | try: 63 | 64 | def _find_library_new(name): 65 | return { 66 | 'GL': 'libOpenGL.so', 67 | 'EGL': 'libEGL.so', 68 | }.get(name, _find_library_old(name)) 69 | ctypes.util.find_library = _find_library_new 70 | import OpenGL.GL as gl 71 | import OpenGL.EGL as egl 72 | except: 73 | print('Unable to load OpenGL libraries. ' 74 | 'Make sure you use GPU-enabled backend.') 75 | print('Press "Runtime->Change runtime type" and set ' 76 | '"Hardware accelerator" to GPU.') 77 | raise 78 | finally: 79 | ctypes.util.find_library = _find_library_old 80 | 81 | 82 | class Context: 83 | def __init__(self): 84 | pass 85 | 86 | def create_opengl_context(self, surface_size=(640, 480)): 87 | """Create offscreen OpenGL context and make it current. 88 | 89 | Users are expected to directly use EGL API in case more advanced 90 | context management is required. 91 | 92 | Args: 93 | surface_size: (width, height), size of the offscreen rendering surface.
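A minimal usage sketch (illustrative, not part of the original docstring): ctx = Context(); ctx.create_opengl_context((640, 480)); then issue OpenGL.GL calls against the offscreen pbuffer surface, and finally ctx.destroy() to release the EGL display.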
94 | """ 95 | egl_display = egl.eglGetDisplay(egl.EGL_DEFAULT_DISPLAY) 96 | 97 | major, minor = egl.EGLint(), egl.EGLint() 98 | egl.eglInitialize(egl_display, pointer(major), pointer(minor)) 99 | 100 | config_attribs = [ 101 | egl.EGL_SURFACE_TYPE, egl.EGL_PBUFFER_BIT, egl.EGL_BLUE_SIZE, 8, 102 | egl.EGL_GREEN_SIZE, 8, egl.EGL_RED_SIZE, 8, egl.EGL_DEPTH_SIZE, 24, 103 | egl.EGL_RENDERABLE_TYPE, egl.EGL_OPENGL_BIT, egl.EGL_NONE 104 | ] 105 | # if need MSAA https://www.khronos.org/opengl/wiki/Multisampling 106 | config_attribs = (egl.EGLint * len(config_attribs))(*config_attribs) 107 | 108 | num_configs = egl.EGLint() 109 | egl_cfg = egl.EGLConfig() 110 | egl.eglChooseConfig(egl_display, config_attribs, pointer(egl_cfg), 1, 111 | pointer(num_configs)) 112 | 113 | width, height = surface_size 114 | pbuffer_attribs = [ 115 | egl.EGL_WIDTH, 116 | width, 117 | egl.EGL_HEIGHT, 118 | height, 119 | egl.EGL_NONE, 120 | ] 121 | pbuffer_attribs = (egl.EGLint * len(pbuffer_attribs))(*pbuffer_attribs) 122 | egl_surf = egl.eglCreatePbufferSurface(egl_display, egl_cfg, pbuffer_attribs) 123 | 124 | egl.eglBindAPI(egl.EGL_OPENGL_API) 125 | 126 | egl_context = egl.eglCreateContext(egl_display, egl_cfg, egl.EGL_NO_CONTEXT, 127 | None) 128 | egl.eglMakeCurrent(egl_display, egl_surf, egl_surf, egl_context) 129 | self.display = egl_display 130 | 131 | def destroy(self): 132 | egl.eglTerminate(self.display) 133 | -------------------------------------------------------------------------------- /lib/utils/pose_error.py: -------------------------------------------------------------------------------- 1 | # Author: Tomas Hodan (hodantom@cmp.felk.cvut.cz) 2 | # Center for Machine Perception, Czech Technical University in Prague 3 | 4 | # Implementation of the pose error functions described in: 5 | # Hodan et al., "On Evaluation of 6D Object Pose Estimation", ECCVW 2016 6 | 7 | import math 8 | import numpy as np 9 | from scipy import spatial 10 | from transforms3d.quaternions import quat2mat, mat2quat 11 | 12 | def VOCap(rec, prec): 13 | index = np.where(np.isfinite(rec))[0] 14 | rec = rec[index] 15 | prec = prec[index] 16 | if len(rec) == 0 or len(prec) == 0: 17 | ap = 0 18 | else: 19 | mrec = np.insert(rec, 0, 0) 20 | mrec = np.append(mrec, 0.1) 21 | mpre = np.insert(prec, 0, 0) 22 | mpre = np.append(mpre, prec[-1]) 23 | for i in range(1, len(mpre)): 24 | mpre[i] = max(mpre[i], mpre[i-1]) 25 | i = np.where(mrec[1:] != mrec[:-1])[0] + 1 26 | ap = np.sum(np.multiply(mrec[i] - mrec[i-1], mpre[i])) * 10 27 | return ap 28 | 29 | def transform_pts_Rt(pts, R, t): 30 | """ 31 | Applies a rigid transformation to 3D points. 32 | 33 | :param pts: nx3 ndarray with 3D points. 34 | :param R: 3x3 rotation matrix. 35 | :param t: 3x1 translation vector. 36 | :return: nx3 ndarray with transformed 3D points. 37 | """ 38 | assert(pts.shape[1] == 3) 39 | pts_t = R.dot(pts.T) + t.reshape((3, 1)) 40 | return pts_t.T 41 | 42 | def reproj(K, R_est, t_est, R_gt, t_gt, pts): 43 | """ 44 | reprojection error. 45 | :param K intrinsic matrix 46 | :param R_est, t_est: Estimated pose (3x3 rot. matrix and 3x1 trans. vector). 47 | :param R_gt, t_gt: GT pose (3x3 rot. matrix and 3x1 trans. vector). 48 | :param model: Object model given by a dictionary where item 'pts' 49 | is nx3 ndarray with 3D model points. 50 | :return: Error of pose_est w.r.t. pose_gt. 
51 | """ 52 | pts_est = transform_pts_Rt(pts, R_est, t_est) 53 | pts_gt = transform_pts_Rt(pts, R_gt, t_gt) 54 | 55 | pixels_est = K.dot(pts_est.T) 56 | pixels_est = pixels_est.T 57 | pixels_gt = K.dot(pts_gt.T) 58 | pixels_gt = pixels_gt.T 59 | 60 | n = pts.shape[0] 61 | est = np.zeros((n, 2), dtype=np.float32); 62 | est[:, 0] = np.divide(pixels_est[:, 0], pixels_est[:, 2]) 63 | est[:, 1] = np.divide(pixels_est[:, 1], pixels_est[:, 2]) 64 | 65 | gt = np.zeros((n, 2), dtype=np.float32); 66 | gt[:, 0] = np.divide(pixels_gt[:, 0], pixels_gt[:, 2]) 67 | gt[:, 1] = np.divide(pixels_gt[:, 1], pixels_gt[:, 2]) 68 | 69 | e = np.linalg.norm(est - gt, axis=1).mean() 70 | return e 71 | 72 | def add(R_est, t_est, R_gt, t_gt, pts): 73 | """ 74 | Average Distance of Model Points for objects with no indistinguishable views 75 | - by Hinterstoisser et al. (ACCV 2012). 76 | 77 | :param R_est, t_est: Estimated pose (3x3 rot. matrix and 3x1 trans. vector). 78 | :param R_gt, t_gt: GT pose (3x3 rot. matrix and 3x1 trans. vector). 79 | :param model: Object model given by a dictionary where item 'pts' 80 | is nx3 ndarray with 3D model points. 81 | :return: Error of pose_est w.r.t. pose_gt. 82 | """ 83 | pts_est = transform_pts_Rt(pts, R_est, t_est) 84 | pts_gt = transform_pts_Rt(pts, R_gt, t_gt) 85 | e = np.linalg.norm(pts_est - pts_gt, axis=1).mean() 86 | return e 87 | 88 | def adi(R_est, t_est, R_gt, t_gt, pts): 89 | """ 90 | Average Distance of Model Points for objects with indistinguishable views 91 | - by Hinterstoisser et al. (ACCV 2012). 92 | 93 | :param R_est, t_est: Estimated pose (3x3 rot. matrix and 3x1 trans. vector). 94 | :param R_gt, t_gt: GT pose (3x3 rot. matrix and 3x1 trans. vector). 95 | :param model: Object model given by a dictionary where item 'pts' 96 | is nx3 ndarray with 3D model points. 97 | :return: Error of pose_est w.r.t. pose_gt. 98 | """ 99 | pts_est = transform_pts_Rt(pts, R_est, t_est) 100 | pts_gt = transform_pts_Rt(pts, R_gt, t_gt) 101 | 102 | # Calculate distances to the nearest neighbors from pts_gt to pts_est 103 | nn_index = spatial.cKDTree(pts_est) 104 | nn_dists, _ = nn_index.query(pts_gt, k=1) 105 | 106 | e = nn_dists.mean() 107 | return e 108 | 109 | def re(R_est, R_gt): 110 | """ 111 | Rotational Error. 112 | 113 | :param R_est: Rotational element of the estimated pose (3x1 vector). 114 | :param R_gt: Rotational element of the ground truth pose (3x1 vector). 115 | :return: Error of t_est w.r.t. t_gt. 116 | """ 117 | assert(R_est.shape == R_gt.shape == (3, 3)) 118 | error_cos = 0.5 * (np.trace(R_est.dot(np.linalg.inv(R_gt))) - 1.0) 119 | error_cos = min(1.0, max(-1.0, error_cos)) # Avoid invalid values due to numerical errors 120 | error = math.acos(error_cos) 121 | error = 180.0 * error / np.pi # [rad] -> [deg] 122 | return error 123 | 124 | def te(t_est, t_gt): 125 | """ 126 | Translational Error. 127 | 128 | :param t_est: Translation element of the estimated pose (3x1 vector). 129 | :param t_gt: Translation element of the ground truth pose (3x1 vector). 130 | :return: Error of t_est w.r.t. t_gt. 
131 | """ 132 | assert(t_est.size == t_gt.size == 3) 133 | error = np.linalg.norm(t_gt - t_est) 134 | return error 135 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_4.txt: -------------------------------------------------------------------------------- 1 | 0907T162817/scene_01 2 | 0907T163317/scene_01 3 | 0907T164137/scene_02 4 | 0907T164137/scene_04 5 | 0907T164137/scene_06 6 | 0907T164137/scene_07 7 | 0907T171024/scene_01 8 | 0907T171024/scene_03 9 | 0907T172157/scene_02 10 | 0907T172157/scene_04 11 | 0907T172157/scene_05 12 | 0907T172157/scene_06 13 | 0907T172157/scene_18 14 | 0907T172157/scene_19 15 | 0907T180254/scene_02 16 | 0907T180254/scene_03 17 | 0907T180254/scene_06 18 | 0907T181442/scene_01 19 | 0907T181442/scene_02 20 | 0907T191150/scene_02 21 | 0907T192325/scene_01 22 | 0907T194155/scene_02 23 | 0907T215136/scene_01 24 | 0907T215805/scene_01 25 | 0907T221844/scene_01 26 | 0907T222106/scene_03 27 | 0907T222555/scene_02 28 | 0907T222933/scene_02 29 | 0907T222933/scene_05 30 | 0907T223820/scene_02 31 | 0907T225112/scene_03 32 | 0907T225112/scene_05 33 | 0907T225112/scene_07 34 | 0907T231131/scene_01 35 | 0907T231131/scene_05 36 | 0907T231131/scene_09 37 | 0907T231131/scene_12 38 | 0907T231131/scene_15 39 | 0907T231131/scene_20 40 | 0907T231131/scene_21 41 | 0907T231131/scene_24 42 | 0907T231131/scene_25 43 | 0907T231131/scene_28 44 | 0908T000608/scene_01 45 | 0908T000608/scene_03 46 | 0908T000608/scene_04 47 | 0908T000608/scene_05 48 | 0908T000608/scene_09 49 | 0908T003015/scene_02 50 | 0908T003015/scene_03 51 | 0908T003015/scene_10 52 | 0910T015831/scene_01 53 | 0910T015831/scene_05 54 | 0910T020707/scene_06 55 | 0910T020707/scene_08 56 | 0910T020707/scene_09 57 | 0910T020707/scene_10 58 | 0910T020707/scene_11 59 | 0910T022211/scene_01 60 | 0910T022211/scene_02 61 | 0910T022211/scene_07 62 | 0910T022211/scene_08 63 | 0910T022211/scene_09 64 | 0910T025409/scene_01 65 | 0910T025409/scene_02 66 | 0910T025409/scene_04 67 | 0910T025409/scene_06 68 | 0910T025409/scene_12 69 | 0910T033945/scene_03 70 | 0910T033945/scene_07 71 | 0910T033945/scene_09 72 | 0910T033945/scene_12 73 | 0910T033945/scene_15 74 | 0910T040255/scene_05 75 | 0910T040255/scene_06 76 | 0910T040255/scene_07 77 | 0910T040255/scene_09 78 | 0910T042817/scene_01 79 | 0910T042817/scene_02 80 | 0910T042817/scene_05 81 | 0910T042817/scene_06 82 | 0910T042817/scene_09 83 | 0910T044956/scene_02 84 | 0910T044956/scene_08 85 | 0910T044956/scene_11 86 | 0910T044956/scene_13 87 | 0910T044956/scene_18 88 | 0910T044956/scene_23 89 | 0910T044956/scene_27 90 | 0910T053353/scene_01 91 | 0910T053353/scene_02 92 | 0910T053353/scene_08 93 | 0910T054747/scene_01 94 | 0910T054747/scene_05 95 | 0910T060213/scene_01 96 | 0910T060213/scene_05 97 | 0910T060213/scene_11 98 | 0910T060213/scene_12 99 | 0912T112032/scene_04 100 | 0912T112032/scene_05 101 | 0912T112032/scene_07 102 | 0912T112032/scene_08 103 | 0912T112032/scene_11 104 | 0912T112032/scene_16 105 | 0912T114842/scene_01 106 | 0912T114842/scene_03 107 | 0912T114842/scene_09 108 | 0912T114842/scene_15 109 | 0912T114842/scene_17 110 | 0912T114842/scene_19 111 | 0912T114842/scene_21 112 | 0912T114842/scene_24 113 | 0912T114842/scene_25 114 | 0912T122347/scene_03 115 | 0912T122347/scene_05 116 | 0912T122347/scene_06 117 | 0912T122347/scene_09 118 | 0912T122347/scene_10 119 | 0912T124551/scene_01 120 | 0912T124551/scene_03 121 | 0912T124551/scene_05 122 | 0912T124551/scene_06 123 | 0912T124551/scene_09 124 | 
0912T124551/scene_11 125 | 0912T192512/scene_01 126 | 0912T192512/scene_04 127 | 0912T193313/scene_01 128 | 0912T193313/scene_08 129 | 0912T193313/scene_09 130 | 0912T193313/scene_12 131 | 0912T193313/scene_23 132 | 0912T193313/scene_24 133 | 0912T193313/scene_26 134 | 0912T193313/scene_28 135 | 0912T193313/scene_30 136 | 0912T193313/scene_31 137 | 0912T193313/scene_33 138 | 0912T202829/scene_01 139 | 0912T202829/scene_02 140 | 0912T202829/scene_05 141 | 0912T202829/scene_06 142 | 0912T210233/scene_01 143 | 0912T211535/scene_01 144 | 0912T212212/scene_03 145 | 0912T212212/scene_07 146 | 0912T212212/scene_08 147 | 0912T212212/scene_13 148 | 0912T212212/scene_14 149 | 0912T212212/scene_15 150 | 0912T212212/scene_17 151 | 0912T212212/scene_18 152 | 0912T212212/scene_19 153 | 0912T230419/scene_03 154 | 0912T230419/scene_07 155 | 0912T230419/scene_08 156 | 0912T230419/scene_09 157 | 0912T230419/scene_11 158 | 0912T230419/scene_14 159 | 0912T230419/scene_19 160 | 0912T230419/scene_20 161 | 0912T230419/scene_22 162 | 0912T230419/scene_24 163 | 0912T230419/scene_27 164 | 0912T230419/scene_32 165 | 0912T230419/scene_33 166 | 0913T000123/scene_12 167 | 0913T000123/scene_13 168 | 0913T000123/scene_14 169 | 0913T000123/scene_16 170 | 0913T000123/scene_17 171 | 0913T000123/scene_18 172 | 0913T002954/scene_03 173 | 0913T002954/scene_06 174 | 0913T004159/scene_04 175 | 0913T004159/scene_06 176 | 0913T004159/scene_07 177 | 0913T004159/scene_08 178 | 0913T010525/scene_03 179 | 0913T010525/scene_06 180 | 0913T010525/scene_07 181 | 0913T010525/scene_08 182 | 0913T010525/scene_15 183 | 0913T010525/scene_18 184 | 0913T010525/scene_19 185 | 0913T013716/scene_05 186 | 0913T013716/scene_06 187 | 0913T013716/scene_07 188 | 0913T013716/scene_12 189 | 0913T013716/scene_15 190 | 0913T020922/scene_01 191 | 0913T020922/scene_09 192 | 0913T020922/scene_14 193 | 0913T020922/scene_16 194 | 0913T020922/scene_17 195 | 0913T020922/scene_18 196 | 0913T020922/scene_19 197 | 0913T020922/scene_25 198 | 0913T024454/scene_04 199 | 0913T024454/scene_05 200 | 0913T024454/scene_06 201 | 0913T025555/scene_01 202 | 0913T025555/scene_03 203 | 0913T025555/scene_07 204 | 0913T025555/scene_09 205 | 0913T025555/scene_12 206 | 0913T025555/scene_16 207 | 0913T025555/scene_17 208 | 0913T025555/scene_18 209 | 0913T025555/scene_19 210 | 0913T025555/scene_21 211 | 0913T025555/scene_25 212 | 0913T025555/scene_26 213 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/test.txt: -------------------------------------------------------------------------------- 1 | 0907T162817/scene_02 2 | 0907T163317/scene_03 3 | 0907T164137/scene_01 4 | 0907T164137/scene_03 5 | 0907T164137/scene_08 6 | 0907T164137/scene_09 7 | 0907T171024/scene_04 8 | 0907T171024/scene_05 9 | 0907T172157/scene_01 10 | 0907T172157/scene_09 11 | 0907T172157/scene_11 12 | 0907T172157/scene_12 13 | 0907T172157/scene_13 14 | 0907T172157/scene_15 15 | 0907T172157/scene_16 16 | 0907T180254/scene_01 17 | 0907T180254/scene_07 18 | 0907T180254/scene_08 19 | 0907T181442/scene_03 20 | 0907T181442/scene_04 21 | 0907T191150/scene_01 22 | 0907T192325/scene_02 23 | 0907T194155/scene_01 24 | 0907T215136/scene_03 25 | 0907T215805/scene_02 26 | 0907T221844/scene_02 27 | 0907T222106/scene_01 28 | 0907T222555/scene_01 29 | 0907T222933/scene_04 30 | 0907T222933/scene_06 31 | 0907T223820/scene_01 32 | 0907T225112/scene_01 33 | 0907T225112/scene_02 34 | 0907T225112/scene_04 35 | 0907T231131/scene_02 36 | 0907T231131/scene_03 37 | 0907T231131/scene_04 
38 | 0907T231131/scene_07 39 | 0907T231131/scene_08 40 | 0907T231131/scene_10 41 | 0907T231131/scene_14 42 | 0907T231131/scene_16 43 | 0907T231131/scene_18 44 | 0907T231131/scene_22 45 | 0907T231131/scene_23 46 | 0907T231131/scene_26 47 | 0908T000608/scene_02 48 | 0908T000608/scene_06 49 | 0908T000608/scene_08 50 | 0908T000608/scene_11 51 | 0908T000608/scene_12 52 | 0908T003015/scene_04 53 | 0908T003015/scene_05 54 | 0908T003015/scene_08 55 | 0910T015831/scene_04 56 | 0910T015831/scene_06 57 | 0910T020707/scene_01 58 | 0910T020707/scene_02 59 | 0910T020707/scene_04 60 | 0910T020707/scene_05 61 | 0910T020707/scene_07 62 | 0910T022211/scene_03 63 | 0910T022211/scene_05 64 | 0910T022211/scene_06 65 | 0910T022211/scene_11 66 | 0910T022211/scene_12 67 | 0910T025409/scene_07 68 | 0910T025409/scene_08 69 | 0910T025409/scene_09 70 | 0910T025409/scene_13 71 | 0910T025409/scene_14 72 | 0910T033945/scene_01 73 | 0910T033945/scene_02 74 | 0910T033945/scene_04 75 | 0910T033945/scene_06 76 | 0910T033945/scene_08 77 | 0910T033945/scene_11 78 | 0910T040255/scene_01 79 | 0910T040255/scene_02 80 | 0910T040255/scene_04 81 | 0910T040255/scene_08 82 | 0910T042817/scene_03 83 | 0910T042817/scene_04 84 | 0910T042817/scene_07 85 | 0910T042817/scene_08 86 | 0910T042817/scene_10 87 | 0910T044956/scene_01 88 | 0910T044956/scene_03 89 | 0910T044956/scene_06 90 | 0910T044956/scene_09 91 | 0910T044956/scene_17 92 | 0910T044956/scene_19 93 | 0910T044956/scene_20 94 | 0910T044956/scene_22 95 | 0910T053353/scene_03 96 | 0910T053353/scene_07 97 | 0910T053353/scene_09 98 | 0910T054747/scene_02 99 | 0910T054747/scene_08 100 | 0910T060213/scene_03 101 | 0910T060213/scene_06 102 | 0910T060213/scene_07 103 | 0910T060213/scene_09 104 | 0912T112032/scene_01 105 | 0912T112032/scene_03 106 | 0912T112032/scene_06 107 | 0912T112032/scene_10 108 | 0912T112032/scene_12 109 | 0912T112032/scene_13 110 | 0912T112032/scene_14 111 | 0912T114842/scene_02 112 | 0912T114842/scene_04 113 | 0912T114842/scene_05 114 | 0912T114842/scene_06 115 | 0912T114842/scene_08 116 | 0912T114842/scene_11 117 | 0912T114842/scene_12 118 | 0912T114842/scene_13 119 | 0912T114842/scene_22 120 | 0912T114842/scene_23 121 | 0912T122347/scene_01 122 | 0912T122347/scene_02 123 | 0912T122347/scene_04 124 | 0912T122347/scene_07 125 | 0912T122347/scene_08 126 | 0912T124551/scene_02 127 | 0912T124551/scene_04 128 | 0912T124551/scene_07 129 | 0912T124551/scene_10 130 | 0912T124551/scene_12 131 | 0912T124551/scene_14 132 | 0912T124551/scene_15 133 | 0912T192512/scene_02 134 | 0912T192512/scene_03 135 | 0912T193313/scene_02 136 | 0912T193313/scene_04 137 | 0912T193313/scene_05 138 | 0912T193313/scene_06 139 | 0912T193313/scene_07 140 | 0912T193313/scene_10 141 | 0912T193313/scene_11 142 | 0912T193313/scene_14 143 | 0912T193313/scene_18 144 | 0912T193313/scene_21 145 | 0912T193313/scene_22 146 | 0912T193313/scene_29 147 | 0912T193313/scene_32 148 | 0912T202829/scene_03 149 | 0912T202829/scene_04 150 | 0912T202829/scene_07 151 | 0912T202829/scene_08 152 | 0912T210233/scene_02 153 | 0912T211535/scene_02 154 | 0912T212212/scene_01 155 | 0912T212212/scene_04 156 | 0912T212212/scene_05 157 | 0912T212212/scene_06 158 | 0912T212212/scene_09 159 | 0912T212212/scene_10 160 | 0912T212212/scene_12 161 | 0912T212212/scene_16 162 | 0912T212212/scene_20 163 | 0912T212212/scene_21 164 | 0912T230419/scene_01 165 | 0912T230419/scene_04 166 | 0912T230419/scene_05 167 | 0912T230419/scene_06 168 | 0912T230419/scene_10 169 | 0912T230419/scene_12 170 | 0912T230419/scene_15 171 | 
0912T230419/scene_16 172 | 0912T230419/scene_18 173 | 0912T230419/scene_21 174 | 0912T230419/scene_23 175 | 0912T230419/scene_26 176 | 0912T230419/scene_28 177 | 0912T230419/scene_29 178 | 0912T230419/scene_30 179 | 0912T230419/scene_31 180 | 0913T000123/scene_02 181 | 0913T000123/scene_06 182 | 0913T000123/scene_08 183 | 0913T000123/scene_09 184 | 0913T000123/scene_10 185 | 0913T000123/scene_11 186 | 0913T000123/scene_15 187 | 0913T002954/scene_01 188 | 0913T002954/scene_07 189 | 0913T004159/scene_02 190 | 0913T004159/scene_03 191 | 0913T004159/scene_09 192 | 0913T004159/scene_10 193 | 0913T010525/scene_02 194 | 0913T010525/scene_05 195 | 0913T010525/scene_10 196 | 0913T010525/scene_11 197 | 0913T010525/scene_12 198 | 0913T010525/scene_14 199 | 0913T010525/scene_17 200 | 0913T010525/scene_20 201 | 0913T013716/scene_03 202 | 0913T013716/scene_04 203 | 0913T013716/scene_08 204 | 0913T013716/scene_11 205 | 0913T013716/scene_14 206 | 0913T013716/scene_16 207 | 0913T020922/scene_03 208 | 0913T020922/scene_05 209 | 0913T020922/scene_06 210 | 0913T020922/scene_07 211 | 0913T020922/scene_08 212 | 0913T020922/scene_11 213 | 0913T020922/scene_12 214 | 0913T020922/scene_15 215 | 0913T020922/scene_23 216 | 0913T024454/scene_01 217 | 0913T024454/scene_02 218 | 0913T024454/scene_03 219 | 0913T025555/scene_02 220 | 0913T025555/scene_04 221 | 0913T025555/scene_05 222 | 0913T025555/scene_06 223 | 0913T025555/scene_08 224 | 0913T025555/scene_10 225 | 0913T025555/scene_11 226 | 0913T025555/scene_13 227 | 0913T025555/scene_15 228 | 0913T025555/scene_20 229 | 0913T025555/scene_22 230 | 0913T025555/scene_24 231 | 0913T025555/scene_27 232 | 0913T025555/scene_28 233 | -------------------------------------------------------------------------------- /tools/test_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 4 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full 5 | # text can be found in LICENSE.md 6 | 7 | """Test a DeepIM network on an image database.""" 8 | 9 | import torch 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.utils.data 13 | 14 | import argparse 15 | import pprint 16 | import time, os, sys 17 | import os.path as osp 18 | import numpy as np 19 | 20 | import _init_paths 21 | from fcn.train_test import test 22 | from fcn.config import cfg, cfg_from_file, get_output_dir 23 | from datasets.factory import get_dataset 24 | import networks 25 | from ycb_renderer import YCBRenderer 26 | 27 | def parse_args(): 28 | """ 29 | Parse input arguments 30 | """ 31 | parser = argparse.ArgumentParser(description='Test a DeepIM network') 32 | parser.add_argument('--gpu', dest='gpu_id', help='GPU id to use', 33 | default=0, type=int) 34 | parser.add_argument('--pretrained', dest='pretrained', 35 | help='initialize with pretrained checkpoint', 36 | default=None, type=str) 37 | parser.add_argument('--cfg', dest='cfg_file', 38 | help='optional config file', default=None, type=str) 39 | parser.add_argument('--dataset', dest='dataset_name', 40 | help='dataset to test on', 41 | default='shapenet_scene_train', type=str) 42 | parser.add_argument('--rand', dest='randomize', 43 | help='randomize (do not use a fixed seed)', 44 | action='store_true') 45 | parser.add_argument('--network', dest='network_name', 46 | help='name of the network', 47 | default=None, type=str) 48 | parser.add_argument('--cad', dest='cad_name', 49 | help='name of the CAD file', 50 | default=None, type=str) 51 | parser.add_argument('--pose', dest='pose_name', 52 | help='name of the pose files', 53 | default=None, type=str) 54 | parser.add_argument('--background', dest='background_name', 55 | help='name of the background file', 56 | default=None, type=str) 57 | parser.add_argument('--dataset_background', dest='dataset_background_name', 58 | help='background dataset to use', 59 | default='background_nvidia', type=str) 60 | 61 | if len(sys.argv) == 1: 62 | parser.print_help() 63 | sys.exit(1) 64 | 65 | args = parser.parse_args() 66 | return args 67 | 68 | if __name__ == '__main__': 69 | args = parse_args() 70 | 71 | print('Called with args:') 72 | print(args) 73 | 74 | if args.cfg_file is not None: 75 | cfg_from_file(args.cfg_file) 76 | 77 | print('Using config:') 78 | pprint.pprint(cfg) 79 | 80 | if not args.randomize and not cfg.TEST.VISUALIZE: 81 | # fix the random seeds (numpy and torch) for reproducibility 82 | np.random.seed(cfg.RNG_SEED) 83 | torch.manual_seed(cfg.RNG_SEED) 84 | 85 | # device 86 | cfg.gpu_id = 0 87 | cfg.device = torch.device('cuda:{:d}'.format(cfg.gpu_id)) 88 | print('GPU device {:d}'.format(args.gpu_id)) 89 | 90 | cfg.classes = cfg.TEST.CLASSES 91 | # prepare dataset 92 | if cfg.TEST.VISUALIZE: 93 | shuffle = True 94 | else: 95 | shuffle = False 96 | cfg.MODE = 'TEST' 97 | 98 | dataset = get_dataset(args.dataset_name) 99 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.TEST.IMS_PER_BATCH, shuffle=shuffle, num_workers=0) 100 | print('Use dataset `{:s}` for testing'.format(dataset.name)) 101 | 102 | # background dataset 103 | if cfg.TEST.SYNTHESIZE: 104 | if cfg.TRAIN.SYN_BACKGROUND_SPECIFIC: 105 | background_dataset = get_dataset(args.dataset_background_name) 106 | else: 107 | background_dataset = get_dataset('background_coco') 108 | background_loader = torch.utils.data.DataLoader(background_dataset, batch_size=cfg.TRAIN.IMS_PER_BATCH, 109 | shuffle=True, num_workers=4) 110 | else: 111 |
background_loader = None 112 | 113 | cfg.TEST.MODEL = args.pretrained.split('/')[-1] 114 | output_dir = get_output_dir(dataset, None) 115 | output_dir = os.path.join(output_dir, cfg.TEST.MODEL) 116 | print('Output will be saved to `{:s}`'.format(output_dir)) 117 | if not os.path.exists(output_dir): 118 | os.makedirs(output_dir) 119 | 120 | print('loading 3D models') 121 | cfg.renderer = YCBRenderer(width=cfg.TRAIN.SYN_WIDTH, height=cfg.TRAIN.SYN_HEIGHT, render_marker=False, gpu_id=args.gpu_id) 122 | if cfg.TEST.SYNTHESIZE: 123 | cfg.renderer.load_objects(dataset.model_mesh_paths, dataset.model_texture_paths, dataset.model_colors) 124 | print(dataset.model_mesh_paths) 125 | else: 126 | cfg.renderer.load_objects(dataset.model_mesh_paths_target, dataset.model_texture_paths_target, dataset.model_colors_target) 127 | print(dataset.model_mesh_paths_target) 128 | cfg.renderer.set_camera_default() 129 | 130 | # prepare network 131 | if args.pretrained: 132 | network_data = torch.load(args.pretrained) 133 | print("=> using pre-trained network '{}'".format(args.pretrained)) 134 | else: 135 | network_data = None 136 | print("no pretrained network specified") 137 | sys.exit() 138 | # make sure the model is loaded 139 | network = networks.__dict__[args.network_name](len(cfg.TRAIN.CLASSES), network_data).cuda() 140 | network = torch.nn.DataParallel(network).cuda() 141 | cudnn.benchmark = True 142 | 143 | # test network 144 | test(dataloader, background_loader, network, output_dir) 145 | 146 | # evaluation 147 | dataset.evaluation(output_dir) 148 | -------------------------------------------------------------------------------- /ycb_render/glad/EGL/eglplatform.h: -------------------------------------------------------------------------------- 1 | #ifndef __eglplatform_h_ 2 | #define __eglplatform_h_ 3 | 4 | /* 5 | ** Copyright (c) 2007-2016 The Khronos Group Inc. 6 | ** 7 | ** Permission is hereby granted, free of charge, to any person obtaining a 8 | ** copy of this software and/or associated documentation files (the 9 | ** "Materials"), to deal in the Materials without restriction, including 10 | ** without limitation the rights to use, copy, modify, merge, publish, 11 | ** distribute, sublicense, and/or sell copies of the Materials, and to 12 | ** permit persons to whom the Materials are furnished to do so, subject to 13 | ** the following conditions: 14 | ** 15 | ** The above copyright notice and this permission notice shall be included 16 | ** in all copies or substantial portions of the Materials. 17 | ** 18 | ** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 | ** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 | ** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 | ** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | ** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 | ** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 | ** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS. 25 | */ 26 | 27 | /* Platform-specific types and definitions for egl.h 28 | * $Revision: 30994 $ on $Date: 2015-04-30 13:36:48 -0700 (Thu, 30 Apr 2015) $ 29 | * 30 | * Adopters may modify khrplatform.h and this file to suit their platform. 31 | * You are encouraged to submit all modifications to the Khronos group so that 32 | * they can be included in future versions of this file. 
Please submit changes 33 | * by sending them to the public Khronos Bugzilla (http://khronos.org/bugzilla) 34 | * by filing a bug against product "EGL" component "Registry". 35 | */ 36 | 37 | #include <KHR/khrplatform.h> 38 | 39 | /* Macros used in EGL function prototype declarations. 40 | * 41 | * EGL functions should be prototyped as: 42 | * 43 | * EGLAPI return-type EGLAPIENTRY eglFunction(arguments); 44 | * typedef return-type (EGLAPIENTRYP PFNEGLFUNCTIONPROC) (arguments); 45 | * 46 | * KHRONOS_APICALL and KHRONOS_APIENTRY are defined in KHR/khrplatform.h 47 | */ 48 | 49 | #ifndef EGLAPI 50 | #define EGLAPI KHRONOS_APICALL 51 | #endif 52 | 53 | #ifndef EGLAPIENTRY 54 | #define EGLAPIENTRY KHRONOS_APIENTRY 55 | #endif 56 | #define EGLAPIENTRYP EGLAPIENTRY* 57 | 58 | /* The types NativeDisplayType, NativeWindowType, and NativePixmapType 59 | * are aliases of window-system-dependent types, such as X Display * or 60 | * Windows Device Context. They must be defined in platform-specific 61 | * code below. The EGL-prefixed versions of Native*Type are the same 62 | * types, renamed in EGL 1.3 so all types in the API start with "EGL". 63 | * 64 | * Khronos STRONGLY RECOMMENDS that you use the default definitions 65 | * provided below, since these changes affect both binary and source 66 | * portability of applications using EGL running on different EGL 67 | * implementations. 68 | */ 69 | 70 | #if defined(_WIN32) || defined(__VC32__) && !defined(__CYGWIN__) && !defined(__SCITECH_SNAP__) /* Win32 and WinCE */ 71 | #ifndef WIN32_LEAN_AND_MEAN 72 | #define WIN32_LEAN_AND_MEAN 1 73 | #endif 74 | #include <windows.h> 75 | 76 | typedef HDC EGLNativeDisplayType; 77 | typedef HBITMAP EGLNativePixmapType; 78 | typedef HWND EGLNativeWindowType; 79 | 80 | #elif defined(__WINSCW__) || defined(__SYMBIAN32__) /* Symbian */ 81 | 82 | typedef int EGLNativeDisplayType; 83 | typedef void *EGLNativeWindowType; 84 | typedef void *EGLNativePixmapType; 85 | 86 | #elif defined(WL_EGL_PLATFORM) 87 | 88 | typedef struct wl_display *EGLNativeDisplayType; 89 | typedef struct wl_egl_pixmap *EGLNativePixmapType; 90 | typedef struct wl_egl_window *EGLNativeWindowType; 91 | 92 | #elif defined(__GBM__) 93 | 94 | typedef struct gbm_device *EGLNativeDisplayType; 95 | typedef struct gbm_bo *EGLNativePixmapType; 96 | typedef void *EGLNativeWindowType; 97 | 98 | #elif defined(__ANDROID__) || defined(ANDROID) 99 | 100 | struct ANativeWindow; 101 | struct egl_native_pixmap_t; 102 | 103 | typedef struct ANativeWindow* EGLNativeWindowType; 104 | typedef struct egl_native_pixmap_t* EGLNativePixmapType; 105 | typedef void* EGLNativeDisplayType; 106 | 107 | #elif defined(__unix__) || defined(__APPLE__) 108 | 109 | #if defined(MESA_EGL_NO_X11_HEADERS) 110 | 111 | typedef void *EGLNativeDisplayType; 112 | typedef khronos_uintptr_t EGLNativePixmapType; 113 | typedef khronos_uintptr_t EGLNativeWindowType; 114 | 115 | #else 116 | 117 | /* X11 (tentative) */ 118 | #include <X11/Xlib.h> 119 | #include <X11/Xutil.h> 120 | 121 | typedef Display *EGLNativeDisplayType; 122 | typedef Pixmap EGLNativePixmapType; 123 | typedef Window EGLNativeWindowType; 124 | 125 | #endif /* MESA_EGL_NO_X11_HEADERS */ 126 | 127 | #elif __HAIKU__ 128 | #include <kernel/image.h> 129 | typedef void *EGLNativeDisplayType; 130 | typedef khronos_uintptr_t EGLNativePixmapType; 131 | typedef khronos_uintptr_t EGLNativeWindowType; 132 | 133 | #else 134 | #error "Platform not recognized" 135 | #endif 136 | 137 | /* EGL 1.2 types, renamed for consistency in EGL 1.3 */ 138 | typedef EGLNativeDisplayType NativeDisplayType; 139 | typedef
EGLNativePixmapType NativePixmapType; 140 | typedef EGLNativeWindowType NativeWindowType; 141 | 142 | 143 | /* Define EGLint. This must be a signed integral type large enough to contain 144 | * all legal attribute names and values passed into and out of EGL, whether 145 | * their type is boolean, bitmask, enumerant (symbolic constant), integer, 146 | * handle, or other. While in general a 32-bit integer will suffice, if 147 | * handles are 64 bit types, then EGLint should be defined as a signed 64-bit 148 | * integer type. 149 | */ 150 | typedef khronos_int32_t EGLint; 151 | 152 | 153 | /* C++ / C typecast macros for special EGL handle values */ 154 | #if defined(__cplusplus) 155 | #define EGL_CAST(type, value) (static_cast<type>(value)) 156 | #else 157 | #define EGL_CAST(type, value) ((type) (value)) 158 | #endif 159 | 160 | #endif /* __eglplatform_h */ 161 | -------------------------------------------------------------------------------- /ycb_render/cpp/test_device.cpp: -------------------------------------------------------------------------------- 1 | // Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 2 | // This work is licensed under the NVIDIA Source Code License - Non-commercial. Full 3 | // text can be found in LICENSE.md 4 | 5 | //g++ glad/egl.c glad/gl.c egl.cpp -I glad -lpthread -ldl 6 | #include 7 | 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | 14 | #include 15 | #include 16 | struct EGLInternalData2 { 17 | bool m_isInitialized; 18 | 19 | int m_windowWidth; 20 | int m_windowHeight; 21 | int m_renderDevice; 22 | 23 | EGLBoolean success; 24 | EGLint num_configs; 25 | EGLConfig egl_config; 26 | EGLSurface egl_surface; 27 | EGLContext egl_context; 28 | EGLDisplay egl_display; 29 | 30 | EGLInternalData2() 31 | : m_isInitialized(false), 32 | m_windowWidth(0), 33 | m_windowHeight(0) {} 34 | }; 35 | 36 | int main(int argc, char ** argv){ 37 | 38 | 39 | int m_windowWidth; 40 | int m_windowHeight; 41 | int m_renderDevice; 42 | 43 | EGLBoolean success; 44 | EGLint num_configs; 45 | EGLConfig egl_config; 46 | EGLSurface egl_surface; 47 | EGLContext egl_context; 48 | EGLDisplay egl_display; 49 | 50 | m_windowWidth = 256; 51 | m_windowHeight = 256; 52 | m_renderDevice = -1; 53 | 54 | EGLint egl_config_attribs[] = {EGL_RED_SIZE, 55 | 8, 56 | EGL_GREEN_SIZE, 57 | 8, 58 | EGL_BLUE_SIZE, 59 | 8, 60 | EGL_DEPTH_SIZE, 61 | 8, 62 | EGL_SURFACE_TYPE, 63 | EGL_PBUFFER_BIT, 64 | EGL_RENDERABLE_TYPE, 65 | EGL_OPENGL_BIT, 66 | EGL_NONE}; 67 | 68 | EGLint egl_pbuffer_attribs[] = { 69 | EGL_WIDTH, m_windowWidth, EGL_HEIGHT, m_windowHeight, 70 | EGL_NONE, 71 | }; 72 | 73 | EGLInternalData2* m_data = new EGLInternalData2(); 74 | 75 | // Load EGL functions 76 | int egl_version = gladLoaderLoadEGL(NULL); 77 | if(!egl_version) { 78 | fprintf(stderr, "failed to load EGL with glad.\n"); 79 | exit(EXIT_FAILURE); 80 | 81 | } 82 | 83 | // Query EGL Devices 84 | const int max_devices = 32; 85 | EGLDeviceEXT egl_devices[max_devices]; 86 | EGLint num_devices = 0; 87 | // check the EGL error state after the query, not before it 88 | if (!eglQueryDevicesEXT(max_devices, egl_devices, &num_devices) || 89 | eglGetError() != EGL_SUCCESS) { 90 | printf("eglQueryDevicesEXT Failed.\n"); 91 | m_data->egl_display = EGL_NO_DISPLAY; 92 | } 93 | 94 | //printf("number of devices found %d\n", num_devices); 95 | 96 | 97 | m_data->m_renderDevice = atoi(argv[1]); 98 | 99 | // Set display 100 | EGLDisplay display = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, 101 | egl_devices[m_data->m_renderDevice], NULL); 102 | if (eglGetError()
== EGL_SUCCESS && display != EGL_NO_DISPLAY) { 103 | int major, minor; 104 | EGLBoolean initialized = eglInitialize(display, &major, &minor); 105 | if (eglGetError() == EGL_SUCCESS && initialized == EGL_TRUE) { 106 | m_data->egl_display = display; 107 | } 108 | } 109 | 110 | if (!eglInitialize(m_data->egl_display, NULL, NULL)) { 111 | fprintf(stderr, "Unable to initialize EGL\n"); 112 | exit(EXIT_FAILURE); 113 | } 114 | 115 | egl_version = gladLoaderLoadEGL(m_data->egl_display); 116 | if (!egl_version) { 117 | fprintf(stderr, "Unable to reload EGL.\n"); 118 | exit(EXIT_FAILURE); 119 | } 120 | printf("Loaded EGL %d.%d after reload.\n", GLAD_VERSION_MAJOR(egl_version), 121 | GLAD_VERSION_MINOR(egl_version)); 122 | 123 | 124 | m_data->success = eglBindAPI(EGL_OPENGL_API); 125 | if (!m_data->success) { 126 | // TODO: Properly handle this error (requires change to default window 127 | // API to change return on all window types to bool). 128 | fprintf(stderr, "Failed to bind OpenGL API.\n"); 129 | exit(EXIT_FAILURE); 130 | } 131 | 132 | m_data->success = 133 | eglChooseConfig(m_data->egl_display, egl_config_attribs, 134 | &m_data->egl_config, 1, &m_data->num_configs); 135 | if (!m_data->success) { 136 | // TODO: Properly handle this error (requires change to default window 137 | // API to change return on all window types to bool). 138 | fprintf(stderr, "Failed to choose config (eglError: %d)\n", eglGetError()); 139 | exit(EXIT_FAILURE); 140 | } 141 | if (m_data->num_configs != 1) { 142 | fprintf(stderr, "Didn't get exactly one config, but %d\n", m_data->num_configs); 143 | exit(EXIT_FAILURE); 144 | } 145 | 146 | m_data->egl_surface = eglCreatePbufferSurface( 147 | m_data->egl_display, m_data->egl_config, egl_pbuffer_attribs); 148 | if (m_data->egl_surface == EGL_NO_SURFACE) { 149 | fprintf(stderr, "Unable to create EGL surface (eglError: %d)\n", eglGetError()); 150 | exit(EXIT_FAILURE); 151 | } 152 | 153 | 154 | m_data->egl_context = eglCreateContext( 155 | m_data->egl_display, m_data->egl_config, EGL_NO_CONTEXT, NULL); 156 | if (!m_data->egl_context) { 157 | fprintf(stderr, "Unable to create EGL context (eglError: %d)\n",eglGetError()); 158 | exit(EXIT_FAILURE); 159 | } 160 | 161 | m_data->success = 162 | eglMakeCurrent(m_data->egl_display, m_data->egl_surface, m_data->egl_surface, 163 | m_data->egl_context); 164 | if (!m_data->success) { 165 | fprintf(stderr, "Failed to make context current (eglError: %d)\n", eglGetError()); 166 | exit(EXIT_FAILURE); 167 | } 168 | 169 | if (!gladLoadGL(eglGetProcAddress)) { 170 | fprintf(stderr, "failed to load GL with glad.\n"); 171 | exit(EXIT_FAILURE); 172 | } 173 | 174 | const GLubyte* ven = glGetString(GL_VENDOR); 175 | printf("GL_VENDOR=%s\n", ven); 176 | 177 | const GLubyte* ren = glGetString(GL_RENDERER); 178 | printf("GL_RENDERER=%s\n", ren); 179 | const GLubyte* ver = glGetString(GL_VERSION); 180 | printf("GL_VERSION=%s\n", ver); 181 | const GLubyte* sl = glGetString(GL_SHADING_LANGUAGE_VERSION); 182 | printf("GL_SHADING_LANGUAGE_VERSION=%s\n", sl); 183 | 184 | return 0; 185 | } 186 | 187 | 188 | -------------------------------------------------------------------------------- /data/YCB_Self_Supervision/train_5.txt: -------------------------------------------------------------------------------- 1 | 0907T162426/scene_01 2 | 0907T162628/scene_01 3 | 0907T162817/scene_01 4 | 0907T162817/scene_03 5 | 0907T163317/scene_01 6 | 0907T163317/scene_02 7 | 0907T164137/scene_02 8 | 0907T164137/scene_04 9 | 0907T164137/scene_06 10 | 0907T164137/scene_07 11 | 
0907T171024/scene_01 12 | 0907T171024/scene_02 13 | 0907T171024/scene_03 14 | 0907T172157/scene_02 15 | 0907T172157/scene_04 16 | 0907T172157/scene_05 17 | 0907T172157/scene_06 18 | 0907T172157/scene_08 19 | 0907T172157/scene_18 20 | 0907T172157/scene_19 21 | 0907T180254/scene_02 22 | 0907T180254/scene_03 23 | 0907T180254/scene_06 24 | 0907T181442/scene_01 25 | 0907T181442/scene_02 26 | 0907T191030/scene_01 27 | 0907T191150/scene_02 28 | 0907T192325/scene_01 29 | 0907T192325/scene_03 30 | 0907T194155/scene_02 31 | 0907T215136/scene_01 32 | 0907T215805/scene_01 33 | 0907T221844/scene_01 34 | 0907T222106/scene_02 35 | 0907T222106/scene_03 36 | 0907T222555/scene_02 37 | 0907T222933/scene_02 38 | 0907T222933/scene_03 39 | 0907T222933/scene_05 40 | 0907T223820/scene_02 41 | 0907T225112/scene_03 42 | 0907T225112/scene_05 43 | 0907T225112/scene_07 44 | 0907T231131/scene_01 45 | 0907T231131/scene_05 46 | 0907T231131/scene_06 47 | 0907T231131/scene_09 48 | 0907T231131/scene_12 49 | 0907T231131/scene_15 50 | 0907T231131/scene_20 51 | 0907T231131/scene_21 52 | 0907T231131/scene_24 53 | 0907T231131/scene_25 54 | 0907T231131/scene_27 55 | 0907T231131/scene_28 56 | 0908T000608/scene_01 57 | 0908T000608/scene_03 58 | 0908T000608/scene_04 59 | 0908T000608/scene_05 60 | 0908T000608/scene_09 61 | 0908T003015/scene_01 62 | 0908T003015/scene_02 63 | 0908T003015/scene_03 64 | 0908T003015/scene_10 65 | 0910T014844/scene_01 66 | 0910T015406/scene_01 67 | 0910T015831/scene_01 68 | 0910T015831/scene_03 69 | 0910T015831/scene_05 70 | 0910T020707/scene_06 71 | 0910T020707/scene_08 72 | 0910T020707/scene_09 73 | 0910T020707/scene_10 74 | 0910T020707/scene_11 75 | 0910T022211/scene_01 76 | 0910T022211/scene_02 77 | 0910T022211/scene_04 78 | 0910T022211/scene_07 79 | 0910T022211/scene_08 80 | 0910T022211/scene_09 81 | 0910T025409/scene_01 82 | 0910T025409/scene_02 83 | 0910T025409/scene_04 84 | 0910T025409/scene_06 85 | 0910T025409/scene_12 86 | 0910T033945/scene_03 87 | 0910T033945/scene_07 88 | 0910T033945/scene_09 89 | 0910T033945/scene_12 90 | 0910T033945/scene_13 91 | 0910T033945/scene_15 92 | 0910T040255/scene_05 93 | 0910T040255/scene_06 94 | 0910T040255/scene_07 95 | 0910T040255/scene_09 96 | 0910T042817/scene_01 97 | 0910T042817/scene_02 98 | 0910T042817/scene_05 99 | 0910T042817/scene_06 100 | 0910T042817/scene_09 101 | 0910T044956/scene_02 102 | 0910T044956/scene_08 103 | 0910T044956/scene_10 104 | 0910T044956/scene_11 105 | 0910T044956/scene_13 106 | 0910T044956/scene_18 107 | 0910T044956/scene_23 108 | 0910T044956/scene_27 109 | 0910T053353/scene_01 110 | 0910T053353/scene_02 111 | 0910T053353/scene_08 112 | 0910T054747/scene_01 113 | 0910T054747/scene_05 114 | 0910T054747/scene_06 115 | 0910T060213/scene_01 116 | 0910T060213/scene_05 117 | 0910T060213/scene_08 118 | 0910T060213/scene_11 119 | 0910T060213/scene_12 120 | 0912T112032/scene_04 121 | 0912T112032/scene_05 122 | 0912T112032/scene_07 123 | 0912T112032/scene_08 124 | 0912T112032/scene_09 125 | 0912T112032/scene_11 126 | 0912T112032/scene_15 127 | 0912T112032/scene_16 128 | 0912T114842/scene_01 129 | 0912T114842/scene_03 130 | 0912T114842/scene_09 131 | 0912T114842/scene_10 132 | 0912T114842/scene_14 133 | 0912T114842/scene_15 134 | 0912T114842/scene_17 135 | 0912T114842/scene_19 136 | 0912T114842/scene_21 137 | 0912T114842/scene_24 138 | 0912T114842/scene_25 139 | 0912T122347/scene_03 140 | 0912T122347/scene_05 141 | 0912T122347/scene_06 142 | 0912T122347/scene_09 143 | 0912T122347/scene_10 144 | 0912T124551/scene_01 145 | 0912T124551/scene_03 146 
| 0912T124551/scene_05 147 | 0912T124551/scene_06 148 | 0912T124551/scene_09 149 | 0912T124551/scene_11 150 | 0912T124551/scene_13 151 | 0912T191616/scene_01 152 | 0912T192512/scene_01 153 | 0912T192512/scene_04 154 | 0912T193313/scene_01 155 | 0912T193313/scene_03 156 | 0912T193313/scene_08 157 | 0912T193313/scene_09 158 | 0912T193313/scene_12 159 | 0912T193313/scene_23 160 | 0912T193313/scene_24 161 | 0912T193313/scene_25 162 | 0912T193313/scene_26 163 | 0912T193313/scene_28 164 | 0912T193313/scene_30 165 | 0912T193313/scene_31 166 | 0912T193313/scene_33 167 | 0912T202829/scene_01 168 | 0912T202829/scene_02 169 | 0912T202829/scene_05 170 | 0912T202829/scene_06 171 | 0912T204813/scene_01 172 | 0912T205100/scene_01 173 | 0912T205434/scene_01 174 | 0912T205830/scene_01 175 | 0912T210233/scene_01 176 | 0912T211535/scene_01 177 | 0912T212212/scene_02 178 | 0912T212212/scene_03 179 | 0912T212212/scene_07 180 | 0912T212212/scene_08 181 | 0912T212212/scene_11 182 | 0912T212212/scene_13 183 | 0912T212212/scene_14 184 | 0912T212212/scene_15 185 | 0912T212212/scene_17 186 | 0912T212212/scene_18 187 | 0912T212212/scene_19 188 | 0912T221206/scene_01 189 | 0912T224427/scene_01 190 | 0912T230419/scene_02 191 | 0912T230419/scene_03 192 | 0912T230419/scene_07 193 | 0912T230419/scene_08 194 | 0912T230419/scene_09 195 | 0912T230419/scene_11 196 | 0912T230419/scene_13 197 | 0912T230419/scene_14 198 | 0912T230419/scene_17 199 | 0912T230419/scene_19 200 | 0912T230419/scene_20 201 | 0912T230419/scene_22 202 | 0912T230419/scene_24 203 | 0912T230419/scene_25 204 | 0912T230419/scene_27 205 | 0912T230419/scene_32 206 | 0912T230419/scene_33 207 | 0913T000123/scene_01 208 | 0913T000123/scene_04 209 | 0913T000123/scene_12 210 | 0913T000123/scene_13 211 | 0913T000123/scene_14 212 | 0913T000123/scene_16 213 | 0913T000123/scene_17 214 | 0913T000123/scene_18 215 | 0913T002954/scene_02 216 | 0913T002954/scene_03 217 | 0913T002954/scene_06 218 | 0913T004159/scene_04 219 | 0913T004159/scene_06 220 | 0913T004159/scene_07 221 | 0913T004159/scene_08 222 | 0913T010525/scene_03 223 | 0913T010525/scene_04 224 | 0913T010525/scene_06 225 | 0913T010525/scene_07 226 | 0913T010525/scene_08 227 | 0913T010525/scene_09 228 | 0913T010525/scene_15 229 | 0913T010525/scene_18 230 | 0913T010525/scene_19 231 | 0913T013716/scene_01 232 | 0913T013716/scene_05 233 | 0913T013716/scene_06 234 | 0913T013716/scene_07 235 | 0913T013716/scene_09 236 | 0913T013716/scene_12 237 | 0913T013716/scene_15 238 | 0913T020922/scene_01 239 | 0913T020922/scene_04 240 | 0913T020922/scene_09 241 | 0913T020922/scene_13 242 | 0913T020922/scene_14 243 | 0913T020922/scene_16 244 | 0913T020922/scene_17 245 | 0913T020922/scene_18 246 | 0913T020922/scene_19 247 | 0913T020922/scene_25 248 | 0913T024454/scene_04 249 | 0913T024454/scene_05 250 | 0913T024454/scene_06 251 | 0913T025555/scene_01 252 | 0913T025555/scene_03 253 | 0913T025555/scene_07 254 | 0913T025555/scene_09 255 | 0913T025555/scene_12 256 | 0913T025555/scene_14 257 | 0913T025555/scene_16 258 | 0913T025555/scene_17 259 | 0913T025555/scene_18 260 | 0913T025555/scene_19 261 | 0913T025555/scene_21 262 | 0913T025555/scene_23 263 | 0913T025555/scene_25 264 | 0913T025555/scene_26 265 | 0913T025555/scene_29 266 | -------------------------------------------------------------------------------- /lib/utils/blob.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 
2 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. Full
3 | # text can be found in LICENSE.md
4 |
5 | """Blob helper functions."""
6 |
7 | import numpy as np
8 | import cv2
9 | import torch
10 | import torch.nn as nn
11 | import random
12 |
13 | def im_list_to_blob(ims, num_channels):
14 |     """Convert a list of images into a network input.
15 |
16 |     Assumes images are already prepared (means subtracted, BGR order, ...).
17 |     """
18 |     max_shape = np.array([im.shape for im in ims]).max(axis=0)
19 |     num_images = len(ims)
20 |     blob = np.zeros((num_images, max_shape[0], max_shape[1], num_channels),
21 |                     dtype=np.float32)
22 |     for i in range(num_images):
23 |         im = ims[i]
24 |         if num_channels == 1:
25 |             blob[i, 0:im.shape[0], 0:im.shape[1], :] = im[:,:,np.newaxis]
26 |         else:
27 |             blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
28 |
29 |     return blob
30 |
31 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
32 |     """Mean subtract and scale an image for use in a blob."""
33 |     im = im.astype(np.float32, copy=False)
34 |     im -= pixel_means
35 |     im_shape = im.shape
36 |     im_size_min = np.min(im_shape[0:2])
37 |     im_size_max = np.max(im_shape[0:2])
38 |     im_scale = float(target_size) / float(im_size_min)
39 |     # Prevent the biggest axis from being more than MAX_SIZE
40 |     if np.round(im_scale * im_size_max) > max_size:
41 |         im_scale = float(max_size) / float(im_size_max)
42 |     im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
43 |                     interpolation=cv2.INTER_LINEAR)
44 |
45 |     return im, im_scale
46 |
47 |
48 | def pad_im(im, factor, value=0):
49 |     height = im.shape[0]
50 |     width = im.shape[1]
51 |
52 |     pad_height = int(np.ceil(height / float(factor)) * factor - height)
53 |     pad_width = int(np.ceil(width / float(factor)) * factor - width)
54 |
55 |     if len(im.shape) == 3:
56 |         return np.lib.pad(im, ((0, pad_height), (0, pad_width), (0,0)), 'constant', constant_values=value)
57 |     elif len(im.shape) == 2:
58 |         return np.lib.pad(im, ((0, pad_height), (0, pad_width)), 'constant', constant_values=value)
59 |
60 |
61 | def unpad_im(im, factor):
62 |     height = im.shape[0]
63 |     width = im.shape[1]
64 |
65 |     pad_height = int(np.ceil(height / float(factor)) * factor - height)
66 |     pad_width = int(np.ceil(width / float(factor)) * factor - width)
67 |
68 |     if len(im.shape) == 3:
69 |         return im[0:height-pad_height, 0:width-pad_width, :]
70 |     elif len(im.shape) == 2:
71 |         return im[0:height-pad_height, 0:width-pad_width]
72 |
73 |
74 | def chromatic_transform(im, label=None, d_h=None, d_s=None, d_l=None):
75 |     """
76 |     Given an image array, apply random hue, saturation and luminosity shifts to it.
77 |     """
78 |     # Sample random shifts: hue within +/-1.8 degrees, luminosity and saturation within +/-25.6 (10% of the 0-255 range)
79 |     if d_h is None:
80 |         d_h = (np.random.rand(1) - 0.5) * 0.02 * 180
81 |     if d_l is None:
82 |         d_l = (np.random.rand(1) - 0.5) * 0.2 * 256
83 |     if d_s is None:
84 |         d_s = (np.random.rand(1) - 0.5) * 0.2 * 256
85 |     # Convert the BGR to HLS
86 |     hls = cv2.cvtColor(im, cv2.COLOR_BGR2HLS)
87 |     h, l, s = cv2.split(hls)
88 |     # Add the values to the image H, L, S
89 |     new_h = (h + d_h) % 180
90 |     new_l = np.clip(l + d_l, 0, 255)
91 |     new_s = np.clip(s + d_s, 0, 255)
92 |     # Convert the HLS back to BGR
93 |     new_hls = cv2.merge((new_h, new_l, new_s)).astype('uint8')
94 |     new_im = cv2.cvtColor(new_hls, cv2.COLOR_HLS2BGR)
95 |
96 |     if label is not None:
97 |         I = np.where(label > 0)
98 |         new_im[I[0], I[1], :] = im[I[0], I[1], :]
99 |     return new_im
100 |
101 |
102 | def add_noise(image):
103 |
104 |     # random number
105 |     r
= np.random.rand(1) 106 | 107 | # gaussian noise 108 | if r < 0.9: 109 | row,col,ch= image.shape 110 | mean = 0 111 | var = np.random.rand(1) * 0.3 * 256 112 | sigma = var**0.5 113 | gauss = sigma * np.random.randn(row,col) + mean 114 | gauss = np.repeat(gauss[:, :, np.newaxis], ch, axis=2) 115 | noisy = image + gauss 116 | noisy = np.clip(noisy, 0, 255) 117 | else: 118 | # motion blur 119 | sizes = [3, 5, 7, 9, 11, 15] 120 | size = sizes[int(np.random.randint(len(sizes), size=1))] 121 | kernel_motion_blur = np.zeros((size, size)) 122 | if np.random.rand(1) < 0.5: 123 | kernel_motion_blur[int((size-1)/2), :] = np.ones(size) 124 | else: 125 | kernel_motion_blur[:, int((size-1)/2)] = np.ones(size) 126 | kernel_motion_blur = kernel_motion_blur / size 127 | noisy = cv2.filter2D(image, -1, kernel_motion_blur) 128 | 129 | return noisy 130 | 131 | 132 | def add_noise_depth(image, level = 0.1): 133 | row,col,ch= image.shape 134 | noise_level = random.uniform(0, level) 135 | gauss = noise_level * np.random.randn(row,col) 136 | gauss = np.repeat(gauss[:, :, np.newaxis], ch, axis=2) 137 | noisy = image + gauss 138 | return noisy 139 | 140 | 141 | def add_noise_depth_cuda(image): 142 | noise_level = random.uniform(0, 0.05) 143 | gauss = torch.randn_like(image) * noise_level 144 | noisy = image + gauss 145 | return noisy 146 | 147 | 148 | def add_noise_cuda(image): 149 | # random number 150 | r = np.random.rand(1) 151 | 152 | # gaussian noise 153 | if r < 0.8: 154 | noise_level = random.uniform(0, 0.05) 155 | gauss = torch.randn_like(image) * noise_level 156 | noisy = image + gauss 157 | noisy = torch.clamp(noisy, 0, 1.0) 158 | else: 159 | # motion blur 160 | sizes = [3, 5, 7, 9, 11, 15] 161 | size = sizes[int(np.random.randint(len(sizes), size=1))] 162 | kernel_motion_blur = torch.zeros((size, size)) 163 | if np.random.rand(1) < 0.5: 164 | kernel_motion_blur[int((size-1)/2), :] = torch.ones(size) 165 | else: 166 | kernel_motion_blur[:, int((size-1)/2)] = torch.ones(size) 167 | kernel_motion_blur = kernel_motion_blur.cuda() / size 168 | kernel_motion_blur = kernel_motion_blur.view(1, 1, size, size) 169 | kernel_motion_blur = kernel_motion_blur.repeat(image.size(2), 1, 1, 1) 170 | 171 | motion_blur_filter = nn.Conv2d(in_channels=image.size(2), 172 | out_channels=image.size(2), 173 | kernel_size=size, 174 | groups=image.size(2), 175 | bias=False, 176 | padding=int(size/2)) 177 | 178 | motion_blur_filter.weight.data = kernel_motion_blur 179 | motion_blur_filter.weight.requires_grad = False 180 | noisy = motion_blur_filter(image.permute(2, 0, 1).unsqueeze(0)) 181 | noisy = noisy.squeeze(0).permute(1, 2, 0) 182 | 183 | return noisy 184 | -------------------------------------------------------------------------------- /ycb_render/glutils/meshutil.py: -------------------------------------------------------------------------------- 1 | """3D mesh manipulation utilities.""" 2 | 3 | from builtins import str 4 | from collections import OrderedDict 5 | import numpy as np 6 | from transforms3d import quaternions 7 | from transforms3d.quaternions import axangle2quat, mat2quat, quat2mat 8 | 9 | def frustum(left, right, bottom, top, znear, zfar): 10 | """Create view frustum matrix.""" 11 | assert right != left 12 | assert bottom != top 13 | assert znear != zfar 14 | 15 | M = np.zeros((4, 4), dtype=np.float32) 16 | M[0, 0] = +2.0 * znear / (right - left) 17 | M[2, 0] = (right + left) / (right - left) 18 | M[1, 1] = +2.0 * znear / (top - bottom) 19 | M[3, 1] = (top + bottom) / (top - bottom) 20 | M[2, 2] = -(zfar + 
znear) / (zfar - znear)
21 |     M[3, 2] = -2.0 * znear * zfar / (zfar - znear)
22 |     M[2, 3] = -1.0
23 |     return M
24 |
25 |
26 | def perspective(fovy, aspect, znear, zfar):
27 |     """Create perspective projection matrix."""
28 |     assert znear != zfar
29 |     h = np.tan(fovy / 360.0 * np.pi) * znear
30 |     w = h * aspect
31 |     return frustum(-w, w, -h, h, znear, zfar)
32 |
33 |
34 | def anorm(x, axis=None, keepdims=False):
35 |     """Compute L2 norms along the specified axes."""
36 |     return np.sqrt((x * x).sum(axis=axis, keepdims=keepdims))
37 |
38 |
39 | def normalize(v, axis=None, eps=1e-10):
40 |     """L2 Normalize along specified axes."""
41 |     return v / max(anorm(v, axis=axis, keepdims=True), eps)
42 |
43 |
44 | def lookat(eye, target=[0, 0, 0], up=[0, 1, 0]):
45 |     """Generate LookAt modelview matrix."""
46 |     eye = np.float32(eye)
47 |     forward = normalize(target - eye)
48 |     side = normalize(np.cross(forward, up))
49 |     up = np.cross(side, forward)
50 |     M = np.eye(4, dtype=np.float32)
51 |     R = M[:3, :3]
52 |     R[:] = [side, up, -forward]
53 |     M[:3, 3] = -R.dot(eye)
54 |     return M
55 |
56 |
57 | def sample_view(min_dist, max_dist=None):
58 |     '''Sample a random camera position.
59 |
60 |     Sample an origin-directed camera position in the given distance
61 |     range from the origin. A ModelView matrix is returned.
62 |     '''
63 |     if max_dist is None:
64 |         max_dist = min_dist
65 |     dist = np.random.uniform(min_dist, max_dist)
66 |     eye = np.random.normal(size=3)
67 |     eye = normalize(eye) * dist
68 |     return lookat(eye)
69 |
70 |
71 | def homotrans(M, p):
72 |     p = np.asarray(p)
73 |     if p.shape[-1] == M.shape[1] - 1:
74 |         p = np.append(p, np.ones_like(p[..., :1]), -1)
75 |     p = np.dot(p, M.T)
76 |     return p[..., :-1] / p[..., -1:]
77 |
78 |
79 | def _parse_vertex_tuple(s):
80 |     """Parse vertex indices in '/' separated form (like 'i/j/k', 'i//k' ...)."""
81 |     vt = [0, 0, 0]
82 |     for i, c in enumerate(s.split('/')):
83 |         if c:
84 |             vt[i] = int(c)
85 |     return tuple(vt)
86 |
87 |
88 | def _unify_rows(a):
89 |     """Unify lengths of each row of a."""
90 |     lens = np.fromiter(map(len, a), np.int32)
91 |     if not (lens[0] == lens).all():
92 |         out = np.zeros((len(a), lens.max()), np.float32)
93 |         for i, row in enumerate(a):
94 |             out[i, :lens[i]] = row
95 |     else:
96 |         out = np.float32(a)
97 |     return out
98 |
99 |
100 | def load_obj(fn):
101 |     """Load a 3D mesh from an '.obj' file.
102 |
103 |     Args:
104 |       fn: Input file name or file-like object.
105 |
106 |     Returns:
107 |       dictionary with the following keys (some of which may be missing):
108 |         position: np.float32, (n, 3) array, vertex positions
109 |         uv: np.float32, (n, 2) array, vertex uv coordinates
110 |         normal: np.float32, (n, 3) array, vertex normals
111 |         face: np.int32, (k*3,) triangular face indices
112 |     """
113 |     position = [np.zeros(3, dtype=np.float32)]
114 |     normal = [np.zeros(3, dtype=np.float32)]
115 |     uv = [np.zeros(2, dtype=np.float32)]
116 |
117 |     tuple2idx = OrderedDict()
118 |     triangle_indices = []
119 |
120 |     input_file = open(fn) if isinstance(fn, str) else fn
121 |     for line in input_file:
122 |         line = line.strip()
123 |         if not line or line[0] == '#':
124 |             continue
125 |         line = line.split(' ', 1)
126 |         tag = line[0]
127 |         if len(line) > 1:
128 |             line = line[1]
129 |         else:
130 |             line = ''
131 |         if tag == 'v':
132 |             position.append(np.fromstring(line, sep=' '))
133 |         elif tag == 'vt':
134 |             uv.append(np.fromstring(line, sep=' '))
135 |         elif tag == 'vn':
136 |             normal.append(np.fromstring(line, sep=' '))
137 |         elif tag == 'f':
138 |             output_face_indices = []
139 |             for chunk in line.split():
140 |                 # tuple order: pos_idx, uv_idx, normal_idx
141 |                 vt = _parse_vertex_tuple(chunk)
142 |                 if vt not in tuple2idx:  # create a new output vertex?
143 |                     tuple2idx[vt] = len(tuple2idx)
144 |                 output_face_indices.append(tuple2idx[vt])
145 |             # generate face triangles
146 |             for i in range(1, len(output_face_indices) - 1):
147 |                 for vi in [0, i, i + 1]:
148 |                     triangle_indices.append(output_face_indices[vi])
149 |
150 |     outputs = {}
151 |     outputs['face'] = np.int32(triangle_indices)
152 |     pos_idx, uv_idx, normal_idx = np.int32(list(tuple2idx)).T
153 |     if np.any(pos_idx):
154 |         outputs['position'] = _unify_rows(position)[pos_idx]
155 |     if np.any(uv_idx):
156 |         outputs['uv'] = _unify_rows(uv)[uv_idx]
157 |     if np.any(normal_idx):
158 |         outputs['normal'] = _unify_rows(normal)[normal_idx]
159 |     return outputs
160 |
161 | def normalize_mesh(mesh):
162 |     '''Scale mesh to fit into -1..1 cube'''
163 |     mesh = dict(mesh)
164 |     pos = mesh['position'][:, :3].copy()
165 |     pos -= (pos.max(0) + pos.min(0)) / 2.0
166 |     pos /= np.abs(pos).max()
167 |     mesh['position'] = pos
168 |     return mesh
169 |
170 | def quat2rotmat(quat):
171 |     quat_mat = np.eye(4)
172 |     quat_mat[:3,:3] = quaternions.quat2mat(quat)
173 |     return quat_mat
174 |
175 | def mat2rotmat(mat):
176 |     quat_mat = np.eye(4)
177 |     quat_mat[:3,:3] = mat
178 |     return quat_mat
179 |
180 | def xyz2mat(xyz):
181 |     trans_mat = np.eye(4)
182 |     trans_mat[-1, :3] = xyz
183 |     return trans_mat
184 |
185 | def mat2xyz(mat):
186 |     xyz = mat[-1, :3]
187 |     xyz[np.isnan(xyz)] = 0
188 |     return xyz
189 |
190 | def safemat2quat(mat):
191 |     quat = np.array([1, 0, 0, 0])
192 |     try:
193 |         quat = mat2quat(mat)
194 |     except Exception:
195 |         pass
196 |     quat[np.isnan(quat)] = 0
197 |     return quat
198 |
199 | def unpack_pose(pose):
200 |     unpacked = np.eye(4)
201 |     unpacked[:3, :3] = quat2mat(pose[3:])
202 |     unpacked[:3, 3] = pose[:3]
203 |     return unpacked
204 |
205 | def pack_pose(pose):
206 |     packed = np.zeros(7)
207 |     packed[:3] = pose[:3, 3]
208 |     packed[3:] = safemat2quat(pose[:3, :3])
209 |     return packed
210 |
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | 
project(deepim_pytorch) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED) 11 | 12 | ## System dependencies are found with CMake's conventions 13 | # find_package(Boost REQUIRED COMPONENTS system) 14 | 15 | 16 | ## Uncomment this if the package has a setup.py. This macro ensures 17 | ## modules and global scripts declared therein get installed 18 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 19 | # catkin_python_setup() 20 | 21 | ################################################ 22 | ## Declare ROS messages, services and actions ## 23 | ################################################ 24 | 25 | ## To declare and build messages, services or actions from within this 26 | ## package, follow these steps: 27 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 28 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 29 | ## * In the file package.xml: 30 | ## * add a build_depend tag for "message_generation" 31 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 32 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 33 | ## but can be declared for certainty nonetheless: 34 | ## * add a exec_depend tag for "message_runtime" 35 | ## * In this file (CMakeLists.txt): 36 | ## * add "message_generation" and every package in MSG_DEP_SET to 37 | ## find_package(catkin REQUIRED COMPONENTS ...) 38 | ## * add "message_runtime" and every package in MSG_DEP_SET to 39 | ## catkin_package(CATKIN_DEPENDS ...) 40 | ## * uncomment the add_*_files sections below as needed 41 | ## and list every .msg/.srv/.action file to be processed 42 | ## * uncomment the generate_messages entry below 43 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 44 | 45 | ## Generate messages in the 'msg' folder 46 | # add_message_files( 47 | # FILES 48 | # Message1.msg 49 | # Message2.msg 50 | # ) 51 | 52 | ## Generate services in the 'srv' folder 53 | # add_service_files( 54 | # FILES 55 | # Service1.srv 56 | # Service2.srv 57 | # ) 58 | 59 | ## Generate actions in the 'action' folder 60 | # add_action_files( 61 | # FILES 62 | # Action1.action 63 | # Action2.action 64 | # ) 65 | 66 | ## Generate added messages and services with any dependencies listed here 67 | # generate_messages( 68 | # DEPENDENCIES 69 | # std_msgs # Or other packages containing msgs 70 | # ) 71 | 72 | ################################################ 73 | ## Declare ROS dynamic reconfigure parameters ## 74 | ################################################ 75 | 76 | ## To declare and build dynamic reconfigure parameters within this 77 | ## package, follow these steps: 78 | ## * In the file package.xml: 79 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 80 | ## * In this file (CMakeLists.txt): 81 | ## * add "dynamic_reconfigure" to 82 | ## find_package(catkin REQUIRED COMPONENTS ...) 
83 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 84 | ## and list every .cfg file to be processed 85 | 86 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 87 | # generate_dynamic_reconfigure_options( 88 | # cfg/DynReconf1.cfg 89 | # cfg/DynReconf2.cfg 90 | # ) 91 | 92 | ################################### 93 | ## catkin specific configuration ## 94 | ################################### 95 | ## The catkin_package macro generates cmake config files for your package 96 | ## Declare things to be passed to dependent projects 97 | ## INCLUDE_DIRS: uncomment this if your package contains header files 98 | ## LIBRARIES: libraries you create in this project that dependent projects also need 99 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 100 | ## DEPENDS: system dependencies of this project that dependent projects also need 101 | catkin_package( 102 | # INCLUDE_DIRS include 103 | # LIBRARIES deepim_pytorch 104 | # CATKIN_DEPENDS other_catkin_pkg 105 | # DEPENDS system_lib 106 | ) 107 | 108 | ########### 109 | ## Build ## 110 | ########### 111 | 112 | ## Specify additional locations of header files 113 | ## Your package locations should be listed before other locations 114 | include_directories( 115 | # include 116 | # ${catkin_INCLUDE_DIRS} 117 | ) 118 | 119 | ## Declare a C++ library 120 | # add_library(${PROJECT_NAME} 121 | # src/${PROJECT_NAME}/deepim_pytorch.cpp 122 | # ) 123 | 124 | ## Add cmake target dependencies of the library 125 | ## as an example, code may need to be generated before libraries 126 | ## either from message generation or dynamic reconfigure 127 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 128 | 129 | ## Declare a C++ executable 130 | ## With catkin_make all packages are built within a single CMake context 131 | ## The recommended prefix ensures that target names across packages don't collide 132 | # add_executable(${PROJECT_NAME}_node src/deepim_pytorch_node.cpp) 133 | 134 | ## Rename C++ executable without prefix 135 | ## The above recommended prefix causes long target names, the following renames the 136 | ## target back to the shorter version for ease of user use 137 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 138 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 139 | 140 | ## Add cmake target dependencies of the executable 141 | ## same as for the library above 142 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 143 | 144 | ## Specify libraries to link a library or executable target against 145 | # target_link_libraries(${PROJECT_NAME}_node 146 | # ${catkin_LIBRARIES} 147 | # ) 148 | 149 | ############# 150 | ## Install ## 151 | ############# 152 | 153 | # all install targets should use catkin DESTINATION variables 154 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 155 | 156 | ## Mark executable scripts (Python etc.) 
for installation
157 | ## in contrast to setup.py, you can choose the destination
158 | # install(PROGRAMS
159 | #   scripts/my_python_script
160 | #   DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
161 | # )
162 |
163 | ## Mark executables and/or libraries for installation
164 | # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
165 | #   ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
166 | #   LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
167 | #   RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
168 | # )
169 |
170 | ## Mark cpp header files for installation
171 | # install(DIRECTORY include/${PROJECT_NAME}/
172 | #   DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
173 | #   FILES_MATCHING PATTERN "*.h"
174 | #   PATTERN ".svn" EXCLUDE
175 | # )
176 |
177 | ## Mark other files for installation (e.g. launch and bag files, etc.)
178 | # install(FILES
179 | #   # myfile1
180 | #   # myfile2
181 | #   DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
182 | # )
183 |
184 | #############
185 | ## Testing ##
186 | #############
187 |
188 | ## Add gtest based cpp test target and link libraries
189 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_deepim_pytorch.cpp)
190 | # if(TARGET ${PROJECT_NAME}-test)
191 | #   target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
192 | # endif()
193 |
194 | ## Add folders to be run by python nosetests
195 | # catkin_add_nosetests(test)
196 |
--------------------------------------------------------------------------------
/ycb_render/glutils/glrenderer.py:
--------------------------------------------------------------------------------
1 | """OpenGL Mesh rendering utils."""
2 |
3 | from contextlib import contextmanager
4 | import numpy as np
5 |
6 | import OpenGL.GL as gl
7 |
8 | from .meshutil import perspective
9 | from PIL import Image
10 |
11 |
12 |
13 | class GLObject(object):
14 |     def __del__(self):
15 |         self.release()
16 |
17 |     def __enter__(self):
18 |         bind_func, const = self._bind
19 |         bind_func(const, self)
20 |
21 |     def __exit__(self, *args):
22 |         bind_func, const = self._bind
23 |         bind_func(const, 0)
24 |
25 |
26 | class FBO(GLObject):
27 |     _bind = gl.glBindFramebuffer, gl.GL_FRAMEBUFFER
28 |
29 |     def __init__(self):
30 |         self._as_parameter_ = gl.glGenFramebuffers(1)
31 |
32 |     def release(self):
33 |         gl.glDeleteFramebuffers(1, [self._as_parameter_])
34 |
35 |
36 | class Texture(GLObject):
37 |     _bind = gl.glBindTexture, gl.GL_TEXTURE_2D
38 |
39 |     def __init__(self):
40 |         self._as_parameter_ = gl.glGenTextures(1)
41 |
42 |     def release(self):
43 |         gl.glDeleteTextures([self._as_parameter_])
44 |
45 |
46 | class Shader(GLObject):
47 |
48 |     def __init__(self, vp_code, fp_code):
49 |         # Importing here, when the GL context is already present.
50 |         # Otherwise we get an exception on Python 3 because of a PyOpenGL bug.
51 | from OpenGL.GL import shaders 52 | self._as_parameter_ = self._shader = shaders.compileProgram( 53 | shaders.compileShader(vp_code, gl.GL_VERTEX_SHADER), 54 | shaders.compileShader(fp_code, gl.GL_FRAGMENT_SHADER) 55 | ) 56 | self._uniforms = {} 57 | 58 | def release(self): 59 | gl.glDeleteProgram(self._as_parameter_) 60 | 61 | def __getitem__(self, uniform_name): 62 | if uniform_name not in self._uniforms: 63 | self._uniforms[uniform_name] = gl.glGetUniformLocation(self, uniform_name) 64 | return self._uniforms[uniform_name] 65 | 66 | def __enter__(self): 67 | return self._shader.__enter__() 68 | 69 | def __exit__(self, *args): 70 | return self._shader.__exit__(*args) 71 | 72 | 73 | class MeshRenderer(object): 74 | def __init__(self, size): 75 | self.size = size 76 | self.fbo = FBO() 77 | self.color_tex = Texture() 78 | self.depth_tex = Texture() 79 | w, h = size 80 | 81 | with self.color_tex: 82 | gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, w, h, 0, 83 | gl.GL_RGBA, gl.GL_FLOAT, None) 84 | 85 | with self.depth_tex: 86 | gl.glTexImage2D.wrappedOperation( 87 | gl.GL_TEXTURE_2D, 0, gl.GL_DEPTH24_STENCIL8, w, h, 0, 88 | gl.GL_DEPTH_STENCIL, gl.GL_UNSIGNED_INT_24_8, None) 89 | 90 | with self.fbo: 91 | gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, 92 | gl.GL_TEXTURE_2D, self.color_tex, 0) 93 | gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_DEPTH_STENCIL_ATTACHMENT, 94 | gl.GL_TEXTURE_2D, self.depth_tex, 0) 95 | gl.glViewport(0, 0, w, h) 96 | assert gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) == gl.GL_FRAMEBUFFER_COMPLETE 97 | 98 | self.shader = Shader(vp_code=''' 99 | #version 130 100 | uniform mat4 MVP; 101 | in vec4 data; 102 | out vec4 aData; 103 | 104 | void main() { 105 | aData = data; 106 | gl_Position = MVP * gl_Vertex; 107 | } 108 | ''', 109 | fp_code=''' 110 | #version 130 111 | in vec4 aData; 112 | out vec4 fragColor; 113 | void main() { 114 | fragColor = aData; 115 | } 116 | ''') 117 | 118 | self.fovy = 10.0 119 | self.aspect = 1.0 * w / h 120 | self.znear, self.zfar = 0.01, 100.0 121 | 122 | @contextmanager 123 | def _bind_attrib(self, i, arr): 124 | if arr is None: 125 | yield 126 | return 127 | arr = np.ascontiguousarray(arr, np.float32) 128 | coord_n = arr.shape[-1] 129 | gl.glEnableVertexAttribArray(i) 130 | gl.glVertexAttribPointer(i, coord_n, gl.GL_FLOAT, gl.GL_FALSE, 0, arr) 131 | yield 132 | gl.glDisableVertexAttribArray(i) 133 | 134 | def proj_matrix(self): 135 | return perspective(self.fovy, self.aspect, self.znear, self.zfar) 136 | 137 | def render_mesh(self, position, uv, face=None, 138 | clear_color=[0, 0, 0, 0], 139 | modelview=np.eye(4)): 140 | MVP = modelview.T.dot(self.proj_matrix()) 141 | MVP = np.ascontiguousarray(MVP, np.float32) 142 | position = np.ascontiguousarray(position, np.float32) 143 | with self.fbo: 144 | gl.glClearColor(*clear_color) 145 | gl.glClear(gl.GL_COLOR_BUFFER_BIT | gl.GL_DEPTH_BUFFER_BIT) 146 | 147 | with self.shader, self._bind_attrib(0, position), self._bind_attrib(1, uv): 148 | gl.glUniformMatrix4fv(self.shader['MVP'], 1, gl.GL_FALSE, MVP) 149 | gl.glEnable(gl.GL_DEPTH_TEST) 150 | if face is not None: 151 | face = np.ascontiguousarray(face, np.uint32) 152 | gl.glDrawElements(gl.GL_TRIANGLES, face.size, gl.GL_UNSIGNED_INT, face) 153 | else: 154 | vert_n = position.size // position.shape[-1] 155 | gl.glDrawArrays(gl.GL_TRIANGLES, 0, vert_n) 156 | 157 | gl.glDisable(gl.GL_DEPTH_TEST) 158 | 159 | w, h = self.size 160 | frame = gl.glReadPixels(0, 0, w, h, gl.GL_RGBA, gl.GL_FLOAT) 161 | # from IPython 
import embed; embed()
162 |
163 |         frame = frame.reshape(h, w, 4)  # fix PyOpenGL bug
164 |         # frame = frame.repeat(4, axis=2)
165 |         # frame = (1 - frame) * 100
166 |         frame = frame[::-1, ::-1]  # flip vertically and horizontally to match the GL convention
167 |         return frame
168 |
169 |     def loadTexture(self, path):
170 |         img = Image.open(path).transpose(Image.FLIP_TOP_BOTTOM)
171 |         img_data = np.frombuffer(img.tobytes(), np.uint8)
172 |         width, height = img.size
173 |
174 |         # glTexImage2D expects the first element of the image data to be the
175 |         # bottom-left corner of the image. Subsequent elements go left to right,
176 |         # with subsequent lines going from bottom to top.
177 |
178 |         # However, the image data was created with PIL Image tobytes and numpy's
179 |         # frombuffer, which means we have to do a bit of reorganization. The first
180 |         # element in the data output by tobytes() will be the top-left corner of
181 |         # the image, with following values going left-to-right and lines going
182 |         # top-to-bottom. So, we need to flip the vertical coordinate (y).
183 |         texture = gl.glGenTextures(1)
184 |         gl.glPixelStorei(gl.GL_UNPACK_ALIGNMENT, 1)
185 |         gl.glBindTexture(gl.GL_TEXTURE_2D, texture)
186 |         gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
187 |         gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR_MIPMAP_LINEAR)
188 |         gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
189 |         gl.glTexParameterf(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
190 |         gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, width, height, 0,
191 |                         gl.GL_RGBA, gl.GL_UNSIGNED_BYTE, img_data)
192 |         gl.glGenerateMipmap(gl.GL_TEXTURE_2D)
193 |         return texture
194 |
--------------------------------------------------------------------------------
/ycb_render/glutils/trackball.py:
--------------------------------------------------------------------------------
1 | # -----------------------------------------------------------------------------
2 | # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved.
3 | # Distributed under the (new) BSD License.
4 | # -----------------------------------------------------------------------------
5 | import numpy as np
6 | from . import _trackball
7 | import platform
8 | if platform.python_version().startswith('3'):
9 |     from .meshutil import *
10 | else:
11 |     from meshutil import *
12 | class Trackball():
13 |     """
14 |     3D trackball transform
15 |
16 |     :param float aspect:
17 |         Indicate what is the aspect ratio of the object displayed. This is
18 |         necessary to convert pixel drag moves into object space coordinates.
19 |         Default is None.
20 |
21 |     :param float znear:
22 |         Near clip plane. Default is 2.
23 |
24 |     :param float zfar:
25 |         Far clip plane. Default is 1000.
26 |
27 |     :param float theta:
28 |         Angle (in degrees) around the z axis. Default is 45.
29 |
30 |     :param float phi:
31 |         Angle (in degrees) around the x axis. Default is 45.
32 |
33 |     :param float distance:
34 |         Distance from the trackball to the object. Default is 8.
35 |
36 |     :param float zoom:
37 |         Zoom level. Default is 35.
38 |
39 |     The trackball transform simulates a virtual trackball (3D) that can rotate
40 |     around the origin using intuitive mouse gestures.
41 |
42 |     The transform is connected to the following events:
43 |
44 |     * ``on_attach``: Transform initialization
45 |     * ``on_resize``: Transform update to maintain aspect
46 |     * ``on_mouse_scroll``: Zoom in & out (user action)
47 |     * ``on_mouse_grab``: Drag (user action)
48 |
49 |     **Usage example**:
50 |
51 |     .. code:: python
52 |
53 |        vertex = '''
54 |        attribute vec2 position;
55 |        void main()
56 |        {
57 |            gl_Position = (vec4(position, 0.0, 1.0));
58 |        } '''
59 |
60 |        ...
61 |        window = app.Window(width=800, height=800)
62 |        program = gloo.Program(vertex, fragment, count=4)
63 |        ...
64 |        program['transform'] = Trackball(aspect=1)
65 |        window.attach(program['transform'])
66 |        ...
67 |     """
68 |
69 |     aliases = { "view"       : "trackball_view",
70 |                 "model"      : "trackball_model",
71 |                 "projection" : "trackball_projection" }
72 |
73 |     def __init__(self, width, height, cam_pos=[0,0,2.0]):
74 |         """
75 |         Initialize the transform.
76 |         """
77 |         self._aspect = 1
78 |         self._znear = 0.2
79 |         self._zfar = 6.0
80 |         theta = 45
81 |         phi = 45
82 |         distance = np.linalg.norm(cam_pos)
83 |         self._distance = -distance
84 |         self._zoom = 1
85 |         self._width = width
86 |         self._height = height
87 |         self._window_aspect = 1.5
88 |
89 |         self._trackball = _trackball.Trackball(45,45)
90 |         aspect = self._window_aspect * self._aspect
91 |         self._projection = perspective(self._zoom, aspect,
92 |                                        self._znear, self._zfar)
93 |         self.property = {}
94 |         self._view = np.eye(4, dtype=np.float32)
95 |         self._view[:3, 3] = -np.array(cam_pos)
96 |         self.property["view"] = self._view
97 |         self.property["model"] = np.eye(4)
98 |         # self.set_distance(self._distance)
99 |
100 |     @property
101 |     def distance(self):
102 |         """ Distance from the trackball to the object """
103 |
104 |         return self._distance
105 |
106 |     # @distance.setter
107 |     # def distance(self, distance):
108 |     #     """ Distance from the trackball to the object """
109 |
110 |     #     self._view = np.eye(4, dtype=np.float32)
111 |     #     self._view[2, 3] = -self._distance
112 |     #     self.property["view"] = self._view
113 |
114 |
115 |     @property
116 |     def theta(self):
117 |         """ Angle (in degrees) around the z axis """
118 |
119 |         return self._trackball.theta
120 |
121 |     @theta.setter
122 |     def theta(self, theta):
123 |         """ Angle (in degrees) around the z axis """
124 |
125 |         self._trackball.theta = theta
126 |         self.property["model"] = self._trackball.model
127 |
128 |
129 |     @property
130 |     def phi(self):
131 |         """ Angle (in degrees) around the x axis """
132 |
133 |         return self._trackball.phi
134 |
135 |     @phi.setter
136 |     def phi(self, phi):
137 |         """ Angle (in degrees) around the x axis """
138 |
139 |         self._trackball.phi = phi
140 |         self.property["model"] = self._trackball.model
141 |
142 |
143 |     @property
144 |     def zoom(self):
145 |         """ Zoom level (aperture angle in degrees) """
146 |
147 |         return self._zoom
148 |
149 |
150 |     @zoom.setter
151 |     def zoom(self, value):
152 |         """ Zoom level (aperture angle in degrees) """
153 |
154 |         aspect = self._window_aspect * self._aspect
155 |         self._zoom = min(max(value, 1.0), 179.0)
156 |         self.property['projection'] = perspective(self._zoom, aspect,
157 |                                                   self._znear, self._zfar)
158 |
159 |     @property
160 |     def aspect(self):
161 |         """ Projection aspect """
162 |
163 |         return self._aspect
164 |
165 |
166 |     @aspect.setter
167 |     def aspect(self, value):
168 |         """ Projection aspect """
169 |
170 |         self._aspect = value
171 |         aspect = self._window_aspect * self._aspect
172 |         self.property['projection'] = perspective(self._zoom, aspect,
173 |                                                   self._znear, self._zfar)
174 |
175 |     def
on_attach(self, program): 176 | self.property["view"] = self._view 177 | self.property["model"] = self._trackball.model 178 | self.property["projection"] = self._projection 179 | 180 | 181 | def on_resize(self, width, height): 182 | self._width = float(width) 183 | self._height = float(height) 184 | self._window_aspect = self._width / self._height 185 | aspect = self._window_aspect * self._aspect 186 | self.property['projection'] = perspective(self._zoom, aspect, 187 | self._znear, self._zfar) 188 | 189 | 190 | 191 | def on_mouse_drag(self, x, y, dx, dy, button=None): 192 | width = self._width 193 | height = self._height 194 | x = (x*2.0 - width)/width 195 | dx = (2.*dx)/width 196 | y = (height - y*2.0)/height 197 | dy = -(2.*dy)/height 198 | self._trackball.drag_to(x,y,dx,dy) 199 | self.property["model"] = self._trackball.model 200 | 201 | 202 | def on_mouse_scroll(self, x, y, dx, dy): 203 | width = self._width 204 | height = self._height 205 | aspect = self._window_aspect * self._aspect 206 | self._zoom = min(max(self._zoom*(1-dy/100), 1.0), 179.0) 207 | self.property['projection'] = perspective(self._zoom, aspect, 208 | self._znear, self._zfar) 209 | 210 | def reinit(self, cam_pos): 211 | self._zoom = 1 212 | self._window_aspect = 1.5 213 | 214 | self._trackball = _trackball.Trackball(45,45) 215 | aspect = self._window_aspect * self._aspect 216 | self._projection = perspective(self._zoom, aspect, 217 | self._znear, self._zfar) 218 | self.property = {} 219 | self._view = np.eye(4, dtype=np.float32) 220 | self._view[:3, 3] = -np.array(cam_pos) 221 | self.property["view"] = self._view 222 | self.property["projection"] = np.eye(4) 223 | self.property["model"] = np.eye(4) 224 | self._trackball._model = np.eye(4) 225 | -------------------------------------------------------------------------------- /tools/train_net.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | 3 | # Copyright (c) 2020 NVIDIA Corporation. All rights reserved. 4 | # This work is licensed under the NVIDIA Source Code License - Non-commercial. 
Full
5 | # text can be found in LICENSE.md
6 |
7 | """Train a DeepIM network for 6D object pose refinement."""
8 |
9 | import torch
10 | import torch.nn.parallel
11 | import torch.backends.cudnn as cudnn
12 | import torch.optim
13 | import torch.utils.data
14 |
15 | import argparse
16 | import pprint
17 | import numpy as np
18 | import sys
19 | import os
20 | import os.path as osp
21 | import cv2
22 |
23 | import _init_paths
24 | from fcn.config import cfg, cfg_from_file, get_output_dir
25 | from fcn.train_test import train
26 | from datasets.factory import get_dataset
27 |
28 | import networks
29 | from ycb_renderer import YCBRenderer
30 |
31 |
32 | def parse_args():
33 |     """
34 |     Parse input arguments
35 |     """
36 |     parser = argparse.ArgumentParser(description='Train a DeepIM network')
37 |     parser.add_argument('--gpu', dest='gpu_id',
38 |                         help='GPU device id to use [0]',
39 |                         default=0, type=int)
40 |     parser.add_argument('--epochs', dest='epochs',
41 |                         help='number of epochs to train',
42 |                         default=40000, type=int)
43 |     parser.add_argument('--startepoch', dest='startepoch',
44 |                         help='the starting epoch',
45 |                         default=0, type=int)
46 |     parser.add_argument('--pretrained', dest='pretrained',
47 |                         help='initialize with pretrained checkpoint',
48 |                         default=None, type=str)
49 |     parser.add_argument('--cfg', dest='cfg_file',
50 |                         help='optional config file',
51 |                         default=None, type=str)
52 |     parser.add_argument('--solver', dest='solver',
53 |                         help='solver type',
54 |                         default='sgd', type=str)
55 |     parser.add_argument('--dataset', dest='dataset_name',
56 |                         help='dataset to train on',
57 |                         default='shapenet_scene_train', type=str)
58 |     parser.add_argument('--dataset_background', dest='dataset_background_name',
59 |                         help='background dataset to train on',
60 |                         default='background_nvidia', type=str)
61 |     parser.add_argument('--rand', dest='randomize',
62 |                         help='randomize (do not use a fixed seed)',
63 |                         action='store_true')
64 |     parser.add_argument('--network', dest='network_name',
65 |                         help='name of the network',
66 |                         default=None, type=str)
67 |
68 |     if len(sys.argv) == 1:
69 |         parser.print_help()
70 |         sys.exit(1)
71 |
72 |     args = parser.parse_args()
73 |     return args
74 |
75 |
76 | if __name__ == '__main__':
77 |     args = parse_args()
78 |
79 |     print('Called with args:')
80 |     print(args)
81 |
82 |     if args.cfg_file is not None:
83 |         cfg_from_file(args.cfg_file)
84 |
85 |     print('Using config:')
86 |     pprint.pprint(cfg)
87 |
88 |     if not args.randomize:
89 |         # fix the random seeds (numpy and pytorch) for reproducibility
90 |         np.random.seed(cfg.RNG_SEED)
91 |         torch.manual_seed(cfg.RNG_SEED)
92 |     # device
93 |     print('GPU device {:d}'.format(args.gpu_id))
94 |
95 |     cfg.classes = cfg.TRAIN.CLASSES
96 |     # prepare dataset
97 |     cfg.MODE = 'TRAIN'
98 |     dataset = get_dataset(args.dataset_name)
99 |     if cfg.TRAIN.SYNTHESIZE:
100 |         num_workers = 0
101 |     else:
102 |         num_workers = 4
103 |     dataloader = torch.utils.data.DataLoader(dataset, batch_size=cfg.TRAIN.IMS_PER_BATCH, shuffle=True, num_workers=num_workers)
104 |     print('Use dataset `{:s}` for training'.format(dataset.name))
105 |
106 |     # background dataset
107 |     if cfg.TRAIN.SYN_BACKGROUND_SPECIFIC:
108 |         background_dataset = get_dataset(args.dataset_background_name)
109 |     else:
110 |         background_dataset = get_dataset('background_coco')
111 |     background_loader = torch.utils.data.DataLoader(background_dataset, batch_size=cfg.TRAIN.IMS_PER_BATCH,
112 |                                                     shuffle=True, num_workers=4)
113 |
114 |     # overwrite intrinsics
115 |     if len(cfg.INTRINSICS) > 0:
116 |         K = np.array(cfg.INTRINSICS).reshape(3, 3)
117 |         dataset._intrinsic_matrix = K
118 |         background_dataset._intrinsic_matrix = K
119 |         print(dataset._intrinsic_matrix)
120 |
121 |     # set up renderer
122 |     print('loading 3D models')
123 |     cfg.renderer = YCBRenderer(width=cfg.TRAIN.SYN_WIDTH, height=cfg.TRAIN.SYN_HEIGHT, render_marker=False, gpu_id=args.gpu_id)
124 |     cfg.renderer.load_objects(dataset.model_mesh_paths, dataset.model_texture_paths, dataset.model_colors)
125 |     cfg.renderer.set_camera_default()
126 |     print(dataset.model_mesh_paths)
127 |
128 |     # output directory
129 |     output_dir = get_output_dir(dataset, None)
130 |     print('Output will be saved to `{:s}`'.format(output_dir))
131 |     if not os.path.exists(output_dir):
132 |         os.makedirs(output_dir)
133 |
134 |     # prepare network
135 |     if args.pretrained:
136 |         network_data = torch.load(args.pretrained)
137 |         print("=> using pre-trained network '{}'".format(args.network_name))
138 |     else:
139 |         network_data = None
140 |         print("=> creating network '{}'".format(args.network_name))
141 |
142 |     network = networks.__dict__[args.network_name](dataset.num_classes, network_data).cuda()
143 |     if torch.cuda.device_count() > 1:
144 |         cfg.TRAIN.GPUNUM = torch.cuda.device_count()
145 |         print("Let's use", torch.cuda.device_count(), "GPUs!")
146 |     network = torch.nn.DataParallel(network).cuda()
147 |     cudnn.benchmark = True
148 |
149 |     assert(args.solver in ['adam', 'sgd'])
150 |     print('=> setting {} solver'.format(args.solver))
151 |     param_groups = [{'params': network.module.bias_parameters(), 'weight_decay': cfg.TRAIN.WEIGHT_DECAY},
152 |                     {'params': network.module.weight_parameters(), 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]
153 |     if args.solver == 'adam':
154 |         optimizer = torch.optim.Adam(param_groups, cfg.TRAIN.LEARNING_RATE,
155 |                                      betas=(cfg.TRAIN.MOMENTUM, cfg.TRAIN.BETA))
156 |     elif args.solver == 'sgd':
157 |         optimizer = torch.optim.SGD(param_groups, cfg.TRAIN.LEARNING_RATE,
158 |                                     momentum=cfg.TRAIN.MOMENTUM)
159 |
160 |     scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, \
161 |         milestones=[m - args.startepoch for m in cfg.TRAIN.MILESTONES], gamma=cfg.TRAIN.GAMMA)
162 |     cfg.epochs = args.epochs
163 |
164 |     num_iters = 1
165 |     for epoch in range(args.startepoch):
166 |         if cfg.TRAIN.HEATUP == 0:
167 |             num_iters = 4
168 |         elif (epoch + 1) % cfg.TRAIN.HEATUP == 0 and num_iters < cfg.TRAIN.ITERNUM:
169 |             num_iters += 1
170 |
171 |     for epoch in range(args.startepoch, args.epochs):
172 |         scheduler.step()
173 |
174 |         train(dataloader, background_loader, network, optimizer, epoch, num_iters)
175 |
176 |         # save checkpoint
177 |         if (epoch+1) % cfg.TRAIN.SNAPSHOT_EPOCHS == 0 or epoch == args.epochs - 1:
178 |             state = {'epoch': epoch + 1, 'state_dict': network.module.state_dict()}
179 |             infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX
180 |                      if cfg.TRAIN.SNAPSHOT_INFIX != '' else '')
181 |             filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + '_epoch_{:d}'.format(epoch+1) + '.checkpoint.pth')
182 |             torch.save(state, os.path.join(output_dir, filename))
183 |             print(filename)
184 |
185 |         # heat up: increase the number of refinement iterations
186 |         if (epoch + 1) % cfg.TRAIN.HEATUP == 0 and num_iters < cfg.TRAIN.ITERNUM:
187 |             num_iters += 1
188 |
--------------------------------------------------------------------------------
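
For quick reference, here is a minimal usage sketch of the augmentation helpers in lib/utils/blob.py, applied to one of the demo frames shipped in data/demo. It assumes lib/ is on PYTHONPATH (the tools/ scripts arrange this via _init_paths); the particular chaining shown here is illustrative only, not the exact pipeline used during training.

import cv2
import numpy as np
from utils.blob import chromatic_transform, add_noise, pad_im

im = cv2.imread('data/demo/000000-color.png')  # BGR, uint8
im = chromatic_transform(im)                   # random hue/luminosity/saturation shift
im = add_noise(im).astype(np.uint8)            # gaussian noise or motion blur
im = pad_im(im, 16)                            # pad so height/width are multiples of 16
print(im.shape)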
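Similarly, a small sketch of the 7-D pose convention used by pack_pose/unpack_pose in ycb_render/glutils/meshutil.py: a pose is stored as [x, y, z, qw, qx, qy, qz] (translation followed by a wxyz quaternion) and expanded into a 4x4 homogeneous matrix. It assumes the repository root is on PYTHONPATH and transforms3d is installed.

import numpy as np
from ycb_render.glutils.meshutil import pack_pose, unpack_pose

pose = np.array([0.1, -0.2, 0.9, 1.0, 0.0, 0.0, 0.0])  # translation + identity quaternion (wxyz)
T = unpack_pose(pose)                                   # 4x4 homogeneous transform
assert np.allclose(pack_pose(T), pose)                  # round trip recovers the 7-D pose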