├── CMakeLists.txt
├── README.md
├── datasets
│   └── ycb
│       ├── __init__.pyc
│       ├── dataset.pyc
│       └── dataset_config
│           ├── classes.txt
│           ├── test_data_list.txt
│           └── train_data_list.txt
├── package.xml
├── scripts
│   ├── 1
│   ├── LICENSE
│   ├── README.md
│   ├── assets
│   │   ├── compare.png
│   │   ├── pullfig.png
│   │   ├── result_linemod.png
│   │   └── result_ycb.png
│   ├── datasets
│   │   ├── linemod
│   │   │   ├── dataset.py
│   │   │   └── dataset_config
│   │   │       └── models_info.yml
│   │   └── ycb
│   │       ├── dataset.py
│   │       └── dataset_config
│   │           ├── classes.txt
│   │           ├── test_data_list.txt
│   │           └── train_data_list.txt
│   ├── distortion.npy
│   ├── eval.py
│   ├── experiments
│   │   └── scripts
│   │       ├── eval_linemod.sh
│   │       ├── eval_ycb.sh
│   │       ├── ros_eval_msg.sh
│   │       ├── ros_eval_ycb.sh
│   │       ├── test.sh
│   │       ├── train_linemod.sh
│   │       └── train_ycb.sh
│   ├── lib
│   │   ├── __init__.pyc
│   │   ├── extractors.pyc
│   │   ├── knn
│   │   │   ├── __init__.pyc
│   │   │   ├── build
│   │   │   │   └── knn_cuda_kernel.so
│   │   │   ├── build_ffi.py
│   │   │   ├── knn_pytorch
│   │   │   │   ├── __init__.py
│   │   │   │   ├── __init__.pyc
│   │   │   │   ├── __pycache__
│   │   │   │   │   ├── __init__.cpython-35.pyc
│   │   │   │   │   └── __init__.cpython-36.pyc
│   │   │   │   └── _knn_pytorch.so
│   │   │   └── src
│   │   │       ├── knn_cuda_kernel.cu
│   │   │       ├── knn_cuda_kernel.h
│   │   │       ├── knn_pytorch.c
│   │   │       └── knn_pytorch.h
│   │   ├── loss.py
│   │   ├── loss.pyc
│   │   ├── loss_refiner.py
│   │   ├── network.py
│   │   ├── pspnet.pyc
│   │   └── transformations.pyc
│   ├── loss.py
│   ├── matrix.npy
│   ├── model
│   │   ├── build_BiSeNet.py
│   │   └── build_contextpath.py
│   ├── predict.npy
│   ├── tools
│   │   ├── __pycache__
│   │   │   └── _init_paths.cpython-35.pyc
│   │   ├── _init_paths.py
│   │   ├── _init_paths.pyc
│   │   ├── eval_linemod.py
│   │   ├── eval_ycb.py
│   │   ├── ros_eval_ycb.py
│   │   ├── ros_eval_ycb_message.py
│   │   ├── ros_eval_ycb_publisher.py
│   │   ├── temp.py
│   │   ├── test.py
│   │   └── train.py
│   └── utils.pyc
└── srv
    └── CameraRequests.srv
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 2.8.3)
2 | project(densefusion)
3 |
4 | ## Compile as C++11, supported in ROS Kinetic and newer
5 | # add_compile_options(-std=c++11)
6 |
7 | ## Find catkin macros and libraries
8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz)
9 | ## is used, also find other catkin packages
10 | find_package(catkin REQUIRED COMPONENTS
11 | rospy
12 | std_msgs
13 | message_generation
14 | )
15 |
16 | ## System dependencies are found with CMake's conventions
17 | # find_package(Boost REQUIRED COMPONENTS system)
18 |
19 |
20 | ## Uncomment this if the package has a setup.py. This macro ensures
21 | ## modules and global scripts declared therein get installed
22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html
23 | # catkin_python_setup()
24 |
25 | ################################################
26 | ## Declare ROS messages, services and actions ##
27 | ################################################
28 |
29 | ## To declare and build messages, services or actions from within this
30 | ## package, follow these steps:
31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in
32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...).
33 | ## * In the file package.xml:
34 | ## * add a build_depend tag for "message_generation"
35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET
36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in
37 | ## but can be declared for certainty nonetheless:
38 | ## * add a exec_depend tag for "message_runtime"
39 | ## * In this file (CMakeLists.txt):
40 | ## * add "message_generation" and every package in MSG_DEP_SET to
41 | ## find_package(catkin REQUIRED COMPONENTS ...)
42 | ## * add "message_runtime" and every package in MSG_DEP_SET to
43 | ## catkin_package(CATKIN_DEPENDS ...)
44 | ## * uncomment the add_*_files sections below as needed
45 | ## and list every .msg/.srv/.action file to be processed
46 | ## * uncomment the generate_messages entry below
47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...)
48 |
49 | ## Generate messages in the 'msg' folder
50 | # add_message_files(
51 | # FILES
52 | # Message1.msg
53 | # Message2.msg
54 | # )
55 |
56 | ## Generate services in the 'srv' folder
57 | add_service_files(
58 | FILES
59 | CameraRequests.srv
60 | )
61 |
62 | ## Generate actions in the 'action' folder
63 | # add_action_files(
64 | # FILES
65 | # Action1.action
66 | # Action2.action
67 | # )
68 |
69 | ## Generate added messages and services with any dependencies listed here
70 | generate_messages(
71 | DEPENDENCIES
72 | std_msgs
73 | )
74 |
75 | ################################################
76 | ## Declare ROS dynamic reconfigure parameters ##
77 | ################################################
78 |
79 | ## To declare and build dynamic reconfigure parameters within this
80 | ## package, follow these steps:
81 | ## * In the file package.xml:
82 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure"
83 | ## * In this file (CMakeLists.txt):
84 | ## * add "dynamic_reconfigure" to
85 | ## find_package(catkin REQUIRED COMPONENTS ...)
86 | ## * uncomment the "generate_dynamic_reconfigure_options" section below
87 | ## and list every .cfg file to be processed
88 |
89 | ## Generate dynamic reconfigure parameters in the 'cfg' folder
90 | # generate_dynamic_reconfigure_options(
91 | # cfg/DynReconf1.cfg
92 | # cfg/DynReconf2.cfg
93 | # )
94 |
95 | ###################################
96 | ## catkin specific configuration ##
97 | ###################################
98 | ## The catkin_package macro generates cmake config files for your package
99 | ## Declare things to be passed to dependent projects
100 | ## INCLUDE_DIRS: uncomment this if your package contains header files
101 | ## LIBRARIES: libraries you create in this project that dependent projects also need
102 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need
103 | ## DEPENDS: system dependencies of this project that dependent projects also need
104 | catkin_package(
105 | # INCLUDE_DIRS include
106 | # LIBRARIES densefusion
107 | # CATKIN_DEPENDS rospy std_msgs
108 | # DEPENDS system_lib
109 | )
110 |
111 | ###########
112 | ## Build ##
113 | ###########
114 |
115 | ## Specify additional locations of header files
116 | ## Your package locations should be listed before other locations
117 | include_directories(
118 | # include
119 | ${catkin_INCLUDE_DIRS}
120 | )
121 |
122 | ## Declare a C++ library
123 | # add_library(${PROJECT_NAME}
124 | # src/${PROJECT_NAME}/densefusion.cpp
125 | # )
126 |
127 | ## Add cmake target dependencies of the library
128 | ## as an example, code may need to be generated before libraries
129 | ## either from message generation or dynamic reconfigure
130 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
131 |
132 | ## Declare a C++ executable
133 | ## With catkin_make all packages are built within a single CMake context
134 | ## The recommended prefix ensures that target names across packages don't collide
135 | # add_executable(${PROJECT_NAME}_node src/densefusion_node.cpp)
136 |
137 | ## Rename C++ executable without prefix
138 | ## The above recommended prefix causes long target names, the following renames the
139 | ## target back to the shorter version for ease of user use
140 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node"
141 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "")
142 |
143 | ## Add cmake target dependencies of the executable
144 | ## same as for the library above
145 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS})
146 |
147 | ## Specify libraries to link a library or executable target against
148 | # target_link_libraries(${PROJECT_NAME}_node
149 | # ${catkin_LIBRARIES}
150 | # )
151 |
152 | #############
153 | ## Install ##
154 | #############
155 |
156 | # all install targets should use catkin DESTINATION variables
157 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html
158 |
159 | ## Mark executable scripts (Python etc.) for installation
160 | ## in contrast to setup.py, you can choose the destination
161 | # install(PROGRAMS
162 | # scripts/my_python_script
163 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
164 | # )
165 |
166 | ## Mark executables and/or libraries for installation
167 | # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node
168 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
169 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION}
170 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION}
171 | # )
172 |
173 | ## Mark cpp header files for installation
174 | # install(DIRECTORY include/${PROJECT_NAME}/
175 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION}
176 | # FILES_MATCHING PATTERN "*.h"
177 | # PATTERN ".svn" EXCLUDE
178 | # )
179 |
180 | ## Mark other files for installation (e.g. launch and bag files, etc.)
181 | # install(FILES
182 | # # myfile1
183 | # # myfile2
184 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION}
185 | # )
186 |
187 | #############
188 | ## Testing ##
189 | #############
190 |
191 | ## Add gtest based cpp test target and link libraries
192 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_densefusion.cpp)
193 | # if(TARGET ${PROJECT_NAME}-test)
194 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME})
195 | # endif()
196 |
197 | ## Add folders to be run by python nosetests
198 | # catkin_add_nosetests(test)
199 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # DenseFusion_ROS
2 |
3 | This repository is based on https://github.com/j96w/DenseFusion and https://github.com/ooooverflow/BiSeNet.
4 |
5 | If you use Docker, a prebuilt image is available at https://hub.docker.com/repository/docker/choo2969/ros-densefusion:
6 |
7 | Segmentation weight file: [link](https://drive.google.com/drive/folders/1fRie5jwj9Liuwvs64_Mru8wUCy65Os0_?usp=sharing)
8 | DenseFusion weight file: [link](https://github.com/j96w/DenseFusion)
9 |
10 | ~~~
11 | $ docker pull choo2969/ros-densefusion
12 | ~~~
13 |
14 |
15 | ## Requirements
16 | ---
17 | - ROS (Kinetic)
18 | - Python 2.7
19 | - PyTorch 0.4.1
20 | - PIL
21 | - scipy
22 | - numpy
23 | - pyyaml
24 | - logging
25 | - matplotlib
26 | - CUDA
27 |
28 |
29 |
30 | ## Start
31 | ---
32 | We have tested on Ubuntu 16.04 with ROS Kinetic, on an NVIDIA Titan Xp and a GeForce GTX 1080 Ti.
33 | 1. Start camera node (D435)
34 |
35 | - Step 1. Run your own camera node. If your camera is not a D435 or D415, you will need to edit the RGB image and depth subscribers: replace image_subscriber and depth_subscriber with the topics published by your camera node (see the sketch at the end of this section).
36 | ~~~
37 | vim path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh
38 | ~~~
39 |
40 | - Step 2. Edit the cam_cx, cam_cy, cam_fx, cam_fy values to match your camera's intrinsics
41 | ~~~
42 | vim path/densefusion/scripts/tools/ros_eval_ycb_message.py
43 | ~~~
44 |
45 | 2. Start
46 | ~~~
47 | sh path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh
48 | ~~~
49 | Running this will launch the service server that runs 6D pose estimation.
50 |
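For reference, here is a minimal sketch (not a file from this repository) of what the camera subscriber and intrinsics settings in `tools/ros_eval_ycb_message.py` look like. The topic names and intrinsic values below are placeholders, so substitute the topics and calibration of your own camera:

~~~
#!/usr/bin/env python
# Hypothetical sketch only: adapt the topic names and intrinsics to your camera.
import rospy
from sensor_msgs.msg import Image

# Topics published by your camera driver (placeholders; a D435 driver
# typically publishes topics similar to these).
RGB_TOPIC = '/camera/color/image_raw'
DEPTH_TOPIC = '/camera/aligned_depth_to_color/image_raw'

# Camera intrinsics (placeholders; read yours from the camera_info topic).
cam_cx, cam_cy = 312.9869, 241.3109
cam_fx, cam_fy = 1066.778, 1067.487

def rgb_callback(msg):
    pass  # hand the RGB image to the segmentation / pose estimation pipeline

def depth_callback(msg):
    pass  # hand the depth image to the pose estimation pipeline

if __name__ == '__main__':
    rospy.init_node('camera_topic_check')
    rospy.Subscriber(RGB_TOPIC, Image, rgb_callback)
    rospy.Subscriber(DEPTH_TOPIC, Image, depth_callback)
    rospy.spin()
~~~

Once the topics and intrinsics match your camera, re-run the script from Step 2.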
--------------------------------------------------------------------------------
/datasets/ycb/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/__init__.pyc
--------------------------------------------------------------------------------
/datasets/ycb/dataset.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/dataset.pyc
--------------------------------------------------------------------------------
/datasets/ycb/dataset_config/classes.txt:
--------------------------------------------------------------------------------
1 | 002_master_chef_can
2 | 003_cracker_box
3 | 004_sugar_box
4 | 005_tomato_soup_can
5 | 006_mustard_bottle
6 | 007_tuna_fish_can
7 | 008_pudding_box
8 | 009_gelatin_box
9 | 010_potted_meat_can
10 | 011_banana
11 | 019_pitcher_base
12 | 021_bleach_cleanser
13 | 024_bowl
14 | 025_mug
15 | 035_power_drill
16 | 036_wood_block
17 | 037_scissors
18 | 040_large_marker
19 | 051_large_clamp
20 | 052_extra_large_clamp
21 | 061_foam_brick
22 |
--------------------------------------------------------------------------------
/package.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0"?>
2 | <package format="2">
3 |   <name>densefusion</name>
4 |   <version>0.0.0</version>
5 |   <description>The densefusion package</description>
6 |   <maintainer>root</maintainer>
7 |   <license>TODO</license>
8 |
9 |   <buildtool_depend>catkin</buildtool_depend>
10 |   <build_depend>rospy</build_depend>
11 |   <build_depend>std_msgs</build_depend>
12 |   <build_depend>message_generation</build_depend>
13 |
14 |   <build_export_depend>rospy</build_export_depend>
15 |   <build_export_depend>std_msgs</build_export_depend>
16 |
17 |   <exec_depend>rospy</exec_depend>
18 |   <exec_depend>std_msgs</exec_depend>
19 |   <exec_depend>message_runtime</exec_depend>
20 |
21 |   <export>
22 |   </export>
23 | </package>
--------------------------------------------------------------------------------
/scripts/1:
--------------------------------------------------------------------------------
1 | # ~/.bashrc: executed by bash(1) for non-login shells.
2 | # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc)
3 | # for examples
4 |
5 | # If not running interactively, don't do anything
6 | [ -z "$PS1" ] && return
7 |
8 | # don't put duplicate lines in the history. See bash(1) for more options
9 | # ... or force ignoredups and ignorespace
10 | HISTCONTROL=ignoredups:ignorespace
11 |
12 | # append to the history file, don't overwrite it
13 | shopt -s histappend
14 |
15 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1)
16 | HISTSIZE=1000
17 | HISTFILESIZE=2000
18 |
19 | # check the window size after each command and, if necessary,
20 | # update the values of LINES and COLUMNS.
21 | shopt -s checkwinsize
22 |
23 | # make less more friendly for non-text input files, see lesspipe(1)
24 | [ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)"
25 |
26 | # set variable identifying the chroot you work in (used in the prompt below)
27 | if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then
28 | debian_chroot=$(cat /etc/debian_chroot)
29 | fi
30 |
31 | # set a fancy prompt (non-color, unless we know we "want" color)
32 | case "$TERM" in
33 | xterm-color) color_prompt=yes;;
34 | esac
35 |
36 | # uncomment for a colored prompt, if the terminal has the capability; turned
37 | # off by default to not distract the user: the focus in a terminal window
38 | # should be on the output of commands, not on the prompt
39 | #force_color_prompt=yes
40 |
41 | if [ -n "$force_color_prompt" ]; then
42 | if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then
43 | # We have color support; assume it's compliant with Ecma-48
44 | # (ISO/IEC-6429). (Lack of such support is extremely rare, and such
45 | # a case would tend to support setf rather than setaf.)
46 | color_prompt=yes
47 | else
48 | color_prompt=
49 | fi
50 | fi
51 |
52 | if [ "$color_prompt" = yes ]; then
53 | PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ '
54 | else
55 | PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ '
56 | fi
57 | unset color_prompt force_color_prompt
58 |
59 | # If this is an xterm set the title to user@host:dir
60 | case "$TERM" in
61 | xterm*|rxvt*)
62 | PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1"
63 | ;;
64 | *)
65 | ;;
66 | esac
67 |
68 | # enable color support of ls and also add handy aliases
69 | if [ -x /usr/bin/dircolors ]; then
70 | test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)"
71 | alias ls='ls --color=auto'
72 | #alias dir='dir --color=auto'
73 | #alias vdir='vdir --color=auto'
74 |
75 | alias grep='grep --color=auto'
76 | alias fgrep='fgrep --color=auto'
77 | alias egrep='egrep --color=auto'
78 | fi
79 |
80 | # some more ls aliases
81 | alias ll='ls -alF'
82 | alias la='ls -A'
83 | alias l='ls -CF'
84 |
85 | # Alias definitions.
86 | # You may want to put all your additions into a separate file like
87 | # ~/.bash_aliases, instead of adding them here directly.
88 | # See /usr/share/doc/bash-doc/examples in the bash-doc package.
89 |
90 | if [ -f ~/.bash_aliases ]; then
91 | . ~/.bash_aliases
92 | fi
93 |
94 | # enable programmable completion features (you don't need to enable
95 | # this, if it's already enabled in /etc/bash.bashrc and /etc/profile
96 | # sources /etc/bash.bashrc).
97 | #if [ -f /etc/bash_completion ] && ! shopt -oq posix; then
98 | # . /etc/bash_completion
99 | #fi
100 | alias eb='nano ~/.bashrc'
101 | alias sb='source ~/.bashrc'
102 | alias gs='git status'
103 | alias gp='git pull'
104 | alias cw='cd ~/catkin_ws'
105 | alias cs='cd ~/catkin_ws/src'
106 | alias cm='cd ~/catkin_ws && catkin_make'
107 | source ~/catkin_ws/devel/setup.bash
108 | export ROS_MASTER_URI=http://192.168.1.15:11311
109 | export ROS_HOSTNAME=172.17.0.5
110 |
--------------------------------------------------------------------------------
/scripts/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2019 Jeremy Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/scripts/README.md:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/scripts/assets/compare.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/compare.png
--------------------------------------------------------------------------------
/scripts/assets/pullfig.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/pullfig.png
--------------------------------------------------------------------------------
/scripts/assets/result_linemod.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_linemod.png
--------------------------------------------------------------------------------
/scripts/assets/result_ycb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_ycb.png
--------------------------------------------------------------------------------
/scripts/datasets/linemod/dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | from PIL import Image
3 | import os
4 | import os.path
5 | import errno
6 | import torch
7 | import json
8 | import codecs
9 | import numpy as np
10 | import sys
11 | import torchvision.transforms as transforms
12 | import argparse
13 | import json
14 | import time
15 | import random
16 | import numpy.ma as ma
17 | import copy
18 | import scipy.misc
19 | import scipy.io as scio
20 | import yaml
21 | import cv2
22 |
23 |
24 | class PoseDataset(data.Dataset):
25 | def __init__(self, mode, num, add_noise, root, noise_trans, refine):
26 | self.objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15]
27 | self.mode = mode
28 |
29 | self.list_rgb = []
30 | self.list_depth = []
31 | self.list_label = []
32 | self.list_obj = []
33 | self.list_rank = []
34 | self.meta = {}
35 | self.pt = {}
36 | self.root = root
37 | self.noise_trans = noise_trans
38 | self.refine = refine
39 |
40 | item_count = 0
41 | for item in self.objlist:
42 | if self.mode == 'train':
43 | input_file = open('{0}/data/{1}/train.txt'.format(self.root, '%02d' % item))
44 | else:
45 | input_file = open('{0}/data/{1}/test.txt'.format(self.root, '%02d' % item))
46 | while 1:
47 | item_count += 1
48 | input_line = input_file.readline()
49 | if self.mode == 'test' and item_count % 10 != 0:
50 | continue
51 | if not input_line:
52 | break
53 | if input_line[-1:] == '\n':
54 | input_line = input_line[:-1]
55 | self.list_rgb.append('{0}/data/{1}/rgb/{2}.png'.format(self.root, '%02d' % item, input_line))
56 | self.list_depth.append('{0}/data/{1}/depth/{2}.png'.format(self.root, '%02d' % item, input_line))
57 | if self.mode == 'eval':
58 | self.list_label.append('{0}/segnet_results/{1}_label/{2}_label.png'.format(self.root, '%02d' % item, input_line))
59 | else:
60 | self.list_label.append('{0}/data/{1}/mask/{2}.png'.format(self.root, '%02d' % item, input_line))
61 |
62 | self.list_obj.append(item)
63 | self.list_rank.append(int(input_line))
64 |
65 | meta_file = open('{0}/data/{1}/gt.yml'.format(self.root, '%02d' % item), 'r')
66 | self.meta[item] = yaml.load(meta_file)
67 | self.pt[item] = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % item))
68 |
69 | print("Object {0} buffer loaded".format(item))
70 |
71 | self.length = len(self.list_rgb)
72 |
73 | self.cam_cx = 325.26110
74 | self.cam_cy = 242.04899
75 | self.cam_fx = 572.41140
76 | self.cam_fy = 573.57043
77 |
78 | self.xmap = np.array([[j for i in range(640)] for j in range(480)])
79 | self.ymap = np.array([[i for i in range(640)] for j in range(480)])
80 |
81 | self.num = num
82 | self.add_noise = add_noise
83 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
84 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
85 | self.border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
86 | self.num_pt_mesh_large = 500
87 | self.num_pt_mesh_small = 500
88 | self.symmetry_obj_idx = [7, 8]
89 |
90 | def __getitem__(self, index):
91 | img = Image.open(self.list_rgb[index])
92 | ori_img = np.array(img)
93 | depth = np.array(Image.open(self.list_depth[index]))
94 | label = np.array(Image.open(self.list_label[index]))
95 | obj = self.list_obj[index]
96 | rank = self.list_rank[index]
97 |
98 | if obj == 2:
99 | for i in range(0, len(self.meta[obj][rank])):
100 | if self.meta[obj][rank][i]['obj_id'] == 2:
101 | meta = self.meta[obj][rank][i]
102 | break
103 | else:
104 | meta = self.meta[obj][rank][0]
105 |
106 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
107 | if self.mode == 'eval':
108 | mask_label = ma.getmaskarray(ma.masked_equal(label, np.array(255)))
109 | else:
110 | mask_label = ma.getmaskarray(ma.masked_equal(label, np.array([255, 255, 255])))[:, :, 0]
111 |
112 | mask = mask_label * mask_depth
113 |
114 | if self.add_noise:
115 | img = self.trancolor(img)
116 |
117 | img = np.array(img)[:, :, :3]
118 | img = np.transpose(img, (2, 0, 1))
119 | img_masked = img
120 |
121 | if self.mode == 'eval':
122 | rmin, rmax, cmin, cmax = get_bbox(mask_to_bbox(mask_label))
123 | else:
124 | rmin, rmax, cmin, cmax = get_bbox(meta['obj_bb'])
125 |
126 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
127 | #p_img = np.transpose(img_masked, (1, 2, 0))
128 | #scipy.misc.imsave('evaluation_result/{0}_input.png'.format(index), p_img)
129 |
130 | target_r = np.resize(np.array(meta['cam_R_m2c']), (3, 3))
131 | target_t = np.array(meta['cam_t_m2c'])
132 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])
133 |
134 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
135 | if len(choose) == 0:
136 | cc = torch.LongTensor([0])
137 | return(cc, cc, cc, cc, cc, cc)
138 |
139 | if len(choose) > self.num:
140 | c_mask = np.zeros(len(choose), dtype=int)
141 | c_mask[:self.num] = 1
142 | np.random.shuffle(c_mask)
143 | choose = choose[c_mask.nonzero()]
144 | else:
145 | choose = np.pad(choose, (0, self.num - len(choose)), 'wrap')
146 |
147 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
148 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
149 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
150 | choose = np.array([choose])
151 |
152 | cam_scale = 1.0
153 | pt2 = depth_masked / cam_scale
154 | pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx
155 | pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy
156 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
157 | cloud = cloud / 1000.0
158 |
159 | if self.add_noise:
160 | cloud = np.add(cloud, add_t)
161 |
162 | #fw = open('evaluation_result/{0}_cld.xyz'.format(index), 'w')
163 | #for it in cloud:
164 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
165 | #fw.close()
166 |
167 | model_points = self.pt[obj] / 1000.0
168 | dellist = [j for j in range(0, len(model_points))]
169 | dellist = random.sample(dellist, len(model_points) - self.num_pt_mesh_small)
170 | model_points = np.delete(model_points, dellist, axis=0)
171 |
172 | #fw = open('evaluation_result/{0}_model_points.xyz'.format(index), 'w')
173 | #for it in model_points:
174 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
175 | #fw.close()
176 |
177 | target = np.dot(model_points, target_r.T)
178 | if self.add_noise:
179 | target = np.add(target, target_t / 1000.0 + add_t)
180 | out_t = target_t / 1000.0 + add_t
181 | else:
182 | target = np.add(target, target_t / 1000.0)
183 | out_t = target_t / 1000.0
184 |
185 | #fw = open('evaluation_result/{0}_tar.xyz'.format(index), 'w')
186 | #for it in target:
187 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
188 | #fw.close()
189 |
190 | return torch.from_numpy(cloud.astype(np.float32)), \
191 | torch.LongTensor(choose.astype(np.int32)), \
192 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
193 | torch.from_numpy(target.astype(np.float32)), \
194 | torch.from_numpy(model_points.astype(np.float32)), \
195 | torch.LongTensor([self.objlist.index(obj)])
196 |
197 | def __len__(self):
198 | return self.length
199 |
200 | def get_sym_list(self):
201 | return self.symmetry_obj_idx
202 |
203 | def get_num_points_mesh(self):
204 | if self.refine:
205 | return self.num_pt_mesh_large
206 | else:
207 | return self.num_pt_mesh_small
208 |
209 |
210 |
211 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
212 | img_width = 480
213 | img_length = 640
214 |
215 |
216 | def mask_to_bbox(mask):
217 | mask = mask.astype(np.uint8)
218 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
219 |
220 |
221 | x = 0
222 | y = 0
223 | w = 0
224 | h = 0
225 | for contour in contours:
226 | tmp_x, tmp_y, tmp_w, tmp_h = cv2.boundingRect(contour)
227 | if tmp_w * tmp_h > w * h:
228 | x = tmp_x
229 | y = tmp_y
230 | w = tmp_w
231 | h = tmp_h
232 | return [x, y, w, h]
233 |
234 |
235 | def get_bbox(bbox):
236 | bbx = [bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2]]
237 | if bbx[0] < 0:
238 | bbx[0] = 0
239 | if bbx[1] >= 480:
240 | bbx[1] = 479
241 | if bbx[2] < 0:
242 | bbx[2] = 0
243 | if bbx[3] >= 640:
244 | bbx[3] = 639
245 | rmin, rmax, cmin, cmax = bbx[0], bbx[1], bbx[2], bbx[3]
246 | r_b = rmax - rmin
247 | for tt in range(len(border_list)):
248 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
249 | r_b = border_list[tt + 1]
250 | break
251 | c_b = cmax - cmin
252 | for tt in range(len(border_list)):
253 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
254 | c_b = border_list[tt + 1]
255 | break
256 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
257 | rmin = center[0] - int(r_b / 2)
258 | rmax = center[0] + int(r_b / 2)
259 | cmin = center[1] - int(c_b / 2)
260 | cmax = center[1] + int(c_b / 2)
261 | if rmin < 0:
262 | delt = -rmin
263 | rmin = 0
264 | rmax += delt
265 | if cmin < 0:
266 | delt = -cmin
267 | cmin = 0
268 | cmax += delt
269 | if rmax > 480:
270 | delt = rmax - 480
271 | rmax = 480
272 | rmin -= delt
273 | if cmax > 640:
274 | delt = cmax - 640
275 | cmax = 640
276 | cmin -= delt
277 | return rmin, rmax, cmin, cmax
278 |
279 |
280 | def ply_vtx(path):
281 | f = open(path)
282 | assert f.readline().strip() == "ply"
283 | f.readline()
284 | f.readline()
285 | N = int(f.readline().split()[-1])
286 | while f.readline().strip() != "end_header":
287 | continue
288 | pts = []
289 | for _ in range(N):
290 | pts.append(np.float32(f.readline().split()[:3]))
291 | return np.array(pts)
292 |
--------------------------------------------------------------------------------
/scripts/datasets/linemod/dataset_config/models_info.yml:
--------------------------------------------------------------------------------
1 | 1: {diameter: 102.09865663, min_x: -37.93430000, min_y: -38.79960000, min_z: -45.88450000, size_x: 75.86860000, size_y: 77.59920000, size_z: 91.76900000}
2 | 2: {diameter: 247.50624233, min_x: -107.83500000, min_y: -60.92790000, min_z: -109.70500000, size_x: 215.67000000, size_y: 121.85570000, size_z: 219.41000000}
3 | 3: {diameter: 167.35486092, min_x: -83.21620000, min_y: -82.65910000, min_z: -37.23640000, size_x: 166.43240000, size_y: 165.31820000, size_z: 74.47280000}
4 | 4: {diameter: 172.49224865, min_x: -68.32970000, min_y: -71.51510000, min_z: -50.24850000, size_x: 136.65940000, size_y: 143.03020000, size_z: 100.49700000}
5 | 5: {diameter: 201.40358597, min_x: -50.39580000, min_y: -90.89790000, min_z: -96.86700000, size_x: 100.79160000, size_y: 181.79580000, size_z: 193.73400000}
6 | 6: {diameter: 154.54551808, min_x: -33.50540000, min_y: -63.81650000, min_z: -58.72830000, size_x: 67.01070000, size_y: 127.63300000, size_z: 117.45660000}
7 | 7: {diameter: 124.26430816, min_x: -58.78990000, min_y: -45.75560000, min_z: -47.31120000, size_x: 117.57980000, size_y: 91.51120000, size_z: 94.62240000}
8 | 8: {diameter: 261.47178102, min_x: -114.73800000, min_y: -37.73570000, min_z: -104.00100000, size_x: 229.47600000, size_y: 75.47140000, size_z: 208.00200000}
9 | 9: {diameter: 108.99920102, min_x: -52.21460000, min_y: -38.70380000, min_z: -42.84850000, size_x: 104.42920000, size_y: 77.40760000, size_z: 85.69700000}
10 | 10: {diameter: 164.62758848, min_x: -75.09230000, min_y: -53.53750000, min_z: -34.62070000, size_x: 150.18460000, size_y: 107.07500000, size_z: 69.24140000}
11 | 11: {diameter: 175.88933422, min_x: -18.36050000, min_y: -38.93300000, min_z: -86.40790000, size_x: 36.72110000, size_y: 77.86600000, size_z: 172.81580000}
12 | 12: {diameter: 145.54287471, min_x: -50.44390000, min_y: -54.24850000, min_z: -45.40000000, size_x: 100.88780000, size_y: 108.49700000, size_z: 90.80000000}
13 | 13: {diameter: 278.07811733, min_x: -129.11300000, min_y: -59.24100000, min_z: -70.56620000, size_x: 258.22600000, size_y: 118.48210000, size_z: 141.13240000}
14 | 14: {diameter: 282.60129399, min_x: -101.57300000, min_y: -58.87630000, min_z: -106.55800000, size_x: 203.14600000, size_y: 117.75250000, size_z: 213.11600000}
15 | 15: {diameter: 212.35825148, min_x: -46.95910000, min_y: -73.71670000, min_z: -92.37370000, size_x: 93.91810000, size_y: 147.43340000, size_z: 184.74740000}
--------------------------------------------------------------------------------
/scripts/datasets/ycb/dataset.py:
--------------------------------------------------------------------------------
1 | import torch.utils.data as data
2 | from PIL import Image
3 | import os
4 | import os.path
5 | import torch
6 | import numpy as np
7 | import torchvision.transforms as transforms
8 | import argparse
9 | import time
10 | import random
11 | from lib.transformations import quaternion_from_euler, euler_matrix, random_quaternion, quaternion_matrix
12 | import numpy.ma as ma
13 | import copy
14 | import scipy.misc
15 | import scipy.io as scio
16 |
17 |
18 | class PoseDataset(data.Dataset):
19 | def __init__(self, mode, num_pt, add_noise, root, noise_trans, refine):
20 | if mode == 'train':
21 | self.path = 'datasets/ycb/dataset_config/train_data_list.txt'
22 | elif mode == 'test':
23 | self.path = 'datasets/ycb/dataset_config/test_data_list.txt'
24 | self.num_pt = num_pt
25 | self.root = root
26 | self.add_noise = add_noise
27 | self.noise_trans = noise_trans
28 |
29 | self.list = []
30 | self.real = []
31 | self.syn = []
32 | input_file = open(self.path)
33 | while 1:
34 | input_line = input_file.readline()
35 | if not input_line:
36 | break
37 | if input_line[-1:] == '\n':
38 | input_line = input_line[:-1]
39 | if input_line[:5] == 'data/':
40 | self.real.append(input_line)
41 | else:
42 | self.syn.append(input_line)
43 | self.list.append(input_line)
44 | input_file.close()
45 |
46 | self.length = len(self.list)
47 | self.len_real = len(self.real)
48 | self.len_syn = len(self.syn)
49 |
50 | class_file = open('datasets/ycb/dataset_config/classes.txt')
51 | class_id = 1
52 | self.cld = {}
53 | while 1:
54 | class_input = class_file.readline()
55 | if not class_input:
56 | break
57 |
58 | input_file = open('{0}/models/{1}/points.xyz'.format(self.root, class_input[:-1]))
59 | self.cld[class_id] = []
60 | while 1:
61 | input_line = input_file.readline()
62 | if not input_line:
63 | break
64 | input_line = input_line[:-1].split(' ')
65 | self.cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
66 | self.cld[class_id] = np.array(self.cld[class_id])
67 | input_file.close()
68 |
69 | class_id += 1
70 |
71 | self.cam_cx_1 = 312.9869
72 | self.cam_cy_1 = 241.3109
73 | self.cam_fx_1 = 1066.778
74 | self.cam_fy_1 = 1067.487
75 |
76 | self.cam_cx_2 = 323.7872
77 | self.cam_cy_2 = 279.6921
78 | self.cam_fx_2 = 1077.836
79 | self.cam_fy_2 = 1078.189
80 |
81 | self.xmap = np.array([[j for i in range(640)] for j in range(480)])
82 | self.ymap = np.array([[i for i in range(640)] for j in range(480)])
83 |
84 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05)
85 | self.noise_img_loc = 0.0
86 | self.noise_img_scale = 7.0
87 | self.minimum_num_pt = 50
88 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
89 | self.symmetry_obj_idx = [12, 15, 18, 19, 20]
90 | self.num_pt_mesh_small = 500
91 | self.num_pt_mesh_large = 2600
92 | self.refine = refine
93 | self.front_num = 2
94 |
95 | print(len(self.list))
96 |
97 | def __getitem__(self, index):
98 | img = Image.open('{0}/{1}-color.png'.format(self.root, self.list[index]))
99 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(self.root, self.list[index])))
100 | label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index])))
101 | meta = scio.loadmat('{0}/{1}-meta.mat'.format(self.root, self.list[index]))
102 |
103 | if self.list[index][:8] != 'data_syn' and int(self.list[index][5:9]) >= 60:
104 | cam_cx = self.cam_cx_2
105 | cam_cy = self.cam_cy_2
106 | cam_fx = self.cam_fx_2
107 | cam_fy = self.cam_fy_2
108 | else:
109 | cam_cx = self.cam_cx_1
110 | cam_cy = self.cam_cy_1
111 | cam_fx = self.cam_fx_1
112 | cam_fy = self.cam_fy_1
113 |
114 | mask_back = ma.getmaskarray(ma.masked_equal(label, 0))
115 |
116 | add_front = False
117 | if self.add_noise:
118 | for k in range(5):
119 | seed = random.choice(self.syn)
120 | front = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
121 | front = np.transpose(front, (2, 0, 1))
122 | f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed)))
123 | front_label = np.unique(f_label).tolist()[1:]
124 | if len(front_label) < self.front_num:
125 | continue
126 | front_label = random.sample(front_label, self.front_num)
127 | for f_i in front_label:
128 | mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i))
129 | if f_i == front_label[0]:
130 | mask_front = mk
131 | else:
132 | mask_front = mask_front * mk
133 | t_label = label * mask_front
134 | if len(t_label.nonzero()[0]) > 1000:
135 | label = t_label
136 | add_front = True
137 | break
138 |
139 | obj = meta['cls_indexes'].flatten().astype(np.int32)
140 |
141 | while 1:
142 | idx = np.random.randint(0, len(obj))
143 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
144 | mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx]))
145 | mask = mask_label * mask_depth
146 | if len(mask.nonzero()[0]) > self.minimum_num_pt:
147 | break
148 |
149 | if self.add_noise:
150 | img = self.trancolor(img)
151 |
152 | rmin, rmax, cmin, cmax = get_bbox(mask_label)
153 | img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax]
154 |
155 | if self.list[index][:8] == 'data_syn':
156 | seed = random.choice(self.real)
157 | back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB")))
158 | back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax]
159 | img_masked = back * mask_back[rmin:rmax, cmin:cmax] + img
160 | else:
161 | img_masked = img
162 |
163 | if self.add_noise and add_front:
164 | img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax])
165 |
166 | if self.list[index][:8] == 'data_syn':
167 | img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape)
168 |
169 | # p_img = np.transpose(img_masked, (1, 2, 0))
170 | # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img)
171 | # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32))
172 |
173 | target_r = meta['poses'][:, :, idx][:, 0:3]
174 | target_t = np.array([meta['poses'][:, :, idx][:, 3:4].flatten()])
175 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)])
176 |
177 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
178 | if len(choose) > self.num_pt:
179 | c_mask = np.zeros(len(choose), dtype=int)
180 | c_mask[:self.num_pt] = 1
181 | np.random.shuffle(c_mask)
182 | choose = choose[c_mask.nonzero()]
183 | else:
184 | choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap')
185 |
186 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
187 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
188 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
189 | choose = np.array([choose])
190 |
191 | cam_scale = meta['factor_depth'][0][0]
192 | pt2 = depth_masked / cam_scale
193 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
194 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
195 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
196 | if self.add_noise:
197 | cloud = np.add(cloud, add_t)
198 |
199 | # fw = open('temp/{0}_cld.xyz'.format(index), 'w')
200 | # for it in cloud:
201 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
202 | # fw.close()
203 |
204 | dellist = [j for j in range(0, len(self.cld[obj[idx]]))]
205 | if self.refine:
206 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_large)
207 | else:
208 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_small)
209 | model_points = np.delete(self.cld[obj[idx]], dellist, axis=0)
210 |
211 | # fw = open('temp/{0}_model_points.xyz'.format(index), 'w')
212 | # for it in model_points:
213 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
214 | # fw.close()
215 |
216 | target = np.dot(model_points, target_r.T)
217 | if self.add_noise:
218 | target = np.add(target, target_t + add_t)
219 | else:
220 | target = np.add(target, target_t)
221 |
222 | # fw = open('temp/{0}_tar.xyz'.format(index), 'w')
223 | # for it in target:
224 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2]))
225 | # fw.close()
226 |
227 | return torch.from_numpy(cloud.astype(np.float32)), \
228 | torch.LongTensor(choose.astype(np.int32)), \
229 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \
230 | torch.from_numpy(target.astype(np.float32)), \
231 | torch.from_numpy(model_points.astype(np.float32)), \
232 | torch.LongTensor([int(obj[idx]) - 1])
233 |
234 | def __len__(self):
235 | return self.length
236 |
237 | def get_sym_list(self):
238 | return self.symmetry_obj_idx
239 |
240 | def get_num_points_mesh(self):
241 | if self.refine:
242 | return self.num_pt_mesh_large
243 | else:
244 | return self.num_pt_mesh_small
245 |
246 |
247 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
248 | img_width = 480
249 | img_length = 640
250 |
251 | def get_bbox(label):
252 | rows = np.any(label, axis=1)
253 | cols = np.any(label, axis=0)
254 | rmin, rmax = np.where(rows)[0][[0, -1]]
255 | cmin, cmax = np.where(cols)[0][[0, -1]]
256 | rmax += 1
257 | cmax += 1
258 | r_b = rmax - rmin
259 | for tt in range(len(border_list)):
260 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
261 | r_b = border_list[tt + 1]
262 | break
263 | c_b = cmax - cmin
264 | for tt in range(len(border_list)):
265 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
266 | c_b = border_list[tt + 1]
267 | break
268 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
269 | rmin = center[0] - int(r_b / 2)
270 | rmax = center[0] + int(r_b / 2)
271 | cmin = center[1] - int(c_b / 2)
272 | cmax = center[1] + int(c_b / 2)
273 | if rmin < 0:
274 | delt = -rmin
275 | rmin = 0
276 | rmax += delt
277 | if cmin < 0:
278 | delt = -cmin
279 | cmin = 0
280 | cmax += delt
281 | if rmax > img_width:
282 | delt = rmax - img_width
283 | rmax = img_width
284 | rmin -= delt
285 | if cmax > img_length:
286 | delt = cmax - img_length
287 | cmax = img_length
288 | cmin -= delt
289 | return rmin, rmax, cmin, cmax
290 |
--------------------------------------------------------------------------------
/scripts/datasets/ycb/dataset_config/classes.txt:
--------------------------------------------------------------------------------
1 | 002_master_chef_can
2 | 003_cracker_box
3 | 004_sugar_box
4 | 005_tomato_soup_can
5 | 006_mustard_bottle
6 | 007_tuna_fish_can
7 | 008_pudding_box
8 | 009_gelatin_box
9 | 010_potted_meat_can
10 | 011_banana
11 | 019_pitcher_base
12 | 021_bleach_cleanser
13 | 024_bowl
14 | 025_mug
15 | 035_power_drill
16 | 036_wood_block
17 | 037_scissors
18 | 040_large_marker
19 | 051_large_clamp
20 | 052_extra_large_clamp
21 | 061_foam_brick
22 |
--------------------------------------------------------------------------------
/scripts/distortion.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/distortion.npy
--------------------------------------------------------------------------------
/scripts/eval.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | ########################################## ros packages ##############################################
4 | import rospy
5 | from sensor_msgs.msg import Image, CameraInfo
6 | from cv_bridge import CvBridge, CvBridgeError
7 |
8 | ########################################################################################################
9 |
10 | import cv2
11 | import torch
12 | import argparse
13 | import os, tqdm  # tqdm is required by eval() below
14 | from torch.utils.data import DataLoader
15 | from model.build_BiSeNet import BiSeNet
16 | import numpy as np
17 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou
18 | from datasets.dataset import ycb_Dataset
19 | from matplotlib import pyplot as plt
20 | from torchvision import transforms
21 |
22 |
23 | def eval(model,dataloader, args ):
24 | print('start test!')
25 | with torch.no_grad():
26 | model.eval()
27 | precision_record = []
28 | tq = tqdm.tqdm(total=len(dataloader) * args.batch_size)
29 | tq.set_description('test')
30 | hist = np.zeros((args.num_classes, args.num_classes))
31 | for i, (data, label) in enumerate(dataloader):
32 | tq.update(args.batch_size)
33 | if torch.cuda.is_available() and args.use_gpu:
34 | data = data.cuda()
35 | label = label.cuda()
36 | predict = model(data).squeeze()
37 | predict = reverse_one_hot(predict)
38 | predict = np.array(predict)
39 | # predict = colour_code_segmentation(np.array(predict), label_info)
40 |
41 | label = label.squeeze()
42 | if args.loss == 'dice':
43 | label = reverse_one_hot(label)
44 | label = np.array(label)
45 | # label = colour_code_segmentation(np.array(label), label_info)
46 |
47 | precision = compute_global_accuracy(predict, label)
48 | hist += fast_hist(label.flatten(), predict.flatten(), args.num_classes)
49 | precision_record.append(precision)
50 | save_img(i,data,predict)
51 | precision = np.mean(precision_record)
52 | miou_list = per_class_iu(hist)[:-1]
53 | miou = np.mean(miou_list)
54 | print('IoU for each class:')
55 | tq.close()
56 | print('precision for test: %.3f' % precision)
57 | print('mIoU for validation: %.3f' % miou)
58 | return precision
59 | def save_img(iteration,img,label):
60 | img = img.cpu()
61 | img = img.numpy()
62 | img = np.transpose(img, [0,2,3,1])
63 | _,h,w,c = img.shape
64 | img = img.reshape([h,w,c])
65 | fig, axes = plt.subplots(1,2,figsize = (8,4))
66 | ax = axes.ravel()
67 | ax[0].imshow(img)
68 | ax[1].imshow(label)
69 | plt.show()
70 | plt.savefig('./ycb/segmentation_result/{}.png'.format(iteration))
71 | plt.close()
72 |
73 | ######################################################################################################
74 | ############################################## test ##################################################
75 | #####################################################################################################
76 | class object_segmentation:
77 | def __init__(self,model):
78 | self.model = model
79 | self.bridge = CvBridge()
80 | self.label_pub = rospy.Publisher('label',Image,queue_size = 10)
81 | self.rgb_sub = rospy.Subscriber('rgb_image',Image, self.seg_callback)
82 | def seg_callback(self, rgb):
83 | try:
84 | with torch.no_grad():
85 | self.model.eval()
86 | rgb = self.bridge.imgmsg_to_cv2(rgb,'bgr8')
87 | self.to_tensor = transforms.Compose([
88 | transforms.ToTensor(),
89 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
90 | ])
91 | #rgb = np.transpose(rgb, (2,0,1))
92 | #rgb = np.expand_dims(rgb, axis = 0)
93 | #print(type(rgb))
94 | #rgb = torch.from_numpy(rgb)
95 | rgb = self.to_tensor(rgb)
96 | rgb = rgb.unsqueeze_(0)
97 | rgb = rgb.cuda()
98 | predict = self.model(rgb).squeeze()
99 | predict = reverse_one_hot(predict)
100 | predict = np.array(predict)
101 | np.save('./predict',predict)
102 | self.label_pub.publish(self.bridge.cv2_to_imgmsg(predict,'32SC1'))
103 | print('ss')
104 | except CvBridgeError as e:
105 | print(e)
106 |
107 |
108 |
109 |
110 |
111 | def main(params):
112 | # basic parameters
113 | parser = argparse.ArgumentParser()
114 | parser.add_argument('--checkpoint_path', type=str, default=None, required=True, help='The path to the pretrained weights of model')
115 | parser.add_argument('--crop_height', type=int, default=720, help='Height of cropped/resized input image to network')
116 | parser.add_argument('--crop_width', type=int, default=960, help='Width of cropped/resized input image to network')
117 | parser.add_argument('--data', type=str, default='/path/to/data', help='Path of training data')
118 | parser.add_argument('--batch_size', type=int, default=1, help='Number of images in each batch')
119 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
120 | parser.add_argument('--cuda', type=str, default='0', help='GPU ids used for training')
121 | parser.add_argument('--use_gpu', type=bool, default=True, help='Whether to use GPU for training')
122 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)')
123 | parser.add_argument('--loss', type=str, default='dice', help='loss function, dice or crossentropy')
124 | args = parser.parse_args(params)
125 |
126 | # build model
127 | os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda
128 | model = BiSeNet(args.num_classes, args.context_path)
129 | if torch.cuda.is_available() and args.use_gpu:
130 | model = torch.nn.DataParallel(model).cuda()
131 |
132 | # load pretrained model if exists
133 | print('load model from %s ...' % args.checkpoint_path)
134 | model.module.load_state_dict(torch.load(args.checkpoint_path))
135 | print('Done!')
136 |
137 | rospy.init_node('obj_seg',anonymous=True)
138 | Seg = object_segmentation(model)
139 | rospy.spin()
140 |
141 |
142 |
143 | if __name__ == '__main__':
144 | params = [
145 | '--checkpoint_path', './checkpoints_18_sgd/best_dice_loss.pth',
146 | '--data', './CamVid/',
147 | '--cuda', '1',
148 | '--context_path', 'resnet101',
149 | '--num_classes', '21'
150 | ]
151 | main(params)
152 |
--------------------------------------------------------------------------------
/scripts/experiments/scripts/eval_linemod.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | python3 ./tools/eval_linemod.py --dataset_root ./datasets/linemod/Linemod_preprocessed\
10 | --model trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth\
11 | --refine_model trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth
--------------------------------------------------------------------------------
/scripts/experiments/scripts/eval_ycb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | if [ ! -d YCB_Video_toolbox ];then
10 | echo 'Downloading the YCB_Video_toolbox...'
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git
12 | cd YCB_Video_toolbox
13 | unzip results_PoseCNN_RSS2018.zip
14 | cd ..
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/
16 | fi
17 |
18 | python ./tools/eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\
19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\
20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth
21 |
--------------------------------------------------------------------------------
/scripts/experiments/scripts/ros_eval_msg.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | if [ ! -d YCB_Video_toolbox ];then
10 | echo 'Downloading the YCB_Video_toolbox...'
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git
12 | cd YCB_Video_toolbox
13 | unzip results_PoseCNN_RSS2018.zip
14 | cd ..
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/
16 | fi
17 |
18 | python ./tools/ros_eval_ycb_message.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\
19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\
20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \
21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \
22 | --num_classes 22 \
23 | --context_path resnet18
24 |
25 |
--------------------------------------------------------------------------------
/scripts/experiments/scripts/ros_eval_ycb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | if [ ! -d YCB_Video_toolbox ];then
10 | echo 'Downloading the YCB_Video_toolbox...'
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git
12 | cd YCB_Video_toolbox
13 | unzip results_PoseCNN_RSS2018.zip
14 | cd ..
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/
16 | fi
17 | # --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\
18 | #--model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \
19 | python ./tools/ros_eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\
20 | --model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \
21 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\
22 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \
23 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \
24 | --num_classes 21 \
25 | --context_path resnet101
26 |
--------------------------------------------------------------------------------
/scripts/experiments/scripts/test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | if [ ! -d YCB_Video_toolbox ];then
10 | echo 'Downloading the YCB_Video_toolbox...'
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git
12 | cd YCB_Video_toolbox
13 | unzip results_PoseCNN_RSS2018.zip
14 | cd ..
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/
16 | fi
17 |
18 | python ./tools/ros_eval_ycb2.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\
19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\
20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \
21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \
22 | --num_classes 21 \
23 | --context_path resnet18
24 |
--------------------------------------------------------------------------------
/scripts/experiments/scripts/train_linemod.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | python3 ./tools/train.py --dataset linemod\
10 | --dataset_root ./datasets/linemod/Linemod_preprocessed
--------------------------------------------------------------------------------
/scripts/experiments/scripts/train_ycb.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | set -x
4 | set -e
5 |
6 | export PYTHONUNBUFFERED="True"
7 | export CUDA_VISIBLE_DEVICES=0
8 |
9 | python2 ./tools/train.py --dataset ycb\
10 | --dataset_root ./datasets/ycb/YCB_Video_Dataset
11 |
--------------------------------------------------------------------------------
/scripts/lib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/__init__.pyc
--------------------------------------------------------------------------------
/scripts/lib/extractors.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/extractors.pyc
--------------------------------------------------------------------------------
/scripts/lib/knn/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/__init__.pyc
--------------------------------------------------------------------------------
/scripts/lib/knn/build/knn_cuda_kernel.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/build/knn_cuda_kernel.so
--------------------------------------------------------------------------------
/scripts/lib/knn/build_ffi.py:
--------------------------------------------------------------------------------
1 | # https://gist.github.com/tonyseek/7821993
2 | import glob
3 | import torch
4 | from os import path as osp
5 | from torch.utils.ffi import create_extension
6 |
7 | abs_path = osp.dirname(osp.realpath(__file__))
8 | extra_objects = [osp.join(abs_path, 'build/knn_cuda_kernel.so')]
9 | extra_objects += glob.glob('/usr/local/cuda/lib64/*.a')
10 |
11 | ffi = create_extension(
12 | 'knn_pytorch',
13 | headers=['src/knn_pytorch.h'],
14 | sources=['src/knn_pytorch.c'],
15 | define_macros=[('WITH_CUDA', None)],
16 | relative_to=__file__,
17 | with_cuda=True,
18 | extra_objects=extra_objects,
19 | include_dirs=[osp.join(abs_path, 'include')]
20 | )
21 |
22 |
23 | if __name__ == '__main__':
24 | assert torch.cuda.is_available(), 'Please install CUDA for GPU support.'
25 | ffi.build()
26 |
--------------------------------------------------------------------------------
/scripts/lib/knn/knn_pytorch/__init__.py:
--------------------------------------------------------------------------------
1 |
2 | from torch.utils.ffi import _wrap_function
3 | from ._knn_pytorch import lib as _lib, ffi as _ffi
4 |
5 | __all__ = []
6 | def _import_symbols(locals):
7 | for symbol in dir(_lib):
8 | fn = getattr(_lib, symbol)
9 | if callable(fn):
10 | locals[symbol] = _wrap_function(fn, _ffi)
11 | else:
12 | locals[symbol] = fn
13 | __all__.append(symbol)
14 |
15 | _import_symbols(locals())
16 |
--------------------------------------------------------------------------------
/scripts/lib/knn/knn_pytorch/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__init__.pyc
--------------------------------------------------------------------------------
/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc
--------------------------------------------------------------------------------
/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc
--------------------------------------------------------------------------------
/scripts/lib/knn/knn_pytorch/_knn_pytorch.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/_knn_pytorch.so
--------------------------------------------------------------------------------
/scripts/lib/knn/src/knn_cuda_kernel.cu:
--------------------------------------------------------------------------------
1 | /** Modified version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA
2 | * The modifications are
3 | * removed texture memory usage
4 | * removed split query KNN computation
5 | * added feature extraction with bilinear interpolation
6 | *
7 | * Last modified by Christopher B. Choy 12/23/2016
8 | */
9 |
10 | // Includes
11 | #include <stdio.h>
12 | #include "cuda.h"
13 |
14 | #include "knn_cuda_kernel.h"
15 |
16 | // Constants used by the program
17 | #define BLOCK_DIM 16
18 | #define DEBUG 0
19 |
20 | /**
21 |  * Computes the distance between the two matrices A (reference points) and
22 | * B (query points) containing respectively wA and wB points.
23 | *
24 | * @param A pointer on the matrix A
25 | * @param wA width of the matrix A = number of points in A
26 | * @param B pointer on the matrix B
27 | * @param wB width of the matrix B = number of points in B
28 | * @param dim dimension of points = height of matrices A and B
29 | * @param AB pointer on the matrix containing the wA*wB distances computed
30 | */
31 | __global__ void cuComputeDistanceGlobal( float* A, int wA,
32 | float* B, int wB, int dim, float* AB){
33 |
34 | // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B
35 | __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM];
36 | __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM];
37 |
38 | // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step)
39 | __shared__ int begin_A;
40 | __shared__ int begin_B;
41 | __shared__ int step_A;
42 | __shared__ int step_B;
43 | __shared__ int end_A;
44 |
45 | // Thread index
46 | int tx = threadIdx.x;
47 | int ty = threadIdx.y;
48 |
49 | // Other variables
50 | float tmp;
51 | float ssd = 0;
52 |
53 | // Loop parameters
54 | begin_A = BLOCK_DIM * blockIdx.y;
55 | begin_B = BLOCK_DIM * blockIdx.x;
56 | step_A = BLOCK_DIM * wA;
57 | step_B = BLOCK_DIM * wB;
58 | end_A = begin_A + (dim-1) * wA;
59 |
60 | // Conditions
61 | int cond0 = (begin_A + tx < wA); // used to write in shared memory
62 |   int cond1 = (begin_B + tx < wB); // used to write in shared memory, in computations and when writing the output matrix
63 |   int cond2 = (begin_A + ty < wA); // used in computations and when writing the output matrix
64 |
65 | // Loop over all the sub-matrices of A and B required to compute the block sub-matrix
66 | for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) {
67 | // Load the matrices from device memory to shared memory; each thread loads one element of each matrix
68 | if (a/wA + ty < dim){
69 | shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0;
70 | shared_B[ty][tx] = (cond1)? B[b + wB * ty + tx] : 0;
71 | }
72 | else{
73 | shared_A[ty][tx] = 0;
74 | shared_B[ty][tx] = 0;
75 | }
76 |
77 | // Synchronize to make sure the matrices are loaded
78 | __syncthreads();
79 |
80 | // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix
81 | if (cond2 && cond1){
82 | for (int k = 0; k < BLOCK_DIM; ++k){
83 | tmp = shared_A[k][ty] - shared_B[k][tx];
84 | ssd += tmp*tmp;
85 | }
86 | }
87 |
88 | // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration
89 | __syncthreads();
90 | }
91 |
92 | // Write the block sub-matrix to device memory; each thread writes one element
93 | if (cond2 && cond1)
94 | AB[(begin_A + ty) * wB + begin_B + tx] = ssd;
95 | }
96 |
97 |
98 | /**
99 | * Gathers k-th smallest distances for each column of the distance matrix in the top.
100 | *
101 | * @param dist distance matrix
102 | * @param ind index matrix
103 | * @param width width of the distance matrix and of the index matrix
104 | * @param height height of the distance matrix and of the index matrix
105 | * @param k number of neighbors to consider
106 | */
107 | __global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){
108 |
109 | // Variables
110 | int l, i, j;
111 | float *p_dist;
112 | long *p_ind;
113 | float curr_dist, max_dist;
114 | long curr_row, max_row;
115 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
116 |
117 |   if (xIndex<width){
118 |     // Pointer shift, initialization, and max value
119 |     p_dist   = dist + xIndex;
120 |     p_ind    = ind  + xIndex;
121 |     max_dist = p_dist[0];
122 |     p_ind[0] = 1;
123 |
124 |     // Part 1 : sort the k first elements
125 |     for (l=1; l<k; l++){
126 |       curr_row  = l * width;
127 |       curr_dist = p_dist[curr_row];
128 |       if (curr_dist<max_dist){
129 |         i=l-1;
130 |         for (int a=0; a<l-1; a++){
131 |           if (p_dist[a*width]>curr_dist){
132 | i=a;
133 | break;
134 | }
135 | }
136 | for (j=l; j>i; j--){
137 | p_dist[j*width] = p_dist[(j-1)*width];
138 | p_ind[j*width] = p_ind[(j-1)*width];
139 | }
140 | p_dist[i*width] = curr_dist;
141 | p_ind[i*width] = l+1;
142 | } else {
143 | p_ind[l*width] = l+1;
144 | }
145 | max_dist = p_dist[curr_row];
146 | }
147 |
148 | // Part 2 : insert element in the k-th first lines
149 | max_row = (k-1)*width;
150 |     for (l=k; l<height; l++){
151 |       curr_dist = p_dist[l*width];
152 |       if (curr_dist<max_dist){
153 |         i=k-1;
154 |         for (int a=0; a<k-1; a++){
155 |           if (p_dist[a*width]>curr_dist){
156 | i=a;
157 | break;
158 | }
159 | }
160 | for (j=k-1; j>i; j--){
161 | p_dist[j*width] = p_dist[(j-1)*width];
162 | p_ind[j*width] = p_ind[(j-1)*width];
163 | }
164 | p_dist[i*width] = curr_dist;
165 | p_ind[i*width] = l+1;
166 | max_dist = p_dist[max_row];
167 | }
168 | }
169 | }
170 | }
171 |
172 |
173 | /**
174 | * Computes the square root of the first line (width-th first element)
175 | * of the distance matrix.
176 | *
177 | * @param dist distance matrix
178 | * @param width width of the distance matrix
179 | * @param k number of neighbors to consider
180 | */
181 | __global__ void cuParallelSqrt(float *dist, int width, int k){
182 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x;
183 | unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y;
184 |   if (xIndex<width && yIndex<k)
185 |     dist[yIndex*width + xIndex] = sqrt(dist[yIndex*width + xIndex]);
186 | }
187 |
188 |
189 | //-----------------------------------------------------------------------------------------------//
190 | //                                   K-th NEAREST NEIGHBORS                                       //
191 | //-----------------------------------------------------------------------------------------------//
192 |
193 | /**
194 |   * K nearest neighbor search for point sets already resident on the GPU:
195 |   * - Kernel 1 computes all reference/query squared distances
196 |   * - Kernel 2 sorts each distance column and keeps the k smallest (1-based indexes)
197 |   * - Kernel 3 (left disabled below) would take the square root of the kept distances
198 |   *   (the caller in knn_pytorch.c provides dist_dev / ind_dev and loops over the batch)
199 |   *
200 |   * @param ref_dev    reference points ; pointer to linear device matrix
201 |   * @param ref_nb     number of reference points ; width of the matrix
202 |   * @param query_dev  query points ; pointer to linear device matrix
203 |   * @param query_nb   number of query points ; width of the matrix
204 |   * @param dim        dimension of the points ; height of the matrices
205 |   * @param k          number of neighbors to consider
206 |   * @param dist_dev   scratch buffer for the distances ; pointer to linear device matrix
207 |   * @param ind_dev    indexes of the k nearest neighbors ; pointer to linear device matrix
208 |   * @param stream     CUDA stream the kernels are launched on
209 |   *
210 |   */
211 | void knn_device(float* ref_dev, int ref_nb, float* query_dev, int query_nb,
212 |     int dim, int k, float* dist_dev, long* ind_dev, cudaStream_t stream){
213 |
214 |   // Grids and threads
215 |   dim3 g_16x16(query_nb/16, ref_nb/16, 1);
216 |   dim3 t_16x16(16, 16, 1);
217 |   if (query_nb%16 != 0) g_16x16.x += 1;
218 |   if (ref_nb  %16 != 0) g_16x16.y += 1;
219 |   //
220 |   dim3 g_256x1(query_nb/256, 1, 1);
221 |   dim3 t_256x1(256, 1, 1);
222 |   if (query_nb%256 != 0) g_256x1.x += 1;
223 |
224 |   dim3 g_k_16x16(query_nb/16, k/16, 1);
225 |   dim3 t_k_16x16(16, 16, 1);
226 |   if (query_nb%16 != 0) g_k_16x16.x += 1;
227 |   if (k  %16 != 0) g_k_16x16.y += 1;
228 |
229 |   // Kernel 1: Compute all the distances
230 |   cuComputeDistanceGlobal<<<g_16x16, t_16x16, 0, stream>>>(ref_dev, ref_nb,
231 | query_dev, query_nb, dim, dist_dev);
232 |
233 | // Kernel 2: Sort each column
234 |   cuInsertionSort<<<g_256x1, t_256x1, 0, stream>>>(dist_dev, ind_dev,
235 | query_nb, ref_nb, k);
236 |
237 | // Kernel 3: Compute square root of k first elements
238 |   // cuParallelSqrt<<<g_k_16x16, t_k_16x16, 0, stream>>>(dist_dev, query_nb, k);
239 |
240 | #if DEBUG
241 | unsigned int size_of_float = sizeof(float);
242 | unsigned long size_of_long = sizeof(long);
243 |
244 | float* dist_host = new float[query_nb * k];
245 | long* idx_host = new long[query_nb * k];
246 |
247 | // Memory copy of output from device to host
248 | cudaMemcpy(&dist_host[0], dist_dev,
249 | query_nb * k *size_of_float, cudaMemcpyDeviceToHost);
250 |
251 | cudaMemcpy(&idx_host[0], ind_dev,
252 | query_nb * k * size_of_long, cudaMemcpyDeviceToHost);
253 |
254 | int i = 0;
255 | for(i = 0; i < 100; i++){
256 | printf("IDX[%d]: %d\n", i, (int)idx_host[i]);
257 | }
258 | #endif
259 | }
260 |
--------------------------------------------------------------------------------
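A NumPy reference of what the two active kernels above compute, as a sketch with assumed sizes rather than repository code: cuComputeDistanceGlobal fills a ref_nb x query_nb matrix of squared distances, and cuInsertionSort keeps, for each query column, the k smallest entries together with their 1-based reference indexes.

    import numpy as np

    # Sketch with assumed sizes; mirrors one batch of the CUDA path above.
    dim, ref_nb, query_nb, k = 3, 500, 500, 1
    ref = np.random.rand(dim, ref_nb).astype(np.float32)      # reference points, one per column
    query = np.random.rand(dim, query_nb).astype(np.float32)  # query points, one per column

    # cuComputeDistanceGlobal: AB[i, j] = squared distance between reference i and query j
    ab = ((ref[:, :, None] - query[:, None, :]) ** 2).sum(axis=0)

    # cuInsertionSort: per query column, the k smallest distances and their 1-based indexes
    order = np.argsort(ab, axis=0)[:k, :]
    dist_k = np.take_along_axis(ab, order, axis=0)
    ind_k = order + 1
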
/scripts/lib/knn/src/knn_cuda_kernel.h:
--------------------------------------------------------------------------------
1 | #ifndef _MATHUTIL_CUDA_KERNEL
2 | #define _MATHUTIL_CUDA_KERNEL
3 |
4 | #define IDX2D(i, j, dj) (dj * i + j)
5 | #define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk))
6 |
7 | #define BLOCK 512
8 | #define MAX_STREAMS 512
9 |
10 | #ifdef __cplusplus
11 | extern "C" {
12 | #endif
13 |
14 | void knn_device(float* ref_dev, int ref_width,
15 | float* query_dev, int query_width,
16 | int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream);
17 |
18 | #ifdef __cplusplus
19 | }
20 | #endif
21 |
22 | #endif
23 |
--------------------------------------------------------------------------------
/scripts/lib/knn/src/knn_pytorch.c:
--------------------------------------------------------------------------------
1 | #include <THC/THC.h>
2 | #include "knn_cuda_kernel.h"
3 |
4 | extern THCState *state;
5 |
6 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor,
7 | THCudaLongTensor *idx_tensor) {
8 |
9 | THCAssertSameGPU(THCudaTensor_checkGPU(state, 3, idx_tensor, ref_tensor, query_tensor));
10 | long batch, ref_nb, query_nb, dim, k;
11 | THArgCheck(THCudaTensor_nDimension(state, ref_tensor) == 3 , 0, "ref_tensor: 3D Tensor expected");
12 | THArgCheck(THCudaTensor_nDimension(state, query_tensor) == 3 , 1, "query_tensor: 3D Tensor expected");
13 | THArgCheck(THCudaLongTensor_nDimension(state, idx_tensor) == 3 , 3, "idx_tensor: 3D Tensor expected");
14 | THArgCheck(THCudaTensor_size(state, ref_tensor, 0) == THCudaTensor_size(state, query_tensor,0), 0, "input sizes must match");
15 | THArgCheck(THCudaTensor_size(state, ref_tensor, 1) == THCudaTensor_size(state, query_tensor,1), 0, "input sizes must match");
16 | THArgCheck(THCudaTensor_size(state, idx_tensor, 2) == THCudaTensor_size(state, query_tensor,2), 0, "input sizes must match");
17 |
18 | //ref_tensor = THCudaTensor_newContiguous(state, ref_tensor);
19 | //query_tensor = THCudaTensor_newContiguous(state, query_tensor);
20 |
21 | batch = THCudaLongTensor_size(state, ref_tensor, 0);
22 | dim = THCudaTensor_size(state, ref_tensor, 1);
23 | k = THCudaLongTensor_size(state, idx_tensor, 1);
24 | ref_nb = THCudaTensor_size(state, ref_tensor, 2);
25 | query_nb = THCudaTensor_size(state, query_tensor, 2);
26 |
27 | float *ref_dev = THCudaTensor_data(state, ref_tensor);
28 | float *query_dev = THCudaTensor_data(state, query_tensor);
29 | long *idx_dev = THCudaLongTensor_data(state, idx_tensor);
30 | // scratch buffer for distances
31 | float *dist_dev = (float*)THCudaMalloc(state, ref_nb * query_nb * sizeof(float));
32 |
33 | for (int b = 0; b < batch; b++) {
34 | knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k,
35 | dist_dev, idx_dev + b * k * query_nb, THCState_getCurrentStream(state));
36 | }
37 | // free buffer
38 | THCudaFree(state, dist_dev);
39 | //printf("aaaaa\n");
40 | // check for errors
41 | cudaError_t err = cudaGetLastError();
42 | if (err != cudaSuccess) {
43 | printf("error in knn: %s\n", cudaGetErrorString(err));
44 | THError("aborting");
45 | }
46 |
47 | return 1;
48 | }
49 |
--------------------------------------------------------------------------------
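The argument checks above fix a simple shape contract. The sketch below restates it from the Python side; it is illustrative only, the tensor names and the call through the generated knn_pytorch wrapper are assumptions, and it needs the built extension plus a CUDA device to actually run.

    import torch

    b, dim, n_ref, n_query, k = 1, 3, 500, 500, 1
    ref = torch.rand(b, dim, n_ref).cuda()            # (batch, dim, ref_nb)
    query = torch.rand(b, dim, n_query).cuda()        # (batch, dim, query_nb); batch and dim must match ref
    idx = torch.zeros(b, k, n_query).long().cuda()    # filled in place with 1-based neighbour indexes
    # knn_pytorch.knn(ref, query, idx)                # callers such as lib/loss.py subtract 1 afterwards
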
/scripts/lib/knn/src/knn_pytorch.h:
--------------------------------------------------------------------------------
1 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor,
2 | THCudaLongTensor *idx_tensor);
3 |
--------------------------------------------------------------------------------
/scripts/lib/loss.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.loss import _Loss
2 | from torch.autograd import Variable
3 | import torch
4 | import time
5 | import numpy as np
6 | import torch.nn as nn
7 | import random
8 | import torch.backends.cudnn as cudnn
9 | from lib.knn.__init__ import KNearestNeighbor
10 |
11 |
12 | def loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, num_point_mesh, sym_list):
13 | knn = KNearestNeighbor(1)
14 | bs, num_p, _ = pred_c.size()
15 |
16 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1))
17 |
18 | base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\
19 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \
20 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \
21 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \
22 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \
23 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \
24 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \
25 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \
26 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3)
27 |
28 | ori_base = base
29 | base = base.contiguous().transpose(2, 1).contiguous()
30 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3)
31 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3)
32 | ori_target = target
33 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3)
34 | ori_t = pred_t
35 | points = points.contiguous().view(bs * num_p, 1, 3)
36 | pred_c = pred_c.contiguous().view(bs * num_p)
37 |
38 | pred = torch.add(torch.bmm(model_points, base), points + pred_t)
39 |
40 | if not refine:
41 | if idx[0].item() in sym_list:
42 | target = target[0].transpose(1, 0).contiguous().view(3, -1)
43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1)
44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
45 | target = torch.index_select(target, 1, inds.view(-1) - 1)
46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous()
47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous()
48 |
49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1)
50 | loss = torch.mean((dis * pred_c - w * torch.log(pred_c)), dim=0)
51 |
52 |
53 | pred_c = pred_c.view(bs, num_p)
54 | how_max, which_max = torch.max(pred_c, 1)
55 | dis = dis.view(bs, num_p)
56 |
57 |
58 | t = ori_t[which_max[0]] + points[which_max[0]]
59 | points = points.view(1, bs * num_p, 3)
60 |
61 | ori_base = ori_base[which_max[0]].view(1, 3, 3).contiguous()
62 | ori_t = t.repeat(bs * num_p, 1).contiguous().view(1, bs * num_p, 3)
63 | new_points = torch.bmm((points - ori_t), ori_base).contiguous()
64 |
65 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous()
66 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3)
67 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous()
68 |
69 | # print('------------> ', dis[0][which_max[0]].item(), pred_c[0][which_max[0]].item(), idx[0].item())
70 | del knn
71 | return loss, dis[0][which_max[0]], new_points.detach(), new_target.detach()
72 |
73 |
74 | class Loss(_Loss):
75 |
76 | def __init__(self, num_points_mesh, sym_list):
77 | super(Loss, self).__init__(True)
78 | self.num_pt_mesh = num_points_mesh
79 | self.sym_list = sym_list
80 |
81 | def forward(self, pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine):
82 |
83 | return loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, self.num_pt_mesh, self.sym_list)
84 |
--------------------------------------------------------------------------------
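The nine-term `base` tensor built in loss_calculation is the standard rotation matrix of a unit quaternion stored in (w, x, y, z) order. A small NumPy check, a sketch rather than repository code, makes the element layout explicit:

    import numpy as np

    def quat_to_mat(q):
        # q = (w, x, y, z), already normalised; same element layout as `base` above
        w, x, y, z = q
        return np.array([
            [1 - 2*(y*y + z*z), 2*(x*y - w*z),     2*(w*y + x*z)],
            [2*(x*y + w*z),     1 - 2*(x*x + z*z), 2*(y*z - w*x)],
            [2*(x*z - w*y),     2*(w*x + y*z),     1 - 2*(x*x + y*y)],
        ])

    q = np.random.randn(4)
    q /= np.linalg.norm(q)
    R = quat_to_mat(q)
    assert np.allclose(R @ R.T, np.eye(3), atol=1e-6)   # rotation matrices are orthonormal
    assert np.isclose(np.linalg.det(R), 1.0, atol=1e-6)

The loss then right-multiplies the (N, 3) model points by the transpose of this matrix, which rotates row-vector points, and adds the per-point translation before measuring the distance to the target points.
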
/scripts/lib/loss.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/loss.pyc
--------------------------------------------------------------------------------
/scripts/lib/loss_refiner.py:
--------------------------------------------------------------------------------
1 | from torch.nn.modules.loss import _Loss
2 | from torch.autograd import Variable
3 | import torch
4 | import time
5 | import numpy as np
6 | import torch.nn as nn
7 | import random
8 | import torch.backends.cudnn as cudnn
9 | from lib.knn.__init__ import KNearestNeighbor
10 |
11 |
12 | def loss_calculation(pred_r, pred_t, target, model_points, idx, points, num_point_mesh, sym_list):
13 | knn = KNearestNeighbor(1)
14 | pred_r = pred_r.view(1, 1, -1)
15 | pred_t = pred_t.view(1, 1, -1)
16 | bs, num_p, _ = pred_r.size()
17 | num_input_points = len(points[0])
18 |
19 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1))
20 |
21 | base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\
22 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \
23 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \
24 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \
25 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \
26 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \
27 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \
28 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \
29 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3)
30 |
31 | ori_base = base
32 | base = base.contiguous().transpose(2, 1).contiguous()
33 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3)
34 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3)
35 | ori_target = target
36 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3)
37 | ori_t = pred_t
38 |
39 | pred = torch.add(torch.bmm(model_points, base), pred_t)
40 |
41 | if idx[0].item() in sym_list:
42 | target = target[0].transpose(1, 0).contiguous().view(3, -1)
43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1)
44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
45 | target = torch.index_select(target, 1, inds.view(-1) - 1)
46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous()
47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous()
48 |
49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1)
50 |
51 | t = ori_t[0]
52 | points = points.view(1, num_input_points, 3)
53 |
54 | ori_base = ori_base[0].view(1, 3, 3).contiguous()
55 | ori_t = t.repeat(bs * num_input_points, 1).contiguous().view(1, bs * num_input_points, 3)
56 | new_points = torch.bmm((points - ori_t), ori_base).contiguous()
57 |
58 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous()
59 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3)
60 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous()
61 |
62 | # print('------------> ', dis.item(), idx[0].item())
63 | del knn
64 | return dis, new_points.detach(), new_target.detach()
65 |
66 |
67 | class Loss_refine(_Loss):
68 |
69 | def __init__(self, num_points_mesh, sym_list):
70 | super(Loss_refine, self).__init__(True)
71 | self.num_pt_mesh = num_points_mesh
72 | self.sym_list = sym_list
73 |
74 |
75 | def forward(self, pred_r, pred_t, target, model_points, idx, points):
76 | return loss_calculation(pred_r, pred_t, target, model_points, idx, points, self.num_pt_mesh, self.sym_list)
77 |
--------------------------------------------------------------------------------
/scripts/lib/network.py:
--------------------------------------------------------------------------------
1 | import argparse
2 | import os
3 | import random
4 | import torch
5 | import torch.nn as nn
6 | import torch.nn.parallel
7 | import torch.backends.cudnn as cudnn
8 | import torch.optim as optim
9 | import torch.utils.data
10 | import torchvision.transforms as transforms
11 | import torchvision.utils as vutils
12 | from torch.autograd import Variable
13 | from PIL import Image
14 | import numpy as np
15 | import pdb
16 | import torch.nn.functional as F
17 | from lib.pspnet import PSPNet
18 |
19 | psp_models = {
20 | 'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'),
21 | 'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'),
22 | 'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'),
23 | 'resnet101': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet101'),
24 | 'resnet152': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet152')
25 | }
26 |
27 | class ModifiedResnet(nn.Module):
28 |
29 | def __init__(self, usegpu=True):
30 | super(ModifiedResnet, self).__init__()
31 |
32 | self.model = psp_models['resnet18'.lower()]()
33 | self.model = nn.DataParallel(self.model)
34 |
35 | def forward(self, x):
36 | x = self.model(x)
37 | return x
38 |
39 | class PoseNetFeat(nn.Module):
40 | def __init__(self, num_points):
41 | super(PoseNetFeat, self).__init__()
42 | self.conv1 = torch.nn.Conv1d(3, 64, 1)
43 | self.conv2 = torch.nn.Conv1d(64, 128, 1)
44 |
45 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1)
46 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1)
47 |
48 | self.conv5 = torch.nn.Conv1d(256, 512, 1)
49 | self.conv6 = torch.nn.Conv1d(512, 1024, 1)
50 |
51 | self.ap1 = torch.nn.AvgPool1d(num_points)
52 | self.num_points = num_points
53 | def forward(self, x, emb):
54 | x = F.relu(self.conv1(x))
55 | emb = F.relu(self.e_conv1(emb))
56 | pointfeat_1 = torch.cat((x, emb), dim=1)
57 |
58 | x = F.relu(self.conv2(x))
59 | emb = F.relu(self.e_conv2(emb))
60 | pointfeat_2 = torch.cat((x, emb), dim=1)
61 |
62 | x = F.relu(self.conv5(pointfeat_2))
63 | x = F.relu(self.conv6(x))
64 |
65 | ap_x = self.ap1(x)
66 |
67 | ap_x = ap_x.view(-1, 1024, 1).repeat(1, 1, self.num_points)
68 | return torch.cat([pointfeat_1, pointfeat_2, ap_x], 1) #128 + 256 + 1024
69 |
70 | class PoseNet(nn.Module):
71 | def __init__(self, num_points, num_obj):
72 | super(PoseNet, self).__init__()
73 | self.num_points = num_points
74 | self.cnn = ModifiedResnet()
75 | self.feat = PoseNetFeat(num_points)
76 |
77 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1)
78 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1)
79 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1)
80 |
81 | self.conv2_r = torch.nn.Conv1d(640, 256, 1)
82 | self.conv2_t = torch.nn.Conv1d(640, 256, 1)
83 | self.conv2_c = torch.nn.Conv1d(640, 256, 1)
84 |
85 | self.conv3_r = torch.nn.Conv1d(256, 128, 1)
86 | self.conv3_t = torch.nn.Conv1d(256, 128, 1)
87 | self.conv3_c = torch.nn.Conv1d(256, 128, 1)
88 |
89 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) #quaternion
90 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) #translation
91 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) #confidence
92 |
93 | self.num_obj = num_obj
94 |
95 | def forward(self, img, x, choose, obj):
96 | out_img = self.cnn(img)
97 |
98 | bs, di, _, _ = out_img.size()
99 |
100 | emb = out_img.view(bs, di, -1)
101 | choose = choose.repeat(1, di, 1)
102 | emb = torch.gather(emb, 2, choose).contiguous()
103 |
104 | x = x.transpose(2, 1).contiguous()
105 | ap_x = self.feat(x, emb)
106 |
107 | rx = F.relu(self.conv1_r(ap_x))
108 | tx = F.relu(self.conv1_t(ap_x))
109 | cx = F.relu(self.conv1_c(ap_x))
110 |
111 | rx = F.relu(self.conv2_r(rx))
112 | tx = F.relu(self.conv2_t(tx))
113 | cx = F.relu(self.conv2_c(cx))
114 |
115 | rx = F.relu(self.conv3_r(rx))
116 | tx = F.relu(self.conv3_t(tx))
117 | cx = F.relu(self.conv3_c(cx))
118 |
119 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points)
120 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points)
121 | cx = torch.sigmoid(self.conv4_c(cx)).view(bs, self.num_obj, 1, self.num_points)
122 |
123 | b = 0
124 | out_rx = torch.index_select(rx[b], 0, obj[b])
125 | out_tx = torch.index_select(tx[b], 0, obj[b])
126 | out_cx = torch.index_select(cx[b], 0, obj[b])
127 |
128 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous()
129 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous()
130 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous()
131 |
132 | return out_rx, out_tx, out_cx, emb.detach()
133 |
134 |
135 |
136 | class PoseRefineNetFeat(nn.Module):
137 | def __init__(self, num_points):
138 | super(PoseRefineNetFeat, self).__init__()
139 | self.conv1 = torch.nn.Conv1d(3, 64, 1)
140 | self.conv2 = torch.nn.Conv1d(64, 128, 1)
141 |
142 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1)
143 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1)
144 |
145 | self.conv5 = torch.nn.Conv1d(384, 512, 1)
146 | self.conv6 = torch.nn.Conv1d(512, 1024, 1)
147 |
148 | self.ap1 = torch.nn.AvgPool1d(num_points)
149 | self.num_points = num_points
150 |
151 | def forward(self, x, emb):
152 | x = F.relu(self.conv1(x))
153 | emb = F.relu(self.e_conv1(emb))
154 | pointfeat_1 = torch.cat([x, emb], dim=1)
155 |
156 | x = F.relu(self.conv2(x))
157 | emb = F.relu(self.e_conv2(emb))
158 | pointfeat_2 = torch.cat([x, emb], dim=1)
159 |
160 | pointfeat_3 = torch.cat([pointfeat_1, pointfeat_2], dim=1)
161 |
162 | x = F.relu(self.conv5(pointfeat_3))
163 | x = F.relu(self.conv6(x))
164 |
165 | ap_x = self.ap1(x)
166 |
167 | ap_x = ap_x.view(-1, 1024)
168 | return ap_x
169 |
170 | class PoseRefineNet(nn.Module):
171 | def __init__(self, num_points, num_obj):
172 | super(PoseRefineNet, self).__init__()
173 | self.num_points = num_points
174 | self.feat = PoseRefineNetFeat(num_points)
175 |
176 | self.conv1_r = torch.nn.Linear(1024, 512)
177 | self.conv1_t = torch.nn.Linear(1024, 512)
178 |
179 | self.conv2_r = torch.nn.Linear(512, 128)
180 | self.conv2_t = torch.nn.Linear(512, 128)
181 |
182 | self.conv3_r = torch.nn.Linear(128, num_obj*4) #quaternion
183 | self.conv3_t = torch.nn.Linear(128, num_obj*3) #translation
184 |
185 | self.num_obj = num_obj
186 |
187 | def forward(self, x, emb, obj):
188 | bs = x.size()[0]
189 |
190 | x = x.transpose(2, 1).contiguous()
191 | ap_x = self.feat(x, emb)
192 |
193 | rx = F.relu(self.conv1_r(ap_x))
194 | tx = F.relu(self.conv1_t(ap_x))
195 |
196 | rx = F.relu(self.conv2_r(rx))
197 | tx = F.relu(self.conv2_t(tx))
198 |
199 | rx = self.conv3_r(rx).view(bs, self.num_obj, 4)
200 | tx = self.conv3_t(tx).view(bs, self.num_obj, 3)
201 |
202 | b = 0
203 | out_rx = torch.index_select(rx[b], 0, obj[b])
204 | out_tx = torch.index_select(tx[b], 0, obj[b])
205 |
206 | return out_rx, out_tx
207 |
--------------------------------------------------------------------------------
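The least obvious tensor manipulation in PoseNet.forward is the gather that turns the dense CNN feature map into one colour embedding per sampled depth pixel. A self-contained sketch of that step, with sizes assumed rather than taken from the repository:

    import torch

    bs, di, h, w, n_pts = 1, 32, 120, 160, 500            # di: embedding channels, n_pts: sampled pixels
    out_img = torch.randn(bs, di, h, w)                   # per-pixel embeddings from ModifiedResnet
    choose = torch.randint(0, h * w, (bs, 1, n_pts))      # flat indices of the chosen mask pixels

    emb = out_img.view(bs, di, -1)                        # flatten the spatial grid: (bs, di, h*w)
    emb = torch.gather(emb, 2, choose.repeat(1, di, 1))   # (bs, di, n_pts), matched 1:1 with the cloud

These embeddings are concatenated with the point-cloud features in PoseNetFeat, giving the 128 + 256 + 1024 = 1408 channels consumed by conv1_r, conv1_t and conv1_c.
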
/scripts/lib/pspnet.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/pspnet.pyc
--------------------------------------------------------------------------------
/scripts/lib/transformations.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/transformations.pyc
--------------------------------------------------------------------------------
/scripts/loss.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import torch
3 | import torch.nn.functional as F
4 |
5 | def flatten(tensor):
6 | """Flattens a given tensor such that the channel axis is first.
7 | The shapes are transformed as follows:
8 | (N, C, D, H, W) -> (C, N * D * H * W)
9 | """
10 | C = tensor.size(1)
11 | # new axis order
12 | axis_order = (1, 0) + tuple(range(2, tensor.dim()))
13 | # Transpose: (N, C, D, H, W) -> (C, N, D, H, W)
14 | transposed = tensor.permute(axis_order)
15 | # Flatten: (C, N, D, H, W) -> (C, N * D * H * W)
16 | return transposed.contiguous().view(C, -1)
17 |
18 |
19 | class DiceLoss(nn.Module):
20 | def __init__(self):
21 | super().__init__()
22 | self.epsilon = 1e-5
23 |
24 | def forward(self, output, target):
25 | assert output.size() == target.size(), "'input' and 'target' must have the same shape"
26 | output = F.softmax(output, dim=1)
27 | output = flatten(output)
28 | target = flatten(target)
29 | # intersect = (output * target).sum(-1).sum() + self.epsilon
30 | # denominator = ((output + target).sum(-1)).sum() + self.epsilon
31 |
32 | intersect = (output * target).sum(-1)
33 | denominator = (output + target).sum(-1)
34 | dice = intersect / denominator
35 | dice = torch.mean(dice)
36 | return 1 - dice
37 | # return 1 - 2. * intersect / denominator
38 |
--------------------------------------------------------------------------------
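DiceLoss expects the target already one-hot encoded to the same (N, C, H, W) shape as the logits; note also that the factor of 2 from the usual Dice coefficient only appears in the commented-out variant. A usage sketch, assuming this file is importable as a module:

    import torch
    import torch.nn.functional as F

    num_classes = 21
    logits = torch.randn(2, num_classes, 64, 64)              # raw segmentation scores
    labels = torch.randint(0, num_classes, (2, 64, 64))       # integer class map
    target = F.one_hot(labels, num_classes).permute(0, 3, 1, 2).float()

    criterion = DiceLoss()
    loss = criterion(logits, target)                          # scalar; lower means better overlap
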
/scripts/matrix.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/matrix.npy
--------------------------------------------------------------------------------
/scripts/model/build_BiSeNet.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn
3 | from model.build_contextpath import build_contextpath
4 | import warnings
5 | warnings.filterwarnings(action='ignore')
6 |
7 | class ConvBlock(torch.nn.Module):
8 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1):
9 | super(ConvBlock,self).__init__()
10 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False)
11 | self.bn = nn.BatchNorm2d(out_channels)
12 | self.relu = nn.ReLU()
13 |
14 | def forward(self, input):
15 | x = self.conv1(input)
16 | return self.relu(self.bn(x))
17 |
18 | class Spatial_path(torch.nn.Module):
19 | def __init__(self):
20 | super(Spatial_path,self).__init__()
21 | self.convblock1 = ConvBlock(in_channels=3, out_channels=64)
22 | self.convblock2 = ConvBlock(in_channels=64, out_channels=128)
23 | self.convblock3 = ConvBlock(in_channels=128, out_channels=256)
24 |
25 | def forward(self, input):
26 | x = self.convblock1(input)
27 | x = self.convblock2(x)
28 | x = self.convblock3(x)
29 | return x
30 |
31 | class AttentionRefinementModule(torch.nn.Module):
32 | def __init__(self, in_channels, out_channels):
33 | super(AttentionRefinementModule,self).__init__()
34 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1)
35 | self.bn = nn.BatchNorm2d(out_channels)
36 | self.sigmoid = nn.Sigmoid()
37 | self.in_channels = in_channels
38 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
39 |
40 | def forward(self, input):
41 | # global average pooling
42 | x = self.avgpool(input)
43 | assert self.in_channels == x.size(1), 'in_channels and out_channels should all be {}'.format(x.size(1))
44 | x = self.conv(x)
45 | # x = self.sigmoid(self.bn(x))
46 | x = self.sigmoid(x)
47 | # channels of input and x should be same
48 | x = torch.mul(input, x)
49 | return x
50 |
51 |
52 | class FeatureFusionModule(torch.nn.Module):
53 | def __init__(self, num_classes, in_channels):
54 | super(FeatureFusionModule,self).__init__()
55 | # self.in_channels = input_1.channels + input_2.channels
56 | # resnet101 3328 = 256(from context path) + 1024(from spatial path) + 2048(from spatial path)
57 | # resnet18 1024 = 256(from context path) + 256(from spatial path) + 512(from spatial path)
58 | self.in_channels = in_channels
59 |
60 | self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1)
61 | self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
62 | self.relu = nn.ReLU()
63 | self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1)
64 | self.sigmoid = nn.Sigmoid()
65 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
66 |
67 |
68 | def forward(self, input_1, input_2):
69 | x = torch.cat((input_1, input_2), dim=1)
70 | assert self.in_channels == x.size(1), 'in_channels of ConvBlock should be {}'.format(x.size(1))
71 | feature = self.convblock(x)
72 | x = self.avgpool(feature)
73 |
74 | x = self.relu(self.conv1(x))
75 | x = self.sigmoid(self.conv2(x))
76 | x = torch.mul(feature, x)
77 | x = torch.add(x, feature)
78 | return x
79 |
80 | class BiSeNet(torch.nn.Module):
81 | def __init__(self, num_classes, context_path):
82 | super(BiSeNet,self).__init__()
83 | # build spatial path
84 | self.saptial_path = Spatial_path()
85 |
86 | # build context path
87 | self.context_path = build_contextpath(name=context_path)
88 |
89 | # build attention refinement module for resnet 101
90 | if context_path == 'resnet101':
91 | self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024)
92 | self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048)
93 | # supervision block
94 | self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1)
95 | self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1)
96 | # build feature fusion module
97 | self.feature_fusion_module = FeatureFusionModule(num_classes, 3328)
98 |
99 | elif context_path == 'resnet18':
100 | # build attention refinement module for resnet 18
101 | self.attention_refinement_module1 = AttentionRefinementModule(256, 256)
102 | self.attention_refinement_module2 = AttentionRefinementModule(512, 512)
103 | # supervision block
104 | self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1)
105 | self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1)
106 | # build feature fusion module
107 | self.feature_fusion_module = FeatureFusionModule(num_classes, 1024)
108 | else:
109 |             print('Error: unsupported context_path network \n')
110 |
111 | # build final convolution
112 | self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1)
113 |
114 | self.init_weight()
115 |
116 | self.mul_lr = []
117 | self.mul_lr.append(self.saptial_path)
118 | self.mul_lr.append(self.attention_refinement_module1)
119 | self.mul_lr.append(self.attention_refinement_module2)
120 | self.mul_lr.append(self.supervision1)
121 | self.mul_lr.append(self.supervision2)
122 | self.mul_lr.append(self.feature_fusion_module)
123 | self.mul_lr.append(self.conv)
124 |
125 | def init_weight(self):
126 | for name, m in self.named_modules():
127 | if 'context_path' not in name:
128 | if isinstance(m, nn.Conv2d):
129 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu')
130 | elif isinstance(m, nn.BatchNorm2d):
131 | m.eps = 1e-5
132 | m.momentum = 0.1
133 | nn.init.constant_(m.weight, 1)
134 | nn.init.constant_(m.bias, 0)
135 |
136 | def forward(self, input):
137 | # output of spatial path
138 | sx = self.saptial_path(input)
139 |
140 | # output of context path
141 | cx1, cx2, tail = self.context_path(input)
142 | cx1 = self.attention_refinement_module1(cx1)
143 | cx2 = self.attention_refinement_module2(cx2)
144 | cx2 = torch.mul(cx2, tail)
145 | # upsampling
146 | cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear')
147 | cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear')
148 | cx = torch.cat((cx1, cx2), dim=1)
149 |
150 | if self.training == True:
151 | cx1_sup = self.supervision1(cx1)
152 | cx2_sup = self.supervision2(cx2)
153 | cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear')
154 | cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear')
155 |
156 | # output of feature fusion module
157 | result = self.feature_fusion_module(sx, cx)
158 |
159 | # upsampling
160 | result = torch.nn.functional.interpolate(result, scale_factor=8, mode='bilinear')
161 | result = self.conv(result)
162 |
163 | if self.training == True:
164 | return result, cx1_sup, cx2_sup
165 |
166 | return result
167 |
168 |
169 | if __name__ == '__main__':
170 | import os
171 | os.environ['CUDA_VISIBLE_DEVICES'] = '0'
172 | model = BiSeNet(21, 'resnet18')
173 | # model = nn.DataParallel(model)
174 |
175 | model = model.cuda()
176 | x = torch.rand(2, 3, 256, 256)
177 | record = model.parameters()
178 | # for key, params in model.named_parameters():
179 | # if 'bn' in key:
180 | # params.requires_grad = False
181 | from utils import group_weight
182 | # params_list = []
183 | # for module in model.mul_lr:
184 | # params_list = group_weight(params_list, module, nn.BatchNorm2d, 10)
185 | # params_list = group_weight(params_list, model.context_path, torch.nn.BatchNorm2d, 1)
186 |
187 | print(model.parameters())
188 |
--------------------------------------------------------------------------------
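A minimal instantiation sketch for the segmentation network above, assuming this file and build_contextpath.py are importable and that torchvision can fetch the pretrained backbone: in eval mode the supervision branches are skipped and the output is a per-pixel class-score map at the input resolution.

    import torch

    model = BiSeNet(num_classes=21, context_path='resnet18').cuda().eval()
    with torch.no_grad():
        rgb = torch.rand(1, 3, 480, 640).cuda()   # NCHW image, already normalised
        scores = model(rgb)                       # (1, 21, 480, 640); argmax over dim 1 gives the label map
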
/scripts/model/build_contextpath.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torchvision import models
3 |
4 |
5 | class resnet18(torch.nn.Module):
6 | def __init__(self, pretrained=True):
7 | super(resnet18,self).__init__()
8 | self.features = models.resnet18(pretrained=True)
9 | self.conv1 = self.features.conv1
10 | self.bn1 = self.features.bn1
11 | self.relu = self.features.relu
12 | self.maxpool1 = self.features.maxpool
13 | self.layer1 = self.features.layer1
14 | self.layer2 = self.features.layer2
15 | self.layer3 = self.features.layer3
16 | self.layer4 = self.features.layer4
17 |
18 | def forward(self, input):
19 | x = self.conv1(input)
20 | x = self.relu(self.bn1(x))
21 | x = self.maxpool1(x)
22 | feature1 = self.layer1(x) # 1 / 4
23 | feature2 = self.layer2(feature1) # 1 / 8
24 | feature3 = self.layer3(feature2) # 1 / 16
25 | feature4 = self.layer4(feature3) # 1 / 32
26 | # global average pooling to build tail
27 | tail = torch.mean(feature4, 3, keepdim=True)
28 | tail = torch.mean(tail, 2, keepdim=True)
29 | return feature3, feature4, tail
30 |
31 |
32 | class resnet101(torch.nn.Module):
33 | def __init__(self, pretrained=True):
34 | super(resnet101,self).__init__()
35 | self.features = models.resnet101(pretrained=True)
36 | self.conv1 = self.features.conv1
37 | self.bn1 = self.features.bn1
38 | self.relu = self.features.relu
39 | self.maxpool1 = self.features.maxpool
40 | self.layer1 = self.features.layer1
41 | self.layer2 = self.features.layer2
42 | self.layer3 = self.features.layer3
43 | self.layer4 = self.features.layer4
44 |
45 | def forward(self, input):
46 | x = self.conv1(input)
47 | x = self.relu(self.bn1(x))
48 | x = self.maxpool1(x)
49 | feature1 = self.layer1(x) # 1 / 4
50 | feature2 = self.layer2(feature1) # 1 / 8
51 | feature3 = self.layer3(feature2) # 1 / 16
52 | feature4 = self.layer4(feature3) # 1 / 32
53 | # global average pooling to build tail
54 | tail = torch.mean(feature4, 3, keepdim=True)
55 | tail = torch.mean(tail, 2, keepdim=True)
56 | return feature3, feature4, tail
57 |
58 |
59 | def build_contextpath(name):
60 | model = {
61 | 'resnet18': resnet18(pretrained=True),
62 | 'resnet101': resnet101(pretrained=True)
63 | }
64 | return model[name]
65 |
66 |
67 | if __name__ == '__main__':
68 | #
69 | model_18 = build_contextpath('resnet18')
70 | model_101 = build_contextpath('resnet101')
71 | x = torch.rand(1, 3, 256, 256)
72 |
73 | y_18 = model_18(x)
74 | y_101 = model_101(x)
75 |
--------------------------------------------------------------------------------
/scripts/predict.npy:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/predict.npy
--------------------------------------------------------------------------------
/scripts/tools/__pycache__/_init_paths.cpython-35.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/__pycache__/_init_paths.cpython-35.pyc
--------------------------------------------------------------------------------
/scripts/tools/_init_paths.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | sys.path.insert(0, os.getcwd())
--------------------------------------------------------------------------------
/scripts/tools/_init_paths.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/_init_paths.pyc
--------------------------------------------------------------------------------
/scripts/tools/eval_linemod.py:
--------------------------------------------------------------------------------
1 | import _init_paths
2 | import argparse
3 | import os
4 | import random
5 | import numpy as np
6 | import yaml
7 | import copy
8 | import torch
9 | import torch.nn as nn
10 | import torch.nn.parallel
11 | import torch.backends.cudnn as cudnn
12 | import torch.optim as optim
13 | import torch.utils.data
14 | import torchvision.datasets as dset
15 | import torchvision.transforms as transforms
16 | import torchvision.utils as vutils
17 | from torch.autograd import Variable
18 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod
19 | from lib.network import PoseNet, PoseRefineNet
20 | from lib.loss import Loss
21 | from lib.loss_refiner import Loss_refine
22 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
23 | from lib.knn.__init__ import KNearestNeighbor
24 |
25 | parser = argparse.ArgumentParser()
26 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir')
27 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model')
28 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model')
29 | opt = parser.parse_args()
30 |
31 | num_objects = 13
32 | objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15]
33 | num_points = 500
34 | iteration = 2
35 | bs = 1
36 | dataset_config_dir = 'datasets/linemod/dataset_config'
37 | output_result_dir = 'experiments/eval_result/linemod'
38 | knn = KNearestNeighbor(1)
39 |
40 | estimator = PoseNet(num_points = num_points, num_obj = num_objects)
41 | estimator.cuda()
42 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_objects)
43 | refiner.cuda()
44 | estimator.load_state_dict(torch.load(opt.model))
45 | refiner.load_state_dict(torch.load(opt.refine_model))
46 | estimator.eval()
47 | refiner.eval()
48 |
49 | testdataset = PoseDataset_linemod('eval', num_points, False, opt.dataset_root, 0.0, True)
50 | testdataloader = torch.utils.data.DataLoader(testdataset, batch_size=1, shuffle=False, num_workers=10)
51 |
52 | sym_list = testdataset.get_sym_list()
53 | num_points_mesh = testdataset.get_num_points_mesh()
54 | criterion = Loss(num_points_mesh, sym_list)
55 | criterion_refine = Loss_refine(num_points_mesh, sym_list)
56 |
57 | diameter = []
58 | meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r')
59 | meta = yaml.load(meta_file)
60 | for obj in objlist:
61 | diameter.append(meta[obj]['diameter'] / 1000.0 * 0.1)
62 | print(diameter)
63 |
64 | success_count = [0 for i in range(num_objects)]
65 | num_count = [0 for i in range(num_objects)]
66 | fw = open('{0}/eval_result_logs.txt'.format(output_result_dir), 'w')
67 |
68 | for i, data in enumerate(testdataloader, 0):
69 | points, choose, img, target, model_points, idx = data
70 | if len(points.size()) == 2:
71 | print('No.{0} NOT Pass! Lost detection!'.format(i))
72 | fw.write('No.{0} NOT Pass! Lost detection!\n'.format(i))
73 | continue
74 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \
75 | Variable(choose).cuda(), \
76 | Variable(img).cuda(), \
77 | Variable(target).cuda(), \
78 | Variable(model_points).cuda(), \
79 | Variable(idx).cuda()
80 |
81 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
82 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
83 | pred_c = pred_c.view(bs, num_points)
84 | how_max, which_max = torch.max(pred_c, 1)
85 | pred_t = pred_t.view(bs * num_points, 1, 3)
86 |
87 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
88 | my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
89 | my_pred = np.append(my_r, my_t)
90 |
91 | for ite in range(0, iteration):
92 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
93 | my_mat = quaternion_matrix(my_r)
94 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
95 | my_mat[0:3, 3] = my_t
96 |
97 | new_points = torch.bmm((points - T), R).contiguous()
98 | pred_r, pred_t = refiner(new_points, emb, idx)
99 | pred_r = pred_r.view(1, 1, -1)
100 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
101 | my_r_2 = pred_r.view(-1).cpu().data.numpy()
102 | my_t_2 = pred_t.view(-1).cpu().data.numpy()
103 | my_mat_2 = quaternion_matrix(my_r_2)
104 | my_mat_2[0:3, 3] = my_t_2
105 |
106 | my_mat_final = np.dot(my_mat, my_mat_2)
107 | my_r_final = copy.deepcopy(my_mat_final)
108 | my_r_final[0:3, 3] = 0
109 | my_r_final = quaternion_from_matrix(my_r_final, True)
110 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
111 |
112 | my_pred = np.append(my_r_final, my_t_final)
113 | my_r = my_r_final
114 | my_t = my_t_final
115 |
116 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
117 |
118 | model_points = model_points[0].cpu().detach().numpy()
119 | my_r = quaternion_matrix(my_r)[:3, :3]
120 | pred = np.dot(model_points, my_r.T) + my_t
121 | target = target[0].cpu().detach().numpy()
122 |
123 | if idx[0].item() in sym_list:
124 | pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous()
125 | target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous()
126 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0))
127 | target = torch.index_select(target, 1, inds.view(-1) - 1)
128 | dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item()
129 | else:
130 | dis = np.mean(np.linalg.norm(pred - target, axis=1))
131 |
132 | if dis < diameter[idx[0].item()]:
133 | success_count[idx[0].item()] += 1
134 | print('No.{0} Pass! Distance: {1}'.format(i, dis))
135 | fw.write('No.{0} Pass! Distance: {1}\n'.format(i, dis))
136 | else:
137 | print('No.{0} NOT Pass! Distance: {1}'.format(i, dis))
138 | fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(i, dis))
139 | num_count[idx[0].item()] += 1
140 |
141 | for i in range(num_objects):
142 | print('Object {0} success rate: {1}'.format(objlist[i], float(success_count[i]) / num_count[i]))
143 | fw.write('Object {0} success rate: {1}\n'.format(objlist[i], float(success_count[i]) / num_count[i]))
144 | print('ALL success rate: {0}'.format(float(sum(success_count)) / sum(num_count)))
145 | fw.write('ALL success rate: {0}\n'.format(float(sum(success_count)) / sum(num_count)))
146 | fw.close()
147 |
--------------------------------------------------------------------------------
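The pass/fail rule in the loop above is the ADD(-S) 0.1d criterion: a pose counts as correct when the average distance between the model points under the predicted and the ground-truth pose stays below 10% of the object diameter (models_info.yml stores diameters in millimetres, hence the `/ 1000.0 * 0.1`). A toy check of the thresholding logic, with made-up values:

    import numpy as np

    diameter_m = 0.172                      # example object diameter in metres
    threshold = diameter_m * 0.1            # same 10% rule as `diameter[idx]` above

    pred_pts = np.random.rand(500, 3)       # model points under the predicted pose
    gt_pts = pred_pts + 0.005               # pretend the prediction is off by 5 mm per axis

    add = np.mean(np.linalg.norm(pred_pts - gt_pts, axis=1))
    print(add < threshold)                  # True: about 8.7 mm < 17.2 mm
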
/scripts/tools/eval_ycb.py:
--------------------------------------------------------------------------------
1 | import _init_paths
2 | import argparse
3 | import os
4 | import copy
5 | import random
6 | import numpy as np
7 | from PIL import Image
8 | import scipy.io as scio
9 | import scipy.misc
10 | import numpy.ma as ma
11 | import math
12 | import torch
13 | import torch.nn as nn
14 | import torch.nn.parallel
15 | import torch.backends.cudnn as cudnn
16 | import torch.optim as optim
17 | import torch.utils.data
18 | import torchvision.datasets as dset
19 | import torchvision.transforms as transforms
20 | import torchvision.utils as vutils
21 | import torch.nn.functional as F
22 | from torch.autograd import Variable
23 | from datasets.ycb.dataset import PoseDataset
24 | from lib.network import PoseNet, PoseRefineNet
25 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
26 |
27 | parser = argparse.ArgumentParser()
28 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir')
29 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model')
30 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model')
31 | opt = parser.parse_args()
32 |
33 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
34 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
35 | xmap = np.array([[j for i in range(640)] for j in range(480)])
36 | ymap = np.array([[i for i in range(640)] for j in range(480)])
37 | cam_cx = 312.9869
38 | cam_cy = 241.3109
39 | cam_fx = 1066.778
40 | cam_fy = 1067.487
41 | cam_scale = 10000.0
42 | num_obj = 21
43 | img_width = 480
44 | img_length = 640
45 | num_points = 1000
46 | num_points_mesh = 500
47 | iteration = 2
48 | bs = 1
49 | dataset_config_dir = 'datasets/ycb/dataset_config'
50 | ycb_toolbox_dir = 'YCB_Video_toolbox'
51 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
52 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
53 |
54 | def get_bbox(posecnn_rois):
55 | rmin = int(posecnn_rois[idx][3]) + 1
56 | rmax = int(posecnn_rois[idx][5]) - 1
57 | cmin = int(posecnn_rois[idx][2]) + 1
58 | cmax = int(posecnn_rois[idx][4]) - 1
59 | r_b = rmax - rmin
60 | for tt in range(len(border_list)):
61 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
62 | r_b = border_list[tt + 1]
63 | break
64 | c_b = cmax - cmin
65 | for tt in range(len(border_list)):
66 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
67 | c_b = border_list[tt + 1]
68 | break
69 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
70 | rmin = center[0] - int(r_b / 2)
71 | rmax = center[0] + int(r_b / 2)
72 | cmin = center[1] - int(c_b / 2)
73 | cmax = center[1] + int(c_b / 2)
74 | if rmin < 0:
75 | delt = -rmin
76 | rmin = 0
77 | rmax += delt
78 | if cmin < 0:
79 | delt = -cmin
80 | cmin = 0
81 | cmax += delt
82 | if rmax > img_width:
83 | delt = rmax - img_width
84 | rmax = img_width
85 | rmin -= delt
86 | if cmax > img_length:
87 | delt = cmax - img_length
88 | cmax = img_length
89 | cmin -= delt
90 | return rmin, rmax, cmin, cmax
91 |
92 | estimator = PoseNet(num_points = num_points, num_obj = num_obj)
93 | estimator.cuda()
94 | estimator.load_state_dict(torch.load(opt.model))
95 | estimator.eval()
96 |
97 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj)
98 | refiner.cuda()
99 | refiner.load_state_dict(torch.load(opt.refine_model))
100 | refiner.eval()
101 |
102 | testlist = []
103 | input_file = open('{0}/test_data_list.txt'.format(dataset_config_dir))
104 | while 1:
105 | input_line = input_file.readline()
106 | if not input_line:
107 | break
108 | if input_line[-1:] == '\n':
109 | input_line = input_line[:-1]
110 | testlist.append(input_line)
111 | input_file.close()
112 | print(len(testlist))
113 |
114 | class_file = open('{0}/classes.txt'.format(dataset_config_dir))
115 | class_id = 1
116 | cld = {}
117 | while 1:
118 | class_input = class_file.readline()
119 | if not class_input:
120 | break
121 | class_input = class_input[:-1]
122 |
123 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))
124 | cld[class_id] = []
125 | while 1:
126 | input_line = input_file.readline()
127 | if not input_line:
128 | break
129 | input_line = input_line[:-1]
130 | input_line = input_line.split(' ')
131 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
132 | input_file.close()
133 | cld[class_id] = np.array(cld[class_id])
134 | class_id += 1
135 |
136 | for now in range(0, 2949):
137 | img = Image.open('{0}/{1}-color.png'.format(opt.dataset_root, testlist[now]))
138 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(opt.dataset_root, testlist[now])))
139 | posecnn_meta = scio.loadmat('{0}/results_PoseCNN_RSS2018/{1}.mat'.format(ycb_toolbox_dir, '%06d' % now))
140 | label = np.array(posecnn_meta['labels'])
141 | posecnn_rois = np.array(posecnn_meta['rois'])
142 |
143 | lst = posecnn_rois[:, 1:2].flatten()
144 | my_result_wo_refine = []
145 | my_result = []
146 |
147 | for idx in range(len(lst)):
148 | itemid = lst[idx]
149 | try:
150 | rmin, rmax, cmin, cmax = get_bbox(posecnn_rois)
151 |
152 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
153 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
154 | mask = mask_label * mask_depth
155 |
156 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
157 | if len(choose) > num_points:
158 | c_mask = np.zeros(len(choose), dtype=int)
159 | c_mask[:num_points] = 1
160 | np.random.shuffle(c_mask)
161 | choose = choose[c_mask.nonzero()]
162 | else:
163 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
164 |
165 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
166 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
167 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
168 | choose = np.array([choose])
169 |
170 | pt2 = depth_masked / cam_scale
171 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
172 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
173 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
174 |
175 | img_masked = np.array(img)[:, :, :3]
176 | img_masked = np.transpose(img_masked, (2, 0, 1))
177 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
178 |
179 | cloud = torch.from_numpy(cloud.astype(np.float32))
180 | choose = torch.LongTensor(choose.astype(np.int32))
181 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
182 | index = torch.LongTensor([itemid - 1])
183 |
184 | cloud = Variable(cloud).cuda()
185 | choose = Variable(choose).cuda()
186 | img_masked = Variable(img_masked).cuda()
187 | index = Variable(index).cuda()
188 |
189 | cloud = cloud.view(1, num_points, 3)
190 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
191 |
192 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
193 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
194 |
195 | pred_c = pred_c.view(bs, num_points)
196 | how_max, which_max = torch.max(pred_c, 1)
197 | pred_t = pred_t.view(bs * num_points, 1, 3)
198 | points = cloud.view(bs * num_points, 1, 3)
199 |
200 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
201 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
202 | my_pred = np.append(my_r, my_t)
203 | my_result_wo_refine.append(my_pred.tolist())
204 |
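    |                 # iterative refinement: re-express the cloud in the current object-frame estimate, predict a residual pose with the refiner, and compose it into the running transform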
205 | for ite in range(0, iteration):
206 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
207 | my_mat = quaternion_matrix(my_r)
208 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
209 | my_mat[0:3, 3] = my_t
210 |
211 | new_cloud = torch.bmm((cloud - T), R).contiguous()
212 | pred_r, pred_t = refiner(new_cloud, emb, index)
213 | pred_r = pred_r.view(1, 1, -1)
214 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
215 | my_r_2 = pred_r.view(-1).cpu().data.numpy()
216 | my_t_2 = pred_t.view(-1).cpu().data.numpy()
217 | my_mat_2 = quaternion_matrix(my_r_2)
218 |
219 | my_mat_2[0:3, 3] = my_t_2
220 |
221 | my_mat_final = np.dot(my_mat, my_mat_2)
222 | my_r_final = copy.deepcopy(my_mat_final)
223 | my_r_final[0:3, 3] = 0
224 | my_r_final = quaternion_from_matrix(my_r_final, True)
225 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
226 |
227 | my_pred = np.append(my_r_final, my_t_final)
228 | my_r = my_r_final
229 | my_t = my_t_final
230 |
231 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
232 |
233 | my_result.append(my_pred.tolist())
234 | except ZeroDivisionError:
235 | print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now))
236 | my_result_wo_refine.append([0.0 for i in range(7)])
237 | my_result.append([0.0 for i in range(7)])
238 |
239 | scio.savemat('{0}/{1}.mat'.format(result_wo_refine_dir, '%04d' % now), {'poses':my_result_wo_refine})
240 | scio.savemat('{0}/{1}.mat'.format(result_refine_dir, '%04d' % now), {'poses':my_result})
241 | print("Finish No.{0} keyframe".format(now))
242 |
--------------------------------------------------------------------------------
/scripts/tools/ros_eval_ycb.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | ############# ros packages #####################
4 | import cv2
5 | import rospy
6 | from sensor_msgs.msg import Image, CameraInfo
7 | from cv_bridge import CvBridge, CvBridgeError
8 | from be.srv import AddTwoInts, AddTwoIntsResponse
9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
10 | from geometry_msgs.msg import Pose, PoseArray
11 | import tf
12 | import message_filters
13 |
14 | ############ python packages ###################
15 | import _init_paths
16 | import argparse
17 | import os
18 | import copy
19 | import random
20 | import numpy as np
21 | import scipy.io as scio
22 | import scipy.misc
23 | import numpy.ma as ma
24 | import math
25 | import torch
26 | import torch.nn as nn
27 | import torch.nn.parallel
28 | import torch.backends.cudnn as cudnn
29 | import torch.optim as optim
30 | import torch.utils.data
31 | import torchvision.datasets as dset
32 | import torchvision.transforms as transforms
33 | import torchvision.utils as vutils
34 | import torch.nn.functional as F
35 | from torch.autograd import Variable
36 | from datasets.ycb.dataset import PoseDataset
37 | from lib.network import PoseNet, PoseRefineNet
38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
39 | from model.build_BiSeNet import BiSeNet
40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou
41 | from matplotlib import pyplot as plt
42 | import time
43 |
44 |
45 |
46 | ##########################################################################################
47 |
48 | parser = argparse.ArgumentParser()
49 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir')
50 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model')
51 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model')
52 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model')
53 | parser.add_argument('--num_classes', type=int, default=22, help='num of object classes (with void)')
54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
55 |
56 |
57 | opt = parser.parse_args()
58 | import numpy.ma as ma
59 |
60 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
61 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
62 | xmap = np.array([[j for i in range(640)] for j in range(480)])
63 | ymap = np.array([[i for i in range(640)] for j in range(480)])
64 | cam_cx = 312.9869
65 | cam_cy = 241.3109
66 | cam_fx = 1066.778
67 | cam_fy = 1067.487
68 | #cam_scale = 10000.0
69 | cam_scale = 1000.0
70 | num_obj = 21
71 | img_width = 480
72 | img_length = 640
73 | num_points = 1000
74 | num_points_mesh = 500
75 | iteration = 2
76 | bs = 1
77 | dataset_config_dir = 'datasets/ycb/dataset_config'
78 | ycb_toolbox_dir = 'YCB_Video_toolbox'
79 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
80 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
81 | cam_mat = np.load('matrix.npy')
82 | dist = np.load('distortion.npy')
83 | #dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0])
84 |
85 | #########################################################################################
86 |
87 | def isRotationMatrix(R) :
88 | Rt = np.transpose(R)
89 | shouldBeIdentity = np.dot(Rt, R)
90 | I = np.identity(3, dtype = R.dtype)
91 | n = np.linalg.norm(I - shouldBeIdentity)
92 | return n < 1e-6
93 |
94 |
95 | def rotationMatrixToEulerAngles(R) :
96 |
97 | assert(isRotationMatrix(R))
98 |
99 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
100 |
101 | singular = sy < 1e-6
102 |
103 | if not singular :
104 | x = math.atan2(R[2,1] , R[2,2])
105 | y = math.atan2(-R[2,0], sy)
106 | z = math.atan2(R[1,0], R[0,0])
107 | else :
108 | x = math.atan2(-R[1,2], R[1,1])
109 | y = math.atan2(-R[2,0], sy)
110 | z = 0
111 |
112 | return np.array([x, y, z])
113 |
114 | ################################################################################################
115 |
116 | # get bbox coordinate
117 | def get_bbox(label):
118 | rows = np.any(label, axis=1)
119 | cols = np.any(label, axis=0)
120 | rmin, rmax = np.where(rows)[0][[0, -1]]
121 | cmin, cmax = np.where(cols)[0][[0, -1]]
122 | rmax += 1
123 | cmax += 1
124 | r_b = rmax - rmin
125 | for tt in range(len(border_list)):
126 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
127 | r_b = border_list[tt + 1]
128 | break
129 | c_b = cmax - cmin
130 | for tt in range(len(border_list)):
131 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
132 | c_b = border_list[tt + 1]
133 | break
134 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
135 | rmin = center[0] - int(r_b / 2)
136 | rmax = center[0] + int(r_b / 2)
137 | cmin = center[1] - int(c_b / 2)
138 | cmax = center[1] + int(c_b / 2)
139 | if rmin < 0:
140 | delt = -rmin
141 | rmin = 0
142 | rmax += delt
143 | if cmin < 0:
144 | delt = -cmin
145 | cmin = 0
146 | cmax += delt
147 | if rmax > img_width:
148 | delt = rmax - img_width
149 | rmax = img_width
150 | rmin -= delt
151 | if cmax > img_length:
152 | delt = cmax - img_length
153 | cmax = img_length
154 | cmin -= delt
155 | return rmin, rmax, cmin, cmax
156 |
157 | ############################ with detection algorithm #############################
158 | # def get_bbox(rois,idx):
159 | # # rmin = int(posecnn_rois[idx][2]) + 1
160 | # # rmax = int(posecnn_rois[idx][4]) - 1
161 | # # cmin = int(posecnn_rois[idx][1]) + 1
162 | # # cmax = int(posecnn_rois[idx][3]) - 1
163 | # rmin = int(rois[idx].xmin) + 1
164 | # rmax = int(rois[idx].xmax) - 1
165 | # cmin = int(rois[idx].ymin) + 1
166 | # cmax = int(rois[idx].ymax) - 1
167 | # r_b = rmax - rmin
168 | # for tt in range(len(border_list)):
169 | # if r_b > border_list[tt] and r_b < border_list[tt + 1]:
170 | # r_b = border_list[tt + 1]
171 | # break
172 | # c_b = cmax - cmin
173 | # for tt in range(len(border_list)):
174 | # if c_b > border_list[tt] and c_b < border_list[tt + 1]:
175 | # c_b = border_list[tt + 1]
176 | # break
177 | # center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
178 | # rmin = center[0] - int(r_b / 2)
179 | # rmax = center[0] + int(r_b / 2)
180 | # cmin = center[1] - int(c_b / 2)
181 | # cmax = center[1] + int(c_b / 2)
182 | # if rmin < 0:
183 | # delt = -rmin
184 | # rmin = 0
185 | # rmax += delt
186 | # if cmin < 0:
187 | # delt = -cmin
188 | # cmin = 0
189 | # cmax += delt
190 | # if rmax > img_width:
191 | # delt = rmax - img_width
192 | # rmax = img_width
193 | # rmin -= delt
194 | # if cmax > img_length:
195 | # delt = cmax - img_length
196 | # cmax = img_length
197 | # cmin -= delt
198 | # return rmin, rmax, cmin, cmax
199 |
200 |
201 | ####################################################################################################
202 | ################################### load BiSeNet parameters ########################################
203 | ####################################################################################################
204 | print('load BiseNet')
205 | start_time = time.time()
206 | bise_model = BiSeNet(opt.num_classes, opt.context_path)
207 | bise_model = bise_model.cuda()
208 | bise_model.load_state_dict(torch.load(opt.checkpoint_path))
209 | global bise_model
210 | print('Done!')
211 | print("Load time : {}".format(time.time() - start_time))
212 |
213 | #####################################################################################################
214 | ######################## load DenseFusion network, 3D model #############################
215 | #####################################################################################################
216 | print('load densefusion network')
217 | start_time = time.time()
218 | estimator = PoseNet(num_points = num_points, num_obj = num_obj)
219 | estimator.cuda()
220 | estimator.load_state_dict(torch.load(opt.model))
221 | estimator.eval()
222 | ############################################################################
223 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj)
224 | refiner.cuda()
225 | refiner.load_state_dict(torch.load(opt.refine_model))
226 | refiner.eval()
227 | print('Done')
228 | print("Load time : {}".format(time.time() - start_time))
229 | #####################################################################################################
230 | # class list upload
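    | # cld[class_id] holds the 3D model points (points.xyz) of each YCB class, later reprojected with cv2.projectPoints for the pose overlay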
231 | class_file = open('{0}/classes.txt'.format(dataset_config_dir))
232 | class_id = 1
233 | cld = {}
234 | while 1:
235 | class_input = class_file.readline()
236 | if not class_input:
237 | break
238 | class_input = class_input[:-1]
239 |
240 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))
241 | cld[class_id] = []
242 | while 1:
243 | input_line = input_file.readline()
244 | if not input_line:
245 | break
246 | input_line = input_line[:-1]
247 | input_line = input_line.split(' ')
248 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
249 | input_file.close()
250 | cld[class_id] = np.array(cld[class_id])
251 | class_id += 1
252 | ########################################################################################################
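    | # seg_predict runs BiSeNet on an RGB frame and returns the per-pixel class-id map plus an 8-bit copy used for publishing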
253 | def seg_predict(image):
254 | global bise_model
255 | try:
256 | with torch.no_grad():
257 | bise_model.eval()
258 | h,w,_ = image.shape
259 | to_tensor = transforms.Compose([
260 | transforms.ToTensor(),
261 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
262 | ])
263 |
264 | image = to_tensor(image)
265 | image = image.unsqueeze_(0)
266 | image = image.cuda()
267 | predict = bise_model(image).squeeze()
268 | predict = reverse_one_hot(predict)
269 | predict = np.array(predict)
270 | print(np.unique(predict))
271 | predict = np.resize(predict,[h,w])
272 | pub_label = np.uint8(predict)
273 | cv2.imwrite('./segmentation_image.png', pub_label)
274 |
275 | return predict, pub_label
276 | except CvBridgeError as e:
277 | print(e)
278 |
279 |
280 |
281 |
282 |
283 | def pose_predict(img, depth,rois):
284 | label_pub = rospy.Publisher('/label',Image, queue_size = 10)
285 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
286 | class_list = ['002_master_chef_can',
287 | '003_cracker_box',
288 | '004_sugar_box',
289 | '005_tomato_soup_can',
290 | '006_mustard_bottle',
291 | '007_tuna_fish_can',
292 | '008_pudding_box',
293 | '009_gelatin_box',
294 | '010_potted_meat_can',
295 | '011_banana',
296 | '019_pitcher_base',
297 | '025_mug',
298 | '021_bleach_cleanser',
299 | '024_bowl',
300 | '035_power_drill',
301 | '036_wood_block',
302 | '037_scissors',
303 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick']
304 | try:
305 | object_number = len(rois)
306 |
307 | #lst = posecnn_rois[:,0:1].flatten()
308 | #lst = np.unique(label)
309 | my_result_wo_refine = []
310 | my_result = []
311 | for idx in range(object_number):
312 | #itemid = lst[idx]
313 | itemid = class_list.index(rois[idx].Class) +1
314 | #itemid = class_list.index(rois[idx].Class) +3
315 | print(object_number,itemid, rois[idx])
316 |
317 | try:
318 | label, pub_label = seg_predict(img)
319 | pub_label =pub_label * 50
320 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1'))
321 | ####################### with Detection algorithm #################################
322 | # rmin, rmax, cmin,cmax = get_bbox(rois,idx)
323 | #####################################################################################
324 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
325 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
326 | mask = mask_label * mask_depth
327 | rmin, rmax, cmin, cmax = get_bbox(mask_label)
328 |
329 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
330 | if len(choose) > num_points:
331 | c_mask = np.zeros(len(choose), dtype=int)
332 | c_mask[:num_points] = 1
333 | np.random.shuffle(c_mask)
334 | choose = choose[c_mask.nonzero()]
335 | else:
336 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
337 |
338 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
339 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
340 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
341 | choose = np.array([choose])
342 |
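    |                 # back-project the chosen pixels to a camera-frame point cloud (pinhole model): z = depth / cam_scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy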
343 | pt2 = depth_masked / cam_scale
344 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
345 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
346 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
347 |
348 | img_masked = np.array(img)[:, :, :3]
349 | img_masked = np.transpose(img_masked, (2, 0, 1))
350 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
351 |
352 | cloud = torch.from_numpy(cloud.astype(np.float32))
353 | choose = torch.LongTensor(choose.astype(np.int32))
354 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
355 | index = torch.LongTensor([itemid - 1])
356 |
357 | cloud = Variable(cloud).cuda()
358 | choose = Variable(choose).cuda()
359 | img_masked = Variable(img_masked).cuda()
360 | index = Variable(index).cuda()
361 | cloud = cloud.view(1, num_points, 3)
362 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
363 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
364 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
365 | pred_c = pred_c.view(bs, num_points)
366 | how_max, which_max = torch.max(pred_c, 1)
367 | pred_t = pred_t.view(bs * num_points, 1, 3)
368 | points = cloud.view(bs * num_points, 1, 3)
369 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
370 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
371 | my_pred = np.append(my_r, my_t)
372 |                 # build the 4x4 pose matrix from the current quaternion/translation estimate before deriving Euler angles
373 |                 dof = quaternion_matrix(my_r)
374 |                 dof[0:3, 3] = my_t
375 |                 rot_to_angle = rotationMatrixToEulerAngles(dof[:3, :3]).reshape(1, 3)
376 |                 rot_t = np.concatenate([rot_to_angle, my_t.reshape(1, 3)], axis=0)
377 |
378 | # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
379 | # [0, 0, 1]]))
380 | #tl = np.array([100,100,100])
381 | #cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696],
382 | # [0, 0, 1]]))
383 |
384 |
385 |
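    |                 # iterative refinement: re-express the cloud in the current object-frame estimate, predict a residual pose with the refiner, and compose it into the running transform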
386 | for ite in range(0, iteration):
387 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
388 | my_mat = quaternion_matrix(my_r)
389 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
390 | my_mat[0:3, 3] = my_t
391 |
392 | new_cloud = torch.bmm((cloud - T), R).contiguous()
393 | pred_r, pred_t = refiner(new_cloud, emb, index)
394 | pred_r = pred_r.view(1, 1, -1)
395 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
396 | my_r_2 = pred_r.view(-1).cpu().data.numpy()
397 | my_t_2 = pred_t.view(-1).cpu().data.numpy()
398 | my_mat_2 = quaternion_matrix(my_r_2)
399 |
400 |
401 | my_mat_2[0:3, 3] = my_t_2
402 | my_mat_final = np.dot(my_mat, my_mat_2)
403 | my_r_final = copy.deepcopy(my_mat_final)
404 | my_r_final[0:3, 3] = 0
405 | my_r_final = quaternion_from_matrix(my_r_final, True)
406 |
407 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
408 |
409 | my_pred = np.append(my_r_final, my_t_final)
410 | my_r = my_r_final
411 | my_t = my_t_final
412 | open_cv_image = img.copy()
413 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
414 | dof = quaternion_matrix(my_r)
415 | dof[0:3,3] = my_t
416 |
417 |
418 | object_poses = {
419 |                     'tx':my_t.flatten()[0],
420 |                     'ty':my_t.flatten()[1],
421 |                     'tz':my_t.flatten()[2],
422 | 'qx':my_r[0],
423 | 'qy':my_r[1],
424 | 'qz':my_r[2],
425 | 'qw':my_r[3]}
426 | my_result.append(object_poses)
427 | open_cv_image = img.copy()
428 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR)
429 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 13 = mug
430 | open_cv_image = draw(open_cv_image,imgpts, itemid)
431 |
432 |
433 | except ZeroDivisionError:
434 | open_cv_image = None
435 | print('Fail')
436 | except CvBridgeError as e:
437 | print(e)
438 | return my_result, open_cv_image
439 |
440 | def draw(img, imgpts, label):
441 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]]
442 | for point in imgpts:
443 |
444 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1)
445 | return img
446 |
447 |
448 |
449 | def image_callback(rgb):
450 |     global cv_image
451 |     global bridge
452 |     bridge = CvBridge()
453 |     cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8')
454 | 
455 |
456 |
457 | def depth_callback(depth):
458 |     global cv_depth
459 |     cv_depth = bridge.imgmsg_to_cv2(depth,'16UC1')
460 |     cv2.imwrite('./depth.png', cv_depth)
461 | 
462 | 
463 |
464 | def rois_callback(rois):
465 |     global detect_res
466 |     detect_res = rois.bounding_boxes
467 |
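    | # pose_server exposes a trigger service: req.a == 2 runs segmentation only, req.a == 3 runs full pose estimation and publishes a PoseArray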
468 | def pose_server():
469 | rospy.init_node('pose_estimation_server')
470 | s = rospy.Service('/cvipl/pose_server', AddTwoInts, implimentation_seg)
471 | # only Segmentation
472 |
473 | # only Pose Estimation
474 | rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback)
475 | depth_sub = rospy.Subscriber('/camera/aligned_depth_to_color/image_raw',Image, depth_callback)
476 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback)
477 |
478 |
479 | def implimentation_seg(req):
480 | global cv_image
481 | global cv_depth
482 | global detect_res
483 |
484 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10)
485 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10)
486 | if req.a == 2 :
487 | print(cv_image.shape,cv_depth.shape,detect_res)
488 | seg_result = seg_predict(cv_image)
489 |
490 | elif req.a == 3 :
491 | print(req.a)
492 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res)
493 | pose_array = PoseArray()
494 | pose_msg = Pose()
495 | print(pose_estimation)
496 |
497 | for i in range(len(pose_estimation)):
498 | pose_msg.position.x = pose_estimation[i]['tx']
499 | pose_msg.position.y = pose_estimation[i]['ty']
500 | pose_msg.position.z = pose_estimation[i]['tz']
501 |
502 | pose_msg.orientation.x = pose_estimation[i]['qx']
503 | pose_msg.orientation.y = pose_estimation[i]['qy']
504 | pose_msg.orientation.z = pose_estimation[i]['qz']
505 | pose_msg.orientation.w = pose_estimation[i]['qw']
506 |
507 | pose_array.poses.append(pose_msg)
508 | pose_pub.publish(pose_array)
509 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8'))
510 |
511 |
512 |
513 | def main():
514 | pose_server()
515 | rospy.spin()
516 |
517 | if __name__ == '__main__':
518 | main()
519 |
--------------------------------------------------------------------------------
/scripts/tools/ros_eval_ycb_message.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | ############# ros packages #####################
4 | import cv2
5 | import rospy
6 | from sensor_msgs.msg import Image, CameraInfo
7 | from cv_bridge import CvBridge, CvBridgeError
8 | from be.srv import AddTwoInts, AddTwoIntsResponse
9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
10 | from geometry_msgs.msg import Pose, PoseArray
11 | import tf
12 | import message_filters
13 |
14 | ############ python packages ###################
15 | import _init_paths
16 | import argparse
17 | import os
18 | import copy
19 | import random
20 | import numpy as np
21 | import scipy.io as scio
22 | import scipy.misc
23 | import numpy.ma as ma
24 | import math
25 | import torch
26 | import torch.nn as nn
27 | import torch.nn.parallel
28 | import torch.backends.cudnn as cudnn
29 | import torch.optim as optim
30 | import torch.utils.data
31 | import torchvision.datasets as dset
32 | import torchvision.transforms as transforms
33 | import torchvision.utils as vutils
34 | import torch.nn.functional as F
35 | from torch.autograd import Variable
36 | from datasets.ycb.dataset import PoseDataset
37 | from lib.network import PoseNet, PoseRefineNet
38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
39 | from model.build_BiSeNet import BiSeNet
40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou
41 | from matplotlib import pyplot as plt
42 | import time
43 |
44 |
45 |
46 | ##########################################################################################
47 |
48 | parser = argparse.ArgumentParser()
49 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir')
50 | parser.add_argument('--model', type=str, default = 'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model')
51 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model')
52 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model')
53 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)')
54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
55 | parser.add_argument('--image_subscriber', type=str, default='/camera/color/image_raw')
56 | parser.add_argument('--depth_subscriber', type=str, default='/camera/depth/image_rect_raw')
57 |
58 |
59 |
60 | opt = parser.parse_args()
61 |
62 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
63 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
64 | xmap = np.array([[j for i in range(640)] for j in range(480)])
65 | ymap = np.array([[i for i in range(640)] for j in range(480)])
66 | cam_cx = 312.9869
67 | cam_cy = 241.3109
68 | cam_fx = 1066.778
69 | cam_fy = 1067.487
70 | cam_scale = 10000.0
71 | num_obj = 21
72 | img_width = 480
73 | img_length = 640
74 | num_points = 1000
75 | num_points_mesh = 500
76 | iteration = 2
77 | bs = 1
78 | dataset_config_dir = 'datasets/ycb/dataset_config'
79 | ycb_toolbox_dir = 'YCB_Video_toolbox'
80 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
81 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
82 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0])
83 |
84 |
85 | def image_callback(rgb):
86 |     global cv_image
87 |     bridge = CvBridge()
88 |     cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8')
89 |
90 |
91 | def depth_callback(depth):
92 |     global cv_depth
93 |     bridge = CvBridge()
94 |     cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1')
95 |
96 | def rois_callback(rois):
97 |     global detect_res
98 |     detect_res = rois.bounding_boxes
99 |     implimentation_seg()
100 | 
101 |
102 |
103 | rgb_sub = rospy.Subscriber(opt.image_subscriber, Image, image_callback)
104 | depth_sub = rospy.Subscriber(opt.depth_subscriber, Image, depth_callback)
105 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback)
106 | #########################################################################################
107 |
108 | def isRotationMatrix(R) :
109 | Rt = np.transpose(R)
110 | shouldBeIdentity = np.dot(Rt, R)
111 | I = np.identity(3, dtype = R.dtype)
112 | n = np.linalg.norm(I - shouldBeIdentity)
113 | return n < 1e-6
114 |
115 |
116 | def rotationMatrixToEulerAngles(R) :
117 |
118 | assert(isRotationMatrix(R))
119 |
120 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
121 |
122 | singular = sy < 1e-6
123 |
124 | if not singular :
125 | x = math.atan2(R[2,1] , R[2,2])
126 | y = math.atan2(-R[2,0], sy)
127 | z = math.atan2(R[1,0], R[0,0])
128 | else :
129 | x = math.atan2(-R[1,2], R[1,1])
130 | y = math.atan2(-R[2,0], sy)
131 | z = 0
132 |
133 | return np.array([x, y, z])
134 |
135 | ################################################################################################
136 | """
137 | ##################################################################################################
138 | # get bbox coordinate
139 | def get_bbox(label):
140 | rows = np.any(label, axis=1)
141 | cols = np.any(label, axis=0)
142 |     rmin, rmax = np.where(rows)[0][[0, -1]]
143 |     cmin, cmax = np.where(cols)[0][[0, -1]]
144 | 
145 | rmax += 1
146 | cmax += 1
147 | r_b = rmax - rmin
148 | for tt in range(len(border_list)):
149 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
150 | r_b = border_list[tt + 1]
151 | break
152 | c_b = cmax - cmin
153 | for tt in range(len(border_list)):
154 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
155 | c_b = border_list[tt + 1]
156 | break
157 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
158 | rmin = center[0] - int(r_b / 2)
159 | rmax = center[0] + int(r_b / 2)
160 | cmin = center[1] - int(c_b / 2)
161 | cmax = center[1] + int(c_b / 2)
162 | if rmin < 0:
163 | delt = -rmin
164 | rmin = 0
165 | rmax += delt
166 | if cmin < 0:
167 | delt = -cmin
168 | cmin = 0
169 | cmax += delt
170 | if rmax > img_width:
171 | delt = rmax - img_width
172 | rmax = img_width
173 | rmin -= delt
174 | if cmax > img_length:
175 | delt = cmax - img_length
176 | cmax = img_length
177 | cmin -= delt
178 | return rmin, rmax, cmin, cmax
179 | """
180 | def get_bbox(rois,idx):
181 | # rmin = int(posecnn_rois[idx][2]) + 1
182 | # rmax = int(posecnn_rois[idx][4]) - 1
183 | # cmin = int(posecnn_rois[idx][1]) + 1
184 | # cmax = int(posecnn_rois[idx][3]) - 1
185 | rmin = int(rois[idx].xmin) + 1
186 | rmax = int(rois[idx].xmax) - 1
187 | cmin = int(rois[idx].ymin) + 1
188 | cmax = int(rois[idx].ymax) - 1
189 | r_b = rmax - rmin
190 | for tt in range(len(border_list)):
191 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
192 | r_b = border_list[tt + 1]
193 | break
194 | c_b = cmax - cmin
195 | for tt in range(len(border_list)):
196 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
197 | c_b = border_list[tt + 1]
198 | break
199 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
200 | rmin = center[0] - int(r_b / 2)
201 | rmax = center[0] + int(r_b / 2)
202 | cmin = center[1] - int(c_b / 2)
203 | cmax = center[1] + int(c_b / 2)
204 | if rmin < 0:
205 | delt = -rmin
206 | rmin = 0
207 | rmax += delt
208 | if cmin < 0:
209 | delt = -cmin
210 | cmin = 0
211 | cmax += delt
212 | if rmax > img_width:
213 | delt = rmax - img_width
214 | rmax = img_width
215 | rmin -= delt
216 | if cmax > img_length:
217 | delt = cmax - img_length
218 | cmax = img_length
219 | cmin -= delt
220 | return rmin, rmax, cmin, cmax
221 | ####################################################################################################
222 | ################################### load BiSeNet parameters ########################################
223 | ####################################################################################################
224 | print('load BiseNet')
225 | start_time = time.time()
226 | bise_model = BiSeNet(opt.num_classes, opt.context_path)
227 | bise_model = bise_model.cuda()
228 | bise_model.load_state_dict(torch.load(opt.checkpoint_path))
229 | global bise_model
230 | print('Done!')
231 | print("Load time : {}".format(time.time() - start_time))
232 |
233 | #####################################################################################################
234 | ######################## load DenseFusion network, 3D model #############################
235 | #####################################################################################################
236 | print('load densefusion network')
237 | start_time = time.time()
238 | estimator = PoseNet(num_points = num_points, num_obj = num_obj)
239 | estimator.cuda()
240 | estimator.load_state_dict(torch.load(opt.model))
241 | estimator.eval()
242 | ############################################################################
243 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj)
244 | refiner.cuda()
245 | refiner.load_state_dict(torch.load(opt.refine_model))
246 | refiner.eval()
247 | print('Done')
248 | print("Load time : {}".format(time.time() - start_time))
249 | #####################################################################################################
250 | # class list upload
251 | class_file = open('{0}/classes.txt'.format(dataset_config_dir))
252 | class_id = 1
253 | cld = {}
254 | while 1:
255 | class_input = class_file.readline()
256 | if not class_input:
257 | break
258 | class_input = class_input[:-1]
259 |
260 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))
261 | cld[class_id] = []
262 | while 1:
263 | input_line = input_file.readline()
264 | if not input_line:
265 | break
266 | input_line = input_line[:-1]
267 | input_line = input_line.split(' ')
268 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
269 | input_file.close()
270 | cld[class_id] = np.array(cld[class_id])
271 | class_id += 1
272 | ########################################################################################################
273 | def seg_predict(image):
274 | global bise_model
275 | try:
276 | with torch.no_grad():
277 | bise_model.eval()
278 | h,w,_ = image.shape
279 | to_tensor = transforms.Compose([
280 | transforms.ToTensor(),
281 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
282 | ])
283 |
284 | image = to_tensor(image)
285 | image = image.unsqueeze_(0)
286 | image = image.cuda()
287 | predict = bise_model(image).squeeze()
288 | predict = reverse_one_hot(predict)
289 | predict = np.array(predict)
290 | predict = np.resize(predict,[h,w])
291 | print(np.unique(predict))
292 | zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR)
293 | cv2.imwrite('./segmentation_image.png', zzzz)
294 |
295 | return predict
296 | except CvBridgeError as e:
297 | print(e)
298 |
299 |
300 |
301 |
302 |
303 | def pose_predict(img, depth,rois):
304 | class_list = ['002_master_chef_can',
305 | '003_cracker_box',
306 | '004_sugar_box',
307 | '005_tomato_soup_can',
308 | '006_mustard_bottle',
309 | '007_tuna_fish_can',
310 | '008_pudding_box',
311 | '009_gelatin_box',
312 | '010_potted_meat_can',
313 | '011_banana',
314 | '019_pitcher_base',
315 | '025_mug',
316 | '021_bleach_cleanser',
317 | '024_bowl',
318 | '035_power_drill',
319 | '036_wood_block',
320 | '037_scissors',
321 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick']
322 | try:
323 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
324 | object_number = len(rois)
325 | bridge = CvBridge()
326 |
327 | #lst = posecnn_rois[:,0:1].flatten()
328 | #lst = np.unique(label)
329 | my_result_wo_refine = []
330 | my_result = []
331 | for idx in range(object_number):
332 | #itemid = lst[idx]
333 | #itemid = class_list.index(rois[idx].Class) +1
334 | itemid = class_list.index(rois[idx].Class) +3
335 |
336 | try:
337 | label = seg_predict(img)
338 | cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label)
339 | rmin, rmax, cmin,cmax = get_bbox(rois,idx)
340 | # bounding box cutting
341 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:])
342 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0))
343 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
344 | #mask = mask_label * mask_depth
345 | # only image
346 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
347 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
348 | mask = mask_label * mask_depth
349 | #rmin, rmax, cmin, cmax = get_bbox(mask_label)
350 |
351 |
352 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
353 | print(choose)
354 | if len(choose) > num_points:
355 | c_mask = np.zeros(len(choose), dtype=int)
356 | c_mask[:num_points] = 1
357 | np.random.shuffle(c_mask)
358 | choose = choose[c_mask.nonzero()]
359 | else:
360 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
361 |
362 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
363 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
364 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
365 | choose = np.array([choose])
366 |
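    |                 # back-project the chosen pixels to a camera-frame point cloud (pinhole model): z = depth / cam_scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy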
367 | pt2 = depth_masked / cam_scale
368 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
369 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
370 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
371 | img_masked = np.array(img)[:, :, :3]
372 | img_masked = np.transpose(img_masked, (2, 0, 1))
373 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
374 |
375 | cloud = torch.from_numpy(cloud.astype(np.float32))
376 | choose = torch.LongTensor(choose.astype(np.int32))
377 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
378 | index = torch.LongTensor([itemid - 1])
379 |
380 | cloud = Variable(cloud).cuda()
381 | choose = Variable(choose).cuda()
382 | img_masked = Variable(img_masked).cuda()
383 | index = Variable(index).cuda()
384 | cloud = cloud.view(1, num_points, 3)
385 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
386 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
387 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
388 | pred_c = pred_c.view(bs, num_points)
389 | how_max, which_max = torch.max(pred_c, 1)
390 | pred_t = pred_t.view(bs * num_points, 1, 3)
391 | points = cloud.view(bs * num_points, 1, 3)
392 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
393 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
394 | my_pred = np.append(my_r, my_t)
395 | # making pose matrix
396 | dof = quaternion_matrix(my_r)
397 | dof[0:3,3] = my_t
398 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3])
399 | rot_to_angle = rot_to_angle.reshape(1,3)
400 | my_t = my_t.reshape(1,3)
401 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0)
402 | object_poses = {
403 | 'tx':my_t[0][0],
404 | 'ty':my_t[0][1],
405 | 'tz':my_t[0][2],
406 | 'qx':my_r[0],
407 | 'qy':my_r[1],
408 | 'qz':my_r[2],
409 | 'qw':my_r[3]}
410 | my_result.append(object_poses)
411 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
412 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
413 | [0, 0, 1]]))
414 | imgpts, jac = cv2.projectPoints(cld[14], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup
415 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid)
416 | my_result_wo_refine.append(my_pred.tolist())
417 | except ZeroDivisionError:
418 | # my_result_wo_refine.append([0.0 for i in range(7)])
419 | # my_result.append([0.0 for i in range(7)])
420 | open_cv_image = None
421 | print('Fail')
422 | except CvBridgeError as e:
423 | print(e)
424 |
425 | return my_result, open_cv_image
426 |
427 | def draw(img, imgpts, label):
428 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]]
429 | for point in imgpts:
430 |
431 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1)
432 | return img
433 |
434 |
435 |
436 |
437 |
438 |
439 | def implimentation_seg():
440 | global cv_image
441 | global cv_depth
442 | global detect_res
443 | label_pub = rospy.Publisher('/label',Image, queue_size = 10)
444 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10)
445 | bridge = CvBridge()
446 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10)
447 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res)
448 | pose_array = PoseArray()
449 | pose_msg = Pose()
450 | print(pose_estimation)
451 |
452 | for i in range(len(pose_estimation)):
453 | pose_msg.position.x = pose_estimation[i]['tx']
454 | pose_msg.position.y = pose_estimation[i]['ty']
455 | pose_msg.position.z = pose_estimation[i]['tz']
456 | pose_msg.orientation.x = pose_estimation[i]['qx']
457 | pose_msg.orientation.y = pose_estimation[i]['qy']
458 | pose_msg.orientation.z = pose_estimation[i]['qz']
459 | pose_msg.orientation.w = pose_estimation[i]['qw']
460 |
461 | pose_array.poses.append(pose_msg)
462 | pose_pub.publish(pose_array)
463 | if fit_image is not None:
464 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8'))
465 |
466 |
467 |
468 | def main():
469 |
470 | rospy.init_node('pose_estimation_server')
471 | rospy.spin()
472 |
473 | if __name__ == '__main__':
474 | main()
475 |
--------------------------------------------------------------------------------
/scripts/tools/ros_eval_ycb_publisher.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 |
4 |
5 | ############# ros packages #####################
6 | import rospy
7 | from sensor_msgs.msg import Image, CameraInfo
8 | from cv_bridge import CvBridge, CvBridgeError
9 | #from be.srv import AddTwoInts, AddTwoIntsResponse
10 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
11 | from geometry_msgs.msg import Pose, PoseArray
12 | import tf
13 | import message_filters
14 | import cv2
15 |
16 | ############ python packages ###################
17 | import _init_paths
18 | import argparse
19 | import sys
20 | import os
21 | import os.path as osp
22 | root_dir = osp.dirname(osp.dirname(__file__))
23 |
24 | sys.path.append(root_dir)
25 |
26 | import copy
27 | import random
28 | import numpy as np
29 | import scipy.io as scio
30 | import scipy.misc
31 | import numpy.ma as ma
32 | import math
33 | import torch
34 | import torch.nn as nn
35 | import torch.nn.parallel
36 | import torch.backends.cudnn as cudnn
37 | import torch.optim as optim
38 | import torch.utils.data
39 | import torchvision.datasets as dset
40 | import torchvision.transforms as transforms
41 | import torchvision.utils as vutils
42 | import torch.nn.functional as F
43 | from torch.autograd import Variable
44 | from datasets.ycb.dataset import PoseDataset
45 | from lib.network import PoseNet, PoseRefineNet
46 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
47 | from model.build_BiSeNet import BiSeNet
48 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou
49 | from matplotlib import pyplot as plt
50 | import time
51 | ###################################################
52 |
53 |
54 | ##########################################################################################
55 |
56 | parser = argparse.ArgumentParser()
57 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir')
58 | parser.add_argument('--model', type=str, default = 'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model')
59 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model')
60 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model')
61 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)')
62 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
63 | parser.add_argument('--image_subscriber', type=str,default='/camera/color/image_raw')
64 | parser.add_argument('--depth_subscriber', type=str,default='/camera/depth/image_rect_raw')
65 |
66 |
67 |
68 | opt = parser.parse_args()
69 |
70 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
71 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
72 | xmap = np.array([[j for i in range(640)] for j in range(480)])
73 | ymap = np.array([[i for i in range(640)] for j in range(480)])
74 | #cam_cx = 312.9869
75 | #cam_cy = 241.3109
76 | #cam_fx = 1066.778
77 | #cam_fy = 1067.487
78 | cam_cx = 331.52874755859375
79 | cam_cy = 249.5271453857422
80 | cam_fx = 610.6751708984375
81 | cam_fy = 610.5318603515625
82 | cam_scale = 1000.0
83 | num_obj = 21
84 | img_width = 480
85 | img_length = 640
86 | num_points = 1000
87 | num_points_mesh = 500
88 | iteration = 2
89 | bs = 1
90 | dataset_config_dir = 'datasets/ycb/dataset_config'
91 | ycb_toolbox_dir = 'YCB_Video_toolbox'
92 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
93 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
94 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0])
95 |
96 | label_pub = rospy.Publisher('/label',Image, queue_size = 10)
97 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10)
98 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10)
99 |
100 | bridge = CvBridge()
101 |
102 | def image_callback(rgb):
103 |     global cv_image
104 |     global img_flg
105 |     cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8')
106 |     img_flg = True
107 |     print('img_flg: ', img_flg)
108 |
109 | def depth_callback(depth):
110 |     global cv_depth
111 |     cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1')
112 | 
113 |     if img_flg is True:
114 |         implimentation_seg()
115 |
116 | def rois_callback(rois):
117 |     global detect_res
118 |     detect_res = rois.bounding_boxes
119 |     print("get bbox")
120 |
121 |
122 |
123 | #########################################################################################
124 |
125 | def isRotationMatrix(R) :
126 | Rt = np.transpose(R)
127 | shouldBeIdentity = np.dot(Rt, R)
128 | I = np.identity(3, dtype = R.dtype)
129 | n = np.linalg.norm(I - shouldBeIdentity)
130 | return n < 1e-6
131 |
132 |
133 | def rotationMatrixToEulerAngles(R) :
134 |
135 | assert(isRotationMatrix(R))
136 |
137 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
138 |
139 | singular = sy < 1e-6
140 |
141 | if not singular :
142 | x = math.atan2(R[2,1] , R[2,2])
143 | y = math.atan2(-R[2,0], sy)
144 | z = math.atan2(R[1,0], R[0,0])
145 | else :
146 | x = math.atan2(-R[1,2], R[1,1])
147 | y = math.atan2(-R[2,0], sy)
148 | z = 0
149 |
150 | return np.array([x, y, z])
151 |
152 | ################################################################################################
153 |
154 | ##################################################################################################
155 | # get bbox coordinate
156 | def get_bbox(label):
157 | rows = np.any(label, axis=1)
158 | cols = np.any(label, axis=0)
159 | rmin, rmax = np.where(rows)[0][[0, -1]]
160 | cmin, cmax = np.where(cols)[0][[0, -1]]
161 | rmax += 1
162 | cmax += 1
163 | r_b = rmax - rmin
164 | for tt in range(len(border_list)):
165 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
166 | r_b = border_list[tt + 1]
167 | break
168 | c_b = cmax - cmin
169 | for tt in range(len(border_list)):
170 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
171 | c_b = border_list[tt + 1]
172 | break
173 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
174 | rmin = center[0] - int(r_b / 2)
175 | rmax = center[0] + int(r_b / 2)
176 | cmin = center[1] - int(c_b / 2)
177 | cmax = center[1] + int(c_b / 2)
178 | if rmin < 0:
179 | delt = -rmin
180 | rmin = 0
181 | rmax += delt
182 | if cmin < 0:
183 | delt = -cmin
184 | cmin = 0
185 | cmax += delt
186 | if rmax > img_width:
187 | delt = rmax - img_width
188 | rmax = img_width
189 | rmin -= delt
190 | if cmax > img_length:
191 | delt = cmax - img_length
192 | cmax = img_length
193 | cmin -= delt
194 | return rmin, rmax, cmin, cmax
195 |
196 | '''def get_bbox(rois,idx):
197 | # rmin = int(posecnn_rois[idx][2]) + 1
198 | # rmax = int(posecnn_rois[idx][4]) - 1
199 | # cmin = int(posecnn_rois[idx][1]) + 1
200 | # cmax = int(posecnn_rois[idx][3]) - 1
201 | rmin = int(rois[idx].xmin) + 1
202 | rmax = int(rois[idx].xmax) - 1
203 | cmin = int(rois[idx].ymin) + 1
204 | cmax = int(rois[idx].ymax) - 1
205 | r_b = rmax - rmin
206 | for tt in range(len(border_list)):
207 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
208 | r_b = border_list[tt + 1]
209 | break
210 | c_b = cmax - cmin
211 | for tt in range(len(border_list)):
212 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
213 | c_b = border_list[tt + 1]
214 | break
215 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
216 | rmin = center[0] - int(r_b / 2)
217 | rmax = center[0] + int(r_b / 2)
218 | cmin = center[1] - int(c_b / 2)
219 | cmax = center[1] + int(c_b / 2)
220 | if rmin < 0:
221 | delt = -rmin
222 | rmin = 0
223 | rmax += delt
224 | if cmin < 0:
225 | delt = -cmin
226 | cmin = 0
227 | cmax += delt
228 | if rmax > img_width:
229 | delt = rmax - img_width
230 | rmax = img_width
231 | rmin -= delt
232 | if cmax > img_length:
233 | delt = cmax - img_length
234 | cmax = img_length
235 | cmin -= delt
236 | return rmin, rmax, cmin, cmax'''
237 | ####################################################################################################
238 | ################################### load BiSeNet parameters ########################################
239 | ####################################################################################################
240 | print('load BiseNet')
241 | start_time = time.time()
242 | bise_model = BiSeNet(opt.num_classes, opt.context_path)
243 | bise_model = bise_model.cuda()
244 | bise_model.load_state_dict(torch.load(opt.checkpoint_path))
245 | global bise_model
246 | print('Done!')
247 | print("Load time : {}".format(time.time() - start_time))
248 |
249 | #####################################################################################################
250 | ######################## load DenseFusion network, 3D model #############################
251 | #####################################################################################################
252 | print('load densefusion network')
253 | start_time = time.time()
254 | estimator = PoseNet(num_points = num_points, num_obj = num_obj)
255 | estimator.cuda()
256 | estimator.load_state_dict(torch.load(opt.model))
257 | estimator.eval()
258 | ############################################################################
259 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj)
260 | refiner.cuda()
261 | refiner.load_state_dict(torch.load(opt.refine_model))
262 | refiner.eval()
263 | print('Done!')
264 | print("Load time : {}".format(time.time() - start_time))
265 | #####################################################################################################
266 | # class list upload
267 | class_file = open('{0}/classes.txt'.format(dataset_config_dir))
268 | class_id = 1
269 | cld = {}
270 | while 1:
271 | class_input = class_file.readline()
272 | if not class_input:
273 | break
274 | class_input = class_input[:-1]
275 |
276 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))
277 | cld[class_id] = []
278 | while 1:
279 | input_line = input_file.readline()
280 | if not input_line:
281 | break
282 | input_line = input_line[:-1]
283 | input_line = input_line.split(' ')
284 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
285 | input_file.close()
286 | cld[class_id] = np.array(cld[class_id])
287 | class_id += 1
288 | ########################################################################################################
289 | def seg_predict(image):
290 | global bise_model
291 | try:
292 | with torch.no_grad():
293 | bise_model.eval()
294 | h,w,_ = image.shape
295 | to_tensor = transforms.Compose([
296 | transforms.ToTensor(),
297 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
298 | ])
299 |
300 | image = to_tensor(image)
301 | image = image.unsqueeze_(0)
302 | image = image.cuda()
303 | predict = bise_model(image).squeeze()
304 | predict = reverse_one_hot(predict)
305 | predict = np.array(predict)
306 | predict = np.resize(predict,[h,w])
307 | pub_label = np.uint8(predict)
308 | #zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR)
309 | #cv2.imwrite('./segmentation_image.png', zzzz)
310 |
311 | return predict, pub_label
312 | except CvBridgeError as e:
313 | print(e)
314 |
315 | def pose_predict(img, depth):
316 | class_list = ['002_master_chef_can',
317 | '003_cracker_box',
318 | '004_sugar_box',
319 | '005_tomato_soup_can',
320 | '006_mustard_bottle',
321 | '007_tuna_fish_can',
322 | '008_pudding_box',
323 | '009_gelatin_box',
324 | '010_potted_meat_can',
325 | '011_banana',
326 | '019_pitcher_base',
327 | '025_mug',
328 | '021_bleach_cleanser',
329 | '024_bowl',
330 | '035_power_drill',
331 | '036_wood_block',
332 | '037_scissors',
333 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick']
334 | try:
335 | bridge = CvBridge()
336 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
337 | label, pub_label = seg_predict(img)
338 | label = label-1 # to match labeling results to class list
339 | pub_label =pub_label * 50
340 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1'))
341 |
342 | object_number = len(np.unique(label))
343 |         print('unique label: ', np.unique(label))
344 |
345 | my_result_wo_refine = []
346 | my_result = []
347 | open_cv_image = np.zeros_like(img)
348 |
349 | for idx in range(object_number):
350 | if idx == 0: continue
351 |
352 | itemid = np.unique(label)[idx]
353 | print('itemid: ', itemid)
354 |
355 | try:
356 | #cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label)
357 | rmin, rmax, cmin,cmax = get_bbox(label)
358 | # bounding box cutting
359 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:])
360 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0))
361 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
362 | #mask = mask_label * mask_depth
363 | # only image
364 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
365 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
366 | mask = mask_label * mask_depth
367 | #rmin, rmax, cmin, cmax = get_bbox(mask_label)
368 |
369 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
370 |
371 | if len(choose) > num_points:
372 |
373 | c_mask = np.zeros(len(choose), dtype=int)
374 | c_mask[:num_points] = 1
375 | np.random.shuffle(c_mask)
376 | choose = choose[c_mask.nonzero()]
377 | else:
378 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
379 |
380 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
381 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
382 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
383 | choose = np.array([choose])
384 |
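    |                 # back-project the chosen pixels to a camera-frame point cloud (pinhole model): z = depth / cam_scale, x = (u - cx) * z / fx, y = (v - cy) * z / fy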
385 | pt2 = depth_masked / cam_scale
386 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
387 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
388 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
389 | img_masked = np.array(img)[:, :, :3]
390 | img_masked = np.transpose(img_masked, (2, 0, 1))
391 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
392 |
393 | cloud = torch.from_numpy(cloud.astype(np.float32))
394 | choose = torch.LongTensor(choose.astype(np.int32))
395 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
396 | index = torch.LongTensor([itemid])
397 |
398 | cloud = Variable(cloud).cuda()
399 | choose = Variable(choose).cuda()
400 | img_masked = Variable(img_masked).cuda()
401 | index = Variable(index).cuda()
402 | cloud = cloud.view(1, num_points, 3)
403 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
404 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
405 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
406 | pred_c = pred_c.view(bs, num_points)
407 | how_max, which_max = torch.max(pred_c, 1)
408 | pred_t = pred_t.view(bs * num_points, 1, 3)
409 | points = cloud.view(bs * num_points, 1, 3)
410 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
411 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
412 | my_pred = np.append(my_r, my_t)
413 | # making pose matrix
414 | dof = quaternion_matrix(my_r)
415 | dof[0:3,3] = my_t
416 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3])
417 | rot_to_angle = rot_to_angle.reshape(1,3)
418 | my_t = my_t.reshape(1,3)
419 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0)
420 | object_poses = {
421 | 'tx':my_t[0][0],
422 | 'ty':my_t[0][1],
423 | 'tz':my_t[0][2],
424 | 'qx':my_r[0],
425 | 'qy':my_r[1],
426 | 'qz':my_r[2],
427 | 'qw':my_r[3]}
428 | my_result.append(object_poses)
429 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
430 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
431 | [0, 0, 1]]))
432 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup
433 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid)
434 | my_result_wo_refine.append(my_pred.tolist())
435 | except ZeroDivisionError:
436 | # my_result_wo_refine.append([0.0 for i in range(7)])
437 | # my_result.append([0.0 for i in range(7)])
438 | open_cv_image = None
439 | print('Fail')
440 | except CvBridgeError as e:
441 | print(e)
442 |
443 | return my_result, open_cv_image
444 |
445 | def draw(img, imgpts, label):
446 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]]
447 | for point in imgpts:
448 |
449 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1)
450 | return img
451 |
452 |
453 | def implimentation_seg():
454 | global cv_image
455 | global cv_depth
456 |
457 | bridge = CvBridge()
458 |
459 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth)
460 | print('pose_estimation: ', pose_estimation)
461 | pose_array = PoseArray()
462 | pose_msg = Pose()
463 |
464 | for i in range(len(pose_estimation)):
465 | pose_msg.position.x = pose_estimation[i]['tx']
466 | pose_msg.position.y = pose_estimation[i]['ty']
467 | pose_msg.position.z = pose_estimation[i]['tz']
468 | pose_msg.orientation.x = pose_estimation[i]['qx']
469 | pose_msg.orientation.y = pose_estimation[i]['qy']
470 | pose_msg.orientation.z = pose_estimation[i]['qz']
471 | pose_msg.orientation.w = pose_estimation[i]['qw']
472 |
473 | pose_array.poses.append(pose_msg)
474 |
475 | pose_pub.publish(pose_array)
476 | if fit_image is not None:
477 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8'))
478 |
479 |
480 | def main():
481 |
482 | rospy.init_node('pose_estimation_server')
483 | rgb_sub = rospy.Subscriber(opt.image_subscriber,Image, image_callback)
484 | depth_sub = rospy.Subscriber(opt.depth_subscriber,Image, depth_callback)
485 | rois_sub = rospy.Subscriber('/bbox',BoundingBoxes, rois_callback)
486 | rospy.spin()
487 |
488 | if __name__ == '__main__':
489 | main()
490 |
--------------------------------------------------------------------------------
/scripts/tools/temp.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 |
3 | ############# ros packages #####################
4 | import cv2
5 | import rospy
6 | from sensor_msgs.msg import Image, CameraInfo
7 | from cv_bridge import CvBridge, CvBridgeError
8 | # from be.srv import AddTwoInts, AddTwoIntsResponse  # unused here; the 'be' package is not provided by this repository
9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox
10 | from geometry_msgs.msg import Pose, PoseArray
11 | import tf
12 | import message_filters
13 |
14 | ############ python packages ###################
15 | import _init_paths
16 | import argparse
17 | import os
18 | import copy
19 | import random
20 | import numpy as np
21 | import scipy.io as scio
22 | import scipy.misc
23 | import numpy.ma as ma
24 | import math
25 | import torch
26 | import torch.nn as nn
27 | import torch.nn.parallel
28 | import torch.backends.cudnn as cudnn
29 | import torch.optim as optim
30 | import torch.utils.data
31 | import torchvision.datasets as dset
32 | import torchvision.transforms as transforms
33 | import torchvision.utils as vutils
34 | import torch.nn.functional as F
35 | from torch.autograd import Variable
36 | from datasets.ycb.dataset import PoseDataset
37 | from lib.network import PoseNet, PoseRefineNet
38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix
39 | from model.build_BiSeNet import BiSeNet
40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou
41 | import time
42 |
43 | ##########################################################################################
44 |
45 | parser = argparse.ArgumentParser()
46 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir')
47 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model')
48 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model')
49 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model')
50 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)')
51 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
52 |
53 |
54 | opt = parser.parse_args()
55 |
56 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
57 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
58 | xmap = np.array([[j for i in range(640)] for j in range(480)])
59 | ymap = np.array([[i for i in range(640)] for j in range(480)])
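# Camera intrinsics and depth scale used throughout; these match the YCB-Video defaults
# from the original DenseFusion evaluation code, so swap in your own calibration and
# depth unit when running against a live camera.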
60 | cam_cx = 312.9869
61 | cam_cy = 241.3109
62 | cam_fx = 1066.778
63 | cam_fy = 1067.487
64 | cam_scale = 10000.0
65 | num_obj = 21
66 | img_width = 480
67 | img_length = 640
68 | num_points = 1000
69 | num_points_mesh = 500
70 | iteration = 2
71 | bs = 1
72 | dataset_config_dir = 'datasets/ycb/dataset_config'
73 | ycb_toolbox_dir = 'YCB_Video_toolbox'
74 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
75 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
76 | dist = np.load('distortion.npy')  # distortion coefficients for cv2.projectPoints; assumed here to be the distortion.npy at the repository root
77 | #########################################################################################
78 |
79 | def isRotationMatrix(R) :
80 | Rt = np.transpose(R)
81 | shouldBeIdentity = np.dot(Rt, R)
82 | I = np.identity(3, dtype = R.dtype)
83 | n = np.linalg.norm(I - shouldBeIdentity)
84 | return n < 1e-6
85 |
86 |
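# Convert a rotation matrix to XYZ Euler angles, falling back to z = 0 when sy is close
# to zero (gimbal lock).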
87 | def rotationMatrixToEulerAngles(R) :
88 |
89 | assert(isRotationMatrix(R))
90 |
91 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
92 |
93 | singular = sy < 1e-6
94 |
95 | if not singular :
96 | x = math.atan2(R[2,1] , R[2,2])
97 | y = math.atan2(-R[2,0], sy)
98 | z = math.atan2(R[1,0], R[0,0])
99 | else :
100 | x = math.atan2(-R[1,2], R[1,1])
101 | y = math.atan2(-R[2,0], sy)
102 | z = 0
103 |
104 | return np.array([x, y, z])
105 |
106 | ################################################################################################
107 | """
108 | ##################################################################################################
109 | # get bbox coordinate
110 | def get_bbox(label):
111 | rows = np.any(label, axis=1)
112 | cols = np.any(label, axis=0)
113 | rmin, rmax = \
114 | np.where(rows)[0][[0, -1]]
115 | cmin, cmax = np.where(cols)[0][[0, -1]]
116 | rmax += 1
117 | cmax += 1
118 | r_b = rmax - rmin
119 | for tt in range(len(border_list)):
120 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
121 | r_b = border_list[tt + 1]
122 | break
123 | c_b = cmax - cmin
124 | for tt in range(len(border_list)):
125 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
126 | c_b = border_list[tt + 1]
127 | break
128 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
129 | rmin = center[0] - int(r_b / 2)
130 | rmax = center[0] + int(r_b / 2)
131 | cmin = center[1] - int(c_b / 2)
132 | cmax = center[1] + int(c_b / 2)
133 | if rmin < 0:
134 | delt = -rmin
135 | rmin = 0
136 | rmax += delt
137 | if cmin < 0:
138 | delt = -cmin
139 | cmin = 0
140 | cmax += delt
141 | if rmax > img_width:
142 | delt = rmax - img_width
143 | rmax = img_width
144 | rmin -= delt
145 | if cmax > img_length:
146 | delt = cmax - img_length
147 | cmax = img_length
148 | cmin -= delt
149 | return rmin, rmax, cmin, cmax
150 | """
151 | def get_bbox(rois,idx):
152 | # rmin = int(posecnn_rois[idx][2]) + 1
153 | # rmax = int(posecnn_rois[idx][4]) - 1
154 | # cmin = int(posecnn_rois[idx][1]) + 1
155 | # cmax = int(posecnn_rois[idx][3]) - 1
156 | rmin = int(rois[idx].xmin) + 1
157 | rmax = int(rois[idx].xmax) - 1
158 | cmin = int(rois[idx].ymin) + 1
159 | cmax = int(rois[idx].ymax) - 1
160 | r_b = rmax - rmin
161 | for tt in range(len(border_list)):
162 | if r_b > border_list[tt] and r_b < border_list[tt + 1]:
163 | r_b = border_list[tt + 1]
164 | break
165 | c_b = cmax - cmin
166 | for tt in range(len(border_list)):
167 | if c_b > border_list[tt] and c_b < border_list[tt + 1]:
168 | c_b = border_list[tt + 1]
169 | break
170 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)]
171 | rmin = center[0] - int(r_b / 2)
172 | rmax = center[0] + int(r_b / 2)
173 | cmin = center[1] - int(c_b / 2)
174 | cmax = center[1] + int(c_b / 2)
175 | if rmin < 0:
176 | delt = -rmin
177 | rmin = 0
178 | rmax += delt
179 | if cmin < 0:
180 | delt = -cmin
181 | cmin = 0
182 | cmax += delt
183 | if rmax > img_width:
184 | delt = rmax - img_width
185 | rmax = img_width
186 | rmin -= delt
187 | if cmax > img_length:
188 | delt = cmax - img_length
189 | cmax = img_length
190 | cmin -= delt
191 | return rmin, rmax, cmin, cmax
192 | ####################################################################################################
193 | ################################### load BiSeNet parameters ########################################
194 | ####################################################################################################
195 | print('load BiSeNet')
196 | start_time = time.time()
197 | bise_model = BiSeNet(opt.num_classes, opt.context_path)
198 | bise_model = bise_model.cuda()
199 | bise_model.load_state_dict(torch.load(opt.checkpoint_path))
200 | global bise_model
201 | print('Done!')
202 | print("Load time : {}".format(time.time() - start_time))
203 |
204 | #####################################################################################################
205 | ######################## load DenseFusion network, 3d model #############################
206 | #####################################################################################################
207 | print('load densefusion network')
208 | start_time = time.time()
209 | estimator = PoseNet(num_points = num_points, num_obj = num_obj)
210 | estimator.cuda()
211 | estimator.load_state_dict(torch.load(opt.model))
212 | estimator.eval()
213 | ############################################################################
214 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj)
215 | refiner.cuda()
216 | refiner.load_state_dict(torch.load(opt.refine_model))
217 | refiner.eval()
218 | print('Done')
219 | print("Load time : {}".format(time.time() - start_time))
220 | #####################################################################################################
221 | # class list upload
222 | class_file = open('{0}/classes.txt'.format(dataset_config_dir))
223 | class_id = 1
224 | cld = {}
225 | while 1:
226 | class_input = class_file.readline()
227 | if not class_input:
228 | break
229 | class_input = class_input[:-1]
230 |
231 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input))
232 | cld[class_id] = []
233 | while 1:
234 | input_line = input_file.readline()
235 | if not input_line:
236 | break
237 | input_line = input_line[:-1]
238 | input_line = input_line.split(' ')
239 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])])
240 | input_file.close()
241 | cld[class_id] = np.array(cld[class_id])
242 | class_id += 1
243 | ########################################################################################################
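# seg_predict runs the BiSeNet model on an RGB frame and returns a per-pixel class-label
# map resized to the input's height and width; it is later intersected with the depth
# mask to pick the object pixels passed to DenseFusion.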
244 | def seg_predict(image):
245 | global bise_model
246 | try:
247 | with torch.no_grad():
248 | bise_model.eval()
249 | h,w,_ = image.shape
250 | to_tensor = transforms.Compose([
251 | transforms.ToTensor(),
252 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)),
253 | ])
254 |
255 | image = to_tensor(image)
256 | image = image.unsqueeze_(0)
257 | image = image.cuda()
258 | predict = bise_model(image).squeeze()
259 | predict = reverse_one_hot(predict)
260 | predict = np.array(predict)
261 | predict = np.resize(predict,[h,w])
262 | print(np.unique(predict))
263 | return predict
264 | except CvBridgeError as e:
265 | print(e)
266 | class object_pose_estimation:
267 | def __init__(self):
268 | self.bridge = CvBridge()
269 | # rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback)
270 | # depth_sub = rospy.Subscriber('',Image, depth_callback)
271 | # rois_sub = rospy.Subscriber('',BoundingBoxes, rois_callback)
272 | self.rgb_sub = message_filters.Subscriber('/camera/color/image_raw',Image)
273 | self.depth_sub = message_filters.Subscriber('/camera/depth/image_rect_raw',Image)
274 | self.rois_sub = message_filters.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes)
275 | self.pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10)
276 | self.ts = message_filters.TimeSynchronizer([self.rgb_sub, self.depth_sub,self.rois_sub], queue_size = 10)
277 | self.ts.registerCallback(self.estimation_callback)
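# Note: TimeSynchronizer only fires when all three topics carry identical timestamps,
# which rarely happens with an RGB-D camera plus a detector; ApproximateTimeSynchronizer
# (with a small slop) is usually the more practical choice here.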
278 |
279 |
280 | def estimation_callback(self, rgb,depth,rois):
281 | try:
282 | img = self.bridge.imgmsg_to_cv2(rgb,'bgr8')
283 | depth = self.bridge.imgmsg_to_cv2(depth,'32SC1')
284 | rois = rois.bounding_boxes
285 | print(img, depth, rois)
286 | class_list = ['002_master_chef_can',
287 | '003_cracker_box',
288 | '004_sugar_box',
289 | '005_tomato_soup_can',
290 | '006_mustard_bottle',
291 | '007_tuna_fish_can',
292 | '008_pudding_box',
293 | '009_gelatin_box',
294 | '010_potted_meat_can',
295 | '011_banana',#'019_pitcher_base',
296 | '025_mug',
297 | '021_bleach_cleanser',
298 | '024_bowl',
299 | '035_power_drill',
300 | '036_wood_block',
301 | '037_scissors',
302 | '040_large_marker',
303 | '051_large_clamp',
304 | '052_extra_large_clamp',
305 | '061_foam_brick']
306 | object_number = len(rois)
307 | #lst = posecnn_rois[:,0:1].flatten()
308 | #lst = np.unique(label)
309 | my_result_wo_refine = []
310 | my_result = []
311 | for idx in range(object_number):
312 | #itemid = lst[idx]
313 | itemid = class_list.index(rois[idx].Class) +1
314 | print(itemid, rois[idx])
315 |
316 | try:
317 | label = seg_predict(img)
318 | rmin, rmax, cmin,cmax = get_bbox(rois,idx)
319 | # bounding box cutting
320 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:])
321 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0))
322 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
323 | #mask = mask_label * mask_depth
324 | # only image
325 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
326 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
327 | mask = mask_label * mask_depth
328 | #rmin, rmax, cmin, cmax = get_bbox(mask_label)
329 |
330 |
331 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
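# Reduce the mask to a fixed number of points (num_points): randomly subsample when
# there are too many foreground pixels, wrap-pad the index list when there are too few.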
332 | if len(choose) > num_points:
333 | c_mask = np.zeros(len(choose), dtype=int)
334 | c_mask[:num_points] = 1
335 | np.random.shuffle(c_mask)
336 | choose = choose[c_mask.nonzero()]
337 | else:
338 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
339 |
340 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
341 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
342 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
343 | choose = np.array([choose])
344 |
345 | pt2 = depth_masked / cam_scale
346 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
347 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
348 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
349 |
350 | img_masked = np.array(img)[:, :, :3]
351 | img_masked = np.transpose(img_masked, (2, 0, 1))
352 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
353 |
354 | cloud = torch.from_numpy(cloud.astype(np.float32))
355 | choose = torch.LongTensor(choose.astype(np.int32))
356 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
357 | index = torch.LongTensor([itemid - 1])
358 |
359 | cloud = Variable(cloud).cuda()
360 | choose = Variable(choose).cuda()
361 | img_masked = Variable(img_masked).cuda()
362 | index = Variable(index).cuda()
363 | cloud = cloud.view(1, num_points, 3)
364 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
365 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
366 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
367 | pred_c = pred_c.view(bs, num_points)
368 | how_max, which_max = torch.max(pred_c, 1)
369 | pred_t = pred_t.view(bs * num_points, 1, 3)
370 | points = cloud.view(bs * num_points, 1, 3)
371 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
372 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
373 | my_pred = np.append(my_r, my_t)
374 | # making pose matrix
375 | dof = quaternion_matrix(my_r)
376 | dof[0:3,3] = my_t
377 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3])
378 | rot_to_angle = rot_to_angle.reshape(1,3)
379 | my_t = my_t.reshape(1,3)
380 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0)
381 | object_poses = {
382 | 'tx':my_t[0][0],
383 | 'ty':my_t[0][1],
384 | 'tz':my_t[0][2],
385 | 'qx':my_r[0],
386 | 'qy':my_r[1],
387 | 'qz':my_r[2],
388 | 'qw':my_r[3]}
389 | my_result.append(object_poses)
390 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR)
391 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy],
392 | [0, 0, 1]]))
393 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist)  # use the detected class's model points rather than a hard-coded id
394 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid)
395 | my_result_wo_refine.append(my_pred.tolist())
396 | pose_array = PoseArray()
397 | pose_msg = Pose()
398 | pose_msg.position.x, pose_msg.position.y, pose_msg.position.z = my_t[0]
399 | pose_msg.orientation.x, pose_msg.orientation.y, pose_msg.orientation.z, pose_msg.orientation.w = my_r  # same qx,qy,qz,qw ordering used elsewhere in this package
400 | pose_array.poses.append(pose_msg)
401 | self.pose_pub.publish(pose_array)
402 | # pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8'))  # disabled: neither pose_fit_image nor fit_image is defined in this script
400 |
401 | """
402 | for ite in range(0, iteration):
403 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3)
404 | my_mat = quaternion_matrix(my_r)
405 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3)
406 | my_mat[0:3, 3] = my_t
407 |
408 | new_cloud = torch.bmm((cloud - T), R).contiguous()
409 | pred_r, pred_t = refiner(new_cloud, emb, index)
410 | pred_r = pred_r.view(1, 1, -1)
411 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1))
412 | my_r_2 = pred_r.view(-1).cpu().data.numpy()
413 | my_t_2 = pred_t.view(-1).cpu().data.numpy()
414 | my_mat_2 = quaternion_matrix(my_r_2)
415 |
416 |
417 | my_mat_2[0:3, 3] = my_t_2
418 | my_mat_final = np.dot(my_mat, my_mat_2)
419 | my_r_final = copy.deepcopy(my_mat_final)
420 | my_r_final[0:3, 3] = 0
421 | my_r_final = quaternion_from_matrix(my_r_final, True)
422 |
423 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]])
424 |
425 | my_pred = np.append(my_r_final, my_t_final)
426 | my_r = my_r_final
427 | my_t = my_t_final
428 | """
429 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation)
430 | #my_result.append(my_pred.tolist())
431 | except ZeroDivisionError:
432 | # my_result_wo_refine.append([0.0 for i in range(7)])
433 | # my_result.append([0.0 for i in range(7)])
434 | print('Fail')
435 | except CvBridgeError as e:
436 | print(e)
437 |
438 |
439 | def draw(img, imgpts, label):
440 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]]
441 | for point in imgpts:
442 |
443 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1)
444 | return img
445 |
446 | def main():
447 | rospy.init_node('pose_estimator',anonymous= True)
448 | node = object_pose_estimation()  # renamed from 'Pose' to avoid shadowing the imported geometry_msgs Pose message class
449 | rospy.spin()
450 |
451 | if __name__ == '__main__':
452 | main()
453 |
--------------------------------------------------------------------------------
/scripts/tools/test.py:
--------------------------------------------------------------------------------
1 | #! /usr/bin/env python
2 | import cv2
3 | import rospy
4 | from sensor_msgs.msg import Image, CameraInfo
5 | from cv_bridge import CvBridge, CvBridgeError
6 |
7 | class ImageIo:
8 | def __init__(self):
9 | self.bridge = CvBridge()
10 | # create the publisher once here; a publisher constructed inside the callback may miss messages while its connections are still being established
11 | self.label_pub = rospy.Publisher('/label',Image, queue_size = 10)
12 | self.rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, self.rgb_callback)
13 | def rgb_callback(self,rgb):
14 | img = self.bridge.imgmsg_to_cv2(rgb,"bgr8")
15 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
16 | image = cv2.cvtColor(img.copy(), cv2.COLOR_RGB2GRAY)
17 | self.label_pub.publish(self.bridge.cv2_to_imgmsg(image,encoding="8UC1"))
18 |
19 |
20 |
21 |
22 | def main():
23 | IO = ImageIo()
24 |
25 | if __name__ == '__main__':
26 | rospy.init_node('zzz',anonymous = True)
27 | main()
28 | rospy.spin()
--------------------------------------------------------------------------------
/scripts/tools/train.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # DenseFusion 6D Object Pose Estimation by Iterative Dense Fusion
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by Chen
5 | # --------------------------------------------------------
6 |
7 | import _init_paths
8 | import argparse
9 | import os
10 | import random
11 | import time
12 | import numpy as np
13 | import torch
14 | import torch.nn as nn
15 | import torch.nn.parallel
16 | import torch.backends.cudnn as cudnn
17 | import torch.optim as optim
18 | import torch.utils.data
19 | import torchvision.datasets as dset
20 | import torchvision.transforms as transforms
21 | import torchvision.utils as vutils
22 | from torch.autograd import Variable
23 | from datasets.ycb.dataset import PoseDataset as PoseDataset_ycb
24 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod
25 | from lib.network import PoseNet, PoseRefineNet
26 | from lib.loss import Loss
27 | from lib.loss_refiner import Loss_refine
28 | from lib.utils import setup_logger
29 |
30 | parser = argparse.ArgumentParser()
31 | parser.add_argument('--dataset', type=str, default = 'ycb', help='ycb or linemod')
32 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir (''YCB_Video_Dataset'' or ''Linemod_preprocessed'')')
33 | parser.add_argument('--batch_size', type=int, default = 8, help='batch size')
34 | parser.add_argument('--workers', type=int, default = 10, help='number of data loading workers')
35 | parser.add_argument('--lr', default=0.0001, help='learning rate')
36 | parser.add_argument('--lr_rate', default=0.3, help='learning rate decay rate')
37 | parser.add_argument('--w', default=0.015, help='weight of the confidence regularization term in the loss')
38 | parser.add_argument('--w_rate', default=0.3, help='decay rate applied to w')
39 | parser.add_argument('--decay_margin', default=0.016, help='margin to decay lr & w')
40 | parser.add_argument('--refine_margin', default=0.013, help='margin to start the training of iterative refinement')
41 | parser.add_argument('--noise_trans', default=0.03, help='range of the random noise of translation added to the training data')
42 | parser.add_argument('--iteration', type=int, default = 2, help='number of refinement iterations')
43 | parser.add_argument('--nepoch', type=int, default=500, help='max number of epochs to train')
44 | parser.add_argument('--resume_posenet', type=str, default = '', help='resume PoseNet model')
45 | parser.add_argument('--resume_refinenet', type=str, default = '', help='resume PoseRefineNet model')
46 | parser.add_argument('--start_epoch', type=int, default = 1, help='which epoch to start')
47 | opt = parser.parse_args()
48 |
49 |
50 | def main():
51 | opt.manualSeed = random.randint(1, 10000)
52 | random.seed(opt.manualSeed)
53 | torch.manual_seed(opt.manualSeed)
54 |
55 | if opt.dataset == 'ycb':
56 | opt.num_objects = 21 #number of object classes in the dataset
57 | opt.num_points = 1000 #number of points on the input pointcloud
58 | opt.outf = 'trained_models/ycb' #folder to save trained models
59 | opt.log_dir = 'experiments/logs/ycb' #folder to save logs
60 | opt.repeat_epoch = 1 #number of repeat times for one epoch training
61 | elif opt.dataset == 'linemod':
62 | opt.num_objects = 13
63 | opt.num_points = 500
64 | opt.outf = 'trained_models/linemod'
65 | opt.log_dir = 'experiments/logs/linemod'
66 | opt.repeat_epoch = 20
67 | else:
68 | print('Unknown dataset')
69 | return
70 |
71 | estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects)
72 | estimator.cuda()
73 | refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects)
74 | refiner.cuda()
75 |
76 | if opt.resume_posenet != '':
77 | estimator.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_posenet)))
78 |
79 | if opt.resume_refinenet != '':
80 | refiner.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_refinenet)))
81 | opt.refine_start = True
82 | opt.decay_start = True
83 | opt.lr *= opt.lr_rate
84 | opt.w *= opt.w_rate
85 | opt.batch_size = int(opt.batch_size / opt.iteration)
86 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)
87 | else:
88 | opt.refine_start = False
89 | opt.decay_start = False
90 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)
91 |
92 | if opt.dataset == 'ycb':
93 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
94 | elif opt.dataset == 'linemod':
95 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
96 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers)
97 | if opt.dataset == 'ycb':
98 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
99 | elif opt.dataset == 'linemod':
100 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
101 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
102 |
103 | opt.sym_list = dataset.get_sym_list()
104 | opt.num_points_mesh = dataset.get_num_points_mesh()
105 |
106 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list))
107 |
108 | criterion = Loss(opt.num_points_mesh, opt.sym_list)
109 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
110 |
111 | best_test = np.Inf
112 |
113 | if opt.start_epoch == 1:
114 | for log in os.listdir(opt.log_dir):
115 | os.remove(os.path.join(opt.log_dir, log))
116 | st_time = time.time()
117 |
118 | for epoch in range(opt.start_epoch, opt.nepoch):
119 | logger = setup_logger('epoch%d' % epoch, os.path.join(opt.log_dir, 'epoch_%d_log.txt' % epoch))
120 | logger.info('Train time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Training started'))
121 | train_count = 0
122 | train_dis_avg = 0.0
123 | if opt.refine_start:
124 | estimator.eval()
125 | refiner.train()
126 | else:
127 | estimator.train()
128 | optimizer.zero_grad()
129 |
130 | for rep in range(opt.repeat_epoch):
131 | for i, data in enumerate(dataloader, 0):
132 | points, choose, img, target, model_points, idx = data
133 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \
134 | Variable(choose).cuda(), \
135 | Variable(img).cuda(), \
136 | Variable(target).cuda(), \
137 | Variable(model_points).cuda(), \
138 | Variable(idx).cuda()
139 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
140 | loss, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)
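# criterion returns both the training loss (per DenseFusion, the confidence-weighted
# point distance plus a -w*log(confidence) regulariser) and dis, the raw unweighted
# point-matching distance that is logged and compared against the decay/refine margins below.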
141 |
142 | if opt.refine_start:
143 | for ite in range(0, opt.iteration):
144 | pred_r, pred_t = refiner(new_points, emb, idx)
145 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
146 | dis.backward()
147 | else:
148 | loss.backward()
149 |
150 | train_dis_avg += dis.item()
151 | train_count += 1
152 |
153 | if train_count % opt.batch_size == 0:
154 | logger.info('Train time {0} Epoch {1} Batch {2} Frame {3} Avg_dis:{4}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, int(train_count / opt.batch_size), train_count, train_dis_avg / opt.batch_size))
155 | optimizer.step()
156 | optimizer.zero_grad()
157 | train_dis_avg = 0
158 |
159 | if train_count != 0 and train_count % 1000 == 0:
160 | if opt.refine_start:
161 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_current.pth'.format(opt.outf))
162 | else:
163 | torch.save(estimator.state_dict(), '{0}/pose_model_current.pth'.format(opt.outf))
164 |
165 | print('>>>>>>>>----------epoch {0} train finish---------<<<<<<<<'.format(epoch))
166 |
167 |
168 | logger = setup_logger('epoch%d_test' % epoch, os.path.join(opt.log_dir, 'epoch_%d_test_log.txt' % epoch))
169 | logger.info('Test time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Testing started'))
170 | test_dis = 0.0
171 | test_count = 0
172 | estimator.eval()
173 | refiner.eval()
174 |
175 | for j, data in enumerate(testdataloader, 0):
176 | points, choose, img, target, model_points, idx = data
177 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \
178 | Variable(choose).cuda(), \
179 | Variable(img).cuda(), \
180 | Variable(target).cuda(), \
181 | Variable(model_points).cuda(), \
182 | Variable(idx).cuda()
183 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx)
184 | _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start)
185 |
186 | if opt.refine_start:
187 | for ite in range(0, opt.iteration):
188 | pred_r, pred_t = refiner(new_points, emb, idx)
189 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points)
190 |
191 | test_dis += dis.item()
192 | logger.info('Test time {0} Test Frame No.{1} dis:{2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), test_count, dis))
193 |
194 | test_count += 1
195 |
196 | test_dis = test_dis / test_count
197 | logger.info('Test time {0} Epoch {1} TEST FINISH Avg dis: {2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, test_dis))
198 | if test_dis <= best_test:
199 | best_test = test_dis
200 | if opt.refine_start:
201 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
202 | else:
203 | torch.save(estimator.state_dict(), '{0}/pose_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis))
204 | print(epoch, '>>>>>>>>----------BEST TEST MODEL SAVED---------<<<<<<<<')
205 |
206 | if best_test < opt.decay_margin and not opt.decay_start:
207 | opt.decay_start = True
208 | opt.lr *= opt.lr_rate
209 | opt.w *= opt.w_rate
210 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr)
211 |
212 | if best_test < opt.refine_margin and not opt.refine_start:
213 | opt.refine_start = True
214 | opt.batch_size = int(opt.batch_size / opt.iteration)
215 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr)
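# Once refinement starts, the datasets and loss criteria are rebuilt below because the
# dataset changes what it returns in refine mode (notably the number of sampled model
# points), so num_points_mesh and the Loss / Loss_refine constructors must be refreshed.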
216 |
217 | if opt.dataset == 'ycb':
218 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
219 | elif opt.dataset == 'linemod':
220 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start)
221 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers)
222 | if opt.dataset == 'ycb':
223 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
224 | elif opt.dataset == 'linemod':
225 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start)
226 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers)
227 |
228 | opt.sym_list = dataset.get_sym_list()
229 | opt.num_points_mesh = dataset.get_num_points_mesh()
230 |
231 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list))
232 |
233 | criterion = Loss(opt.num_points_mesh, opt.sym_list)
234 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list)
235 |
236 | if __name__ == '__main__':
237 | main()
238 |
--------------------------------------------------------------------------------
/scripts/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/utils.pyc
--------------------------------------------------------------------------------
/srv/CameraRequests.srv:
--------------------------------------------------------------------------------
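# Request: a single integer trigger value; response: the estimated object poses,
# presumably returned as a geometry_msgs/PoseArray.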
1 | int64 a
2 | ---
3 | geometry_msgs/PoseArray pose_array
4 |
5 |
--------------------------------------------------------------------------------