├── CMakeLists.txt ├── README.md ├── datasets └── ycb │ ├── __init__.pyc │ ├── dataset.pyc │ └── dataset_config │ ├── classes.txt │ ├── test_data_list.txt │ └── train_data_list.txt ├── package.xml ├── scripts ├── 1 ├── LICENSE ├── README.md ├── assets │ ├── compare.png │ ├── pullfig.png │ ├── result_linemod.png │ └── result_ycb.png ├── datasets │ ├── linemod │ │ ├── dataset.py │ │ └── dataset_config │ │ │ └── models_info.yml │ └── ycb │ │ ├── dataset.py │ │ └── dataset_config │ │ ├── classes.txt │ │ ├── test_data_list.txt │ │ └── train_data_list.txt ├── distortion.npy ├── eval.py ├── experiments │ └── scripts │ │ ├── eval_linemod.sh │ │ ├── eval_ycb.sh │ │ ├── ros_eval_msg.sh │ │ ├── ros_eval_ycb.sh │ │ ├── test.sh │ │ ├── train_linemod.sh │ │ └── train_ycb.sh ├── lib │ ├── __init__.pyc │ ├── extractors.pyc │ ├── knn │ │ ├── __init__.pyc │ │ ├── build │ │ │ └── knn_cuda_kernel.so │ │ ├── build_ffi.py │ │ ├── knn_pytorch │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ └── __init__.cpython-36.pyc │ │ │ └── _knn_pytorch.so │ │ └── src │ │ │ ├── knn_cuda_kernel.cu │ │ │ ├── knn_cuda_kernel.h │ │ │ ├── knn_pytorch.c │ │ │ └── knn_pytorch.h │ ├── loss.py │ ├── loss.pyc │ ├── loss_refiner.py │ ├── network.py │ ├── pspnet.pyc │ └── transformations.pyc ├── loss.py ├── matrix.npy ├── model │ ├── build_BiSeNet.py │ └── build_contextpath.py ├── predict.npy ├── tools │ ├── __pycache__ │ │ └── _init_paths.cpython-35.pyc │ ├── _init_paths.py │ ├── _init_paths.pyc │ ├── eval_linemod.py │ ├── eval_ycb.py │ ├── ros_eval_ycb.py │ ├── ros_eval_ycb_message.py │ ├── ros_eval_ycb_publisher.py │ ├── temp.py │ ├── test.py │ └── train.py └── utils.pyc └── srv └── CameraRequests.srv /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(densefusion) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | rospy 12 | std_msgs 13 | message_generation 14 | ) 15 | 16 | ## System dependencies are found with CMake's conventions 17 | # find_package(Boost REQUIRED COMPONENTS system) 18 | 19 | 20 | ## Uncomment this if the package has a setup.py. This macro ensures 21 | ## modules and global scripts declared therein get installed 22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 23 | # catkin_python_setup() 24 | 25 | ################################################ 26 | ## Declare ROS messages, services and actions ## 27 | ################################################ 28 | 29 | ## To declare and build messages, services or actions from within this 30 | ## package, follow these steps: 31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 
33 | ## * In the file package.xml: 34 | ## * add a build_depend tag for "message_generation" 35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 37 | ## but can be declared for certainty nonetheless: 38 | ## * add a exec_depend tag for "message_runtime" 39 | ## * In this file (CMakeLists.txt): 40 | ## * add "message_generation" and every package in MSG_DEP_SET to 41 | ## find_package(catkin REQUIRED COMPONENTS ...) 42 | ## * add "message_runtime" and every package in MSG_DEP_SET to 43 | ## catkin_package(CATKIN_DEPENDS ...) 44 | ## * uncomment the add_*_files sections below as needed 45 | ## and list every .msg/.srv/.action file to be processed 46 | ## * uncomment the generate_messages entry below 47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 48 | 49 | ## Generate messages in the 'msg' folder 50 | # add_message_files( 51 | # FILES 52 | # Message1.msg 53 | # Message2.msg 54 | # ) 55 | 56 | ## Generate services in the 'srv' folder 57 | add_service_files( 58 | FILES 59 | CameraRequests.srv 60 | ) 61 | 62 | ## Generate actions in the 'action' folder 63 | # add_action_files( 64 | # FILES 65 | # Action1.action 66 | # Action2.action 67 | # ) 68 | 69 | ## Generate added messages and services with any dependencies listed here 70 | generate_messages( 71 | DEPENDENCIES 72 | std_msgs 73 | ) 74 | 75 | ################################################ 76 | ## Declare ROS dynamic reconfigure parameters ## 77 | ################################################ 78 | 79 | ## To declare and build dynamic reconfigure parameters within this 80 | ## package, follow these steps: 81 | ## * In the file package.xml: 82 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 83 | ## * In this file (CMakeLists.txt): 84 | ## * add "dynamic_reconfigure" to 85 | ## find_package(catkin REQUIRED COMPONENTS ...) 
86 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 87 | ## and list every .cfg file to be processed 88 | 89 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 90 | # generate_dynamic_reconfigure_options( 91 | # cfg/DynReconf1.cfg 92 | # cfg/DynReconf2.cfg 93 | # ) 94 | 95 | ################################### 96 | ## catkin specific configuration ## 97 | ################################### 98 | ## The catkin_package macro generates cmake config files for your package 99 | ## Declare things to be passed to dependent projects 100 | ## INCLUDE_DIRS: uncomment this if your package contains header files 101 | ## LIBRARIES: libraries you create in this project that dependent projects also need 102 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 103 | ## DEPENDS: system dependencies of this project that dependent projects also need 104 | catkin_package( 105 | # INCLUDE_DIRS include 106 | # LIBRARIES densefusion 107 | # CATKIN_DEPENDS rospy std_msgs 108 | # DEPENDS system_lib 109 | ) 110 | 111 | ########### 112 | ## Build ## 113 | ########### 114 | 115 | ## Specify additional locations of header files 116 | ## Your package locations should be listed before other locations 117 | include_directories( 118 | # include 119 | ${catkin_INCLUDE_DIRS} 120 | ) 121 | 122 | ## Declare a C++ library 123 | # add_library(${PROJECT_NAME} 124 | # src/${PROJECT_NAME}/densefusion.cpp 125 | # ) 126 | 127 | ## Add cmake target dependencies of the library 128 | ## as an example, code may need to be generated before libraries 129 | ## either from message generation or dynamic reconfigure 130 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 131 | 132 | ## Declare a C++ executable 133 | ## With catkin_make all packages are built within a single CMake context 134 | ## The recommended prefix ensures that target names across packages don't collide 135 | # add_executable(${PROJECT_NAME}_node src/densefusion_node.cpp) 136 | 137 | ## Rename C++ executable without prefix 138 | ## The above recommended prefix causes long target names, the following renames the 139 | ## target back to the shorter version for ease of user use 140 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 141 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 142 | 143 | ## Add cmake target dependencies of the executable 144 | ## same as for the library above 145 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 146 | 147 | ## Specify libraries to link a library or executable target against 148 | # target_link_libraries(${PROJECT_NAME}_node 149 | # ${catkin_LIBRARIES} 150 | # ) 151 | 152 | ############# 153 | ## Install ## 154 | ############# 155 | 156 | # all install targets should use catkin DESTINATION variables 157 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 158 | 159 | ## Mark executable scripts (Python etc.) 
for installation 160 | ## in contrast to setup.py, you can choose the destination 161 | # install(PROGRAMS 162 | # scripts/my_python_script 163 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 164 | # ) 165 | 166 | ## Mark executables and/or libraries for installation 167 | # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node 168 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 169 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 170 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 171 | # ) 172 | 173 | ## Mark cpp header files for installation 174 | # install(DIRECTORY include/${PROJECT_NAME}/ 175 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 176 | # FILES_MATCHING PATTERN "*.h" 177 | # PATTERN ".svn" EXCLUDE 178 | # ) 179 | 180 | ## Mark other files for installation (e.g. launch and bag files, etc.) 181 | # install(FILES 182 | # # myfile1 183 | # # myfile2 184 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 185 | # ) 186 | 187 | ############# 188 | ## Testing ## 189 | ############# 190 | 191 | ## Add gtest based cpp test target and link libraries 192 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_densefusion.cpp) 193 | # if(TARGET ${PROJECT_NAME}-test) 194 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) 195 | # endif() 196 | 197 | ## Add folders to be run by python nosetests 198 | # catkin_add_nosetests(test) 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DenseFusion_ROS 2 | 3 | This repository is based on https://github.com/j96w/DenseFusion and https://github.com/ooooverflow/BiSeNet. 4 | 5 | If you are a person using Docker, https://hub.docker.com/repository/docker/choo2969/ros-densefusion 6 | 7 | segmentation weight file [link](https://drive.google.com/drive/folders/1fRie5jwj9Liuwvs64_Mru8wUCy65Os0_?usp=sharing) 8 | densefusion weight file [link](https://github.com/j96w/DenseFusion) 9 | 10 | ~~~ 11 | $ docker pull choo2969/ros-densefusion 12 | ~~~ 13 | 14 | 15 | ## Requirements 16 | --- 17 | - ROS (Kinetic) 18 | - Python2.7 19 | - Pytorch 0.4.1 20 | - PIL 21 | - scipy 22 | - numpy 23 | - pyyaml 24 | - logging 25 | - matplotlib 26 | - CUDA 27 | 28 | 29 | 30 | ## Start 31 | --- 32 | we have tested on Ubuntu 16.04 with ROS Kinetic and NVIDIA Titan XP and Geforce 1080 Ti 33 | 1. Start camera node (D435) 34 | 35 | - Step1. Run your own camera, If your camera is not a D435 or D415, you will need to edit the RGB image and Depth Subscriber. Edit image_subscriber and depth_subscriber with your camera node 36 | ~~~ 37 | vim path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh 38 | ~~~ 39 | 40 | - Step2. Edit the cam_cx,cam_cy,cam_fx,cam_fy values 41 | ~~~ 42 | vim path/densefusion/scripts/tool/ros_eval_ycb_message.py 43 | ~~~ 44 | 45 | 2. 
Start 46 | ~~~ 47 | sh path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh 48 | ~~~ 49 | Running this will launch the service server that runs 6D pose estimation. 50 | -------------------------------------------------------------------------------- /datasets/ycb/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/__init__.pyc -------------------------------------------------------------------------------- /datasets/ycb/dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/dataset.pyc -------------------------------------------------------------------------------- /datasets/ycb/dataset_config/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | densefusion 4 | 0.0.0 5 | The densefusion package 6 | 7 | 8 | 9 | 10 | root 11 | 12 | 13 | 14 | 15 | 16 | TODO 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | catkin 53 | rospy 54 | std_msgs 55 | message_generation 56 | 57 | rospy 58 | std_msgs 59 | 60 | rospy 61 | std_msgs 62 | message_runtime 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/1: -------------------------------------------------------------------------------- 1 | # ~/.bashrc: executed by bash(1) for non-login shells. 2 | # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) 3 | # for examples 4 | 5 | # If not running interactively, don't do anything 6 | [ -z "$PS1" ] && return 7 | 8 | # don't put duplicate lines in the history. See bash(1) for more options 9 | # ... or force ignoredups and ignorespace 10 | HISTCONTROL=ignoredups:ignorespace 11 | 12 | # append to the history file, don't overwrite it 13 | shopt -s histappend 14 | 15 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1) 16 | HISTSIZE=1000 17 | HISTFILESIZE=2000 18 | 19 | # check the window size after each command and, if necessary, 20 | # update the values of LINES and COLUMNS. 
21 | shopt -s checkwinsize 22 | 23 | # make less more friendly for non-text input files, see lesspipe(1) 24 | [ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" 25 | 26 | # set variable identifying the chroot you work in (used in the prompt below) 27 | if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then 28 | debian_chroot=$(cat /etc/debian_chroot) 29 | fi 30 | 31 | # set a fancy prompt (non-color, unless we know we "want" color) 32 | case "$TERM" in 33 | xterm-color) color_prompt=yes;; 34 | esac 35 | 36 | # uncomment for a colored prompt, if the terminal has the capability; turned 37 | # off by default to not distract the user: the focus in a terminal window 38 | # should be on the output of commands, not on the prompt 39 | #force_color_prompt=yes 40 | 41 | if [ -n "$force_color_prompt" ]; then 42 | if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then 43 | # We have color support; assume it's compliant with Ecma-48 44 | # (ISO/IEC-6429). (Lack of such support is extremely rare, and such 45 | # a case would tend to support setf rather than setaf.) 46 | color_prompt=yes 47 | else 48 | color_prompt= 49 | fi 50 | fi 51 | 52 | if [ "$color_prompt" = yes ]; then 53 | PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' 54 | else 55 | PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ ' 56 | fi 57 | unset color_prompt force_color_prompt 58 | 59 | # If this is an xterm set the title to user@host:dir 60 | case "$TERM" in 61 | xterm*|rxvt*) 62 | PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" 63 | ;; 64 | *) 65 | ;; 66 | esac 67 | 68 | # enable color support of ls and also add handy aliases 69 | if [ -x /usr/bin/dircolors ]; then 70 | test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" 71 | alias ls='ls --color=auto' 72 | #alias dir='dir --color=auto' 73 | #alias vdir='vdir --color=auto' 74 | 75 | alias grep='grep --color=auto' 76 | alias fgrep='fgrep --color=auto' 77 | alias egrep='egrep --color=auto' 78 | fi 79 | 80 | # some more ls aliases 81 | alias ll='ls -alF' 82 | alias la='ls -A' 83 | alias l='ls -CF' 84 | 85 | # Alias definitions. 86 | # You may want to put all your additions into a separate file like 87 | # ~/.bash_aliases, instead of adding them here directly. 88 | # See /usr/share/doc/bash-doc/examples in the bash-doc package. 89 | 90 | if [ -f ~/.bash_aliases ]; then 91 | . ~/.bash_aliases 92 | fi 93 | 94 | # enable programmable completion features (you don't need to enable 95 | # this, if it's already enabled in /etc/bash.bashrc and /etc/profile 96 | # sources /etc/bash.bashrc). 97 | #if [ -f /etc/bash_completion ] && ! shopt -oq posix; then 98 | # . 
/etc/bash_completion 99 | #fi 100 | alias eb='nano ~/.bashrc' 101 | alias sb='source ~/.bashrc' 102 | alias gs='git status' 103 | alias gp='git pull' 104 | alias cw='cd ~/catkin_ws' 105 | alias cs='cd ~/catkin_ws/src' 106 | alias cm='cd ~/catkin_ws && catkin_make' 107 | source ~/catkin_ws/devel/setup.bash 108 | export ROS_MASTER_URI=http://192.168.1.15:11311 109 | export ROS_HOSTNAME=172.17.0.5 110 | -------------------------------------------------------------------------------- /scripts/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jeremy Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/assets/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/compare.png -------------------------------------------------------------------------------- /scripts/assets/pullfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/pullfig.png -------------------------------------------------------------------------------- /scripts/assets/result_linemod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_linemod.png -------------------------------------------------------------------------------- /scripts/assets/result_ycb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_ycb.png -------------------------------------------------------------------------------- /scripts/datasets/linemod/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import errno 
6 | import torch 7 | import json 8 | import codecs 9 | import numpy as np 10 | import sys 11 | import torchvision.transforms as transforms 12 | import argparse 13 | import json 14 | import time 15 | import random 16 | import numpy.ma as ma 17 | import copy 18 | import scipy.misc 19 | import scipy.io as scio 20 | import yaml 21 | import cv2 22 | 23 | 24 | class PoseDataset(data.Dataset): 25 | def __init__(self, mode, num, add_noise, root, noise_trans, refine): 26 | self.objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] 27 | self.mode = mode 28 | 29 | self.list_rgb = [] 30 | self.list_depth = [] 31 | self.list_label = [] 32 | self.list_obj = [] 33 | self.list_rank = [] 34 | self.meta = {} 35 | self.pt = {} 36 | self.root = root 37 | self.noise_trans = noise_trans 38 | self.refine = refine 39 | 40 | item_count = 0 41 | for item in self.objlist: 42 | if self.mode == 'train': 43 | input_file = open('{0}/data/{1}/train.txt'.format(self.root, '%02d' % item)) 44 | else: 45 | input_file = open('{0}/data/{1}/test.txt'.format(self.root, '%02d' % item)) 46 | while 1: 47 | item_count += 1 48 | input_line = input_file.readline() 49 | if self.mode == 'test' and item_count % 10 != 0: 50 | continue 51 | if not input_line: 52 | break 53 | if input_line[-1:] == '\n': 54 | input_line = input_line[:-1] 55 | self.list_rgb.append('{0}/data/{1}/rgb/{2}.png'.format(self.root, '%02d' % item, input_line)) 56 | self.list_depth.append('{0}/data/{1}/depth/{2}.png'.format(self.root, '%02d' % item, input_line)) 57 | if self.mode == 'eval': 58 | self.list_label.append('{0}/segnet_results/{1}_label/{2}_label.png'.format(self.root, '%02d' % item, input_line)) 59 | else: 60 | self.list_label.append('{0}/data/{1}/mask/{2}.png'.format(self.root, '%02d' % item, input_line)) 61 | 62 | self.list_obj.append(item) 63 | self.list_rank.append(int(input_line)) 64 | 65 | meta_file = open('{0}/data/{1}/gt.yml'.format(self.root, '%02d' % item), 'r') 66 | self.meta[item] = yaml.load(meta_file) 67 | self.pt[item] = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % item)) 68 | 69 | print("Object {0} buffer loaded".format(item)) 70 | 71 | self.length = len(self.list_rgb) 72 | 73 | self.cam_cx = 325.26110 74 | self.cam_cy = 242.04899 75 | self.cam_fx = 572.41140 76 | self.cam_fy = 573.57043 77 | 78 | self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 79 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 80 | 81 | self.num = num 82 | self.add_noise = add_noise 83 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05) 84 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 85 | self.border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 86 | self.num_pt_mesh_large = 500 87 | self.num_pt_mesh_small = 500 88 | self.symmetry_obj_idx = [7, 8] 89 | 90 | def __getitem__(self, index): 91 | img = Image.open(self.list_rgb[index]) 92 | ori_img = np.array(img) 93 | depth = np.array(Image.open(self.list_depth[index])) 94 | label = np.array(Image.open(self.list_label[index])) 95 | obj = self.list_obj[index] 96 | rank = self.list_rank[index] 97 | 98 | if obj == 2: 99 | for i in range(0, len(self.meta[obj][rank])): 100 | if self.meta[obj][rank][i]['obj_id'] == 2: 101 | meta = self.meta[obj][rank][i] 102 | break 103 | else: 104 | meta = self.meta[obj][rank][0] 105 | 106 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 107 | if self.mode == 'eval': 108 | mask_label = ma.getmaskarray(ma.masked_equal(label, 
np.array(255))) 109 | else: 110 | mask_label = ma.getmaskarray(ma.masked_equal(label, np.array([255, 255, 255])))[:, :, 0] 111 | 112 | mask = mask_label * mask_depth 113 | 114 | if self.add_noise: 115 | img = self.trancolor(img) 116 | 117 | img = np.array(img)[:, :, :3] 118 | img = np.transpose(img, (2, 0, 1)) 119 | img_masked = img 120 | 121 | if self.mode == 'eval': 122 | rmin, rmax, cmin, cmax = get_bbox(mask_to_bbox(mask_label)) 123 | else: 124 | rmin, rmax, cmin, cmax = get_bbox(meta['obj_bb']) 125 | 126 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 127 | #p_img = np.transpose(img_masked, (1, 2, 0)) 128 | #scipy.misc.imsave('evaluation_result/{0}_input.png'.format(index), p_img) 129 | 130 | target_r = np.resize(np.array(meta['cam_R_m2c']), (3, 3)) 131 | target_t = np.array(meta['cam_t_m2c']) 132 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)]) 133 | 134 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 135 | if len(choose) == 0: 136 | cc = torch.LongTensor([0]) 137 | return(cc, cc, cc, cc, cc, cc) 138 | 139 | if len(choose) > self.num: 140 | c_mask = np.zeros(len(choose), dtype=int) 141 | c_mask[:self.num] = 1 142 | np.random.shuffle(c_mask) 143 | choose = choose[c_mask.nonzero()] 144 | else: 145 | choose = np.pad(choose, (0, self.num - len(choose)), 'wrap') 146 | 147 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 148 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 149 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 150 | choose = np.array([choose]) 151 | 152 | cam_scale = 1.0 153 | pt2 = depth_masked / cam_scale 154 | pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx 155 | pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy 156 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 157 | cloud = cloud / 1000.0 158 | 159 | if self.add_noise: 160 | cloud = np.add(cloud, add_t) 161 | 162 | #fw = open('evaluation_result/{0}_cld.xyz'.format(index), 'w') 163 | #for it in cloud: 164 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 165 | #fw.close() 166 | 167 | model_points = self.pt[obj] / 1000.0 168 | dellist = [j for j in range(0, len(model_points))] 169 | dellist = random.sample(dellist, len(model_points) - self.num_pt_mesh_small) 170 | model_points = np.delete(model_points, dellist, axis=0) 171 | 172 | #fw = open('evaluation_result/{0}_model_points.xyz'.format(index), 'w') 173 | #for it in model_points: 174 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 175 | #fw.close() 176 | 177 | target = np.dot(model_points, target_r.T) 178 | if self.add_noise: 179 | target = np.add(target, target_t / 1000.0 + add_t) 180 | out_t = target_t / 1000.0 + add_t 181 | else: 182 | target = np.add(target, target_t / 1000.0) 183 | out_t = target_t / 1000.0 184 | 185 | #fw = open('evaluation_result/{0}_tar.xyz'.format(index), 'w') 186 | #for it in target: 187 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 188 | #fw.close() 189 | 190 | return torch.from_numpy(cloud.astype(np.float32)), \ 191 | torch.LongTensor(choose.astype(np.int32)), \ 192 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 193 | torch.from_numpy(target.astype(np.float32)), \ 194 | torch.from_numpy(model_points.astype(np.float32)), \ 195 | torch.LongTensor([self.objlist.index(obj)]) 196 | 197 | def __len__(self): 198 | return self.length 199 | 200 | def get_sym_list(self): 201 | return 
self.symmetry_obj_idx 202 | 203 | def get_num_points_mesh(self): 204 | if self.refine: 205 | return self.num_pt_mesh_large 206 | else: 207 | return self.num_pt_mesh_small 208 | 209 | 210 | 211 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 212 | img_width = 480 213 | img_length = 640 214 | 215 | 216 | def mask_to_bbox(mask): 217 | mask = mask.astype(np.uint8) 218 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 219 | 220 | 221 | x = 0 222 | y = 0 223 | w = 0 224 | h = 0 225 | for contour in contours: 226 | tmp_x, tmp_y, tmp_w, tmp_h = cv2.boundingRect(contour) 227 | if tmp_w * tmp_h > w * h: 228 | x = tmp_x 229 | y = tmp_y 230 | w = tmp_w 231 | h = tmp_h 232 | return [x, y, w, h] 233 | 234 | 235 | def get_bbox(bbox): 236 | bbx = [bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2]] 237 | if bbx[0] < 0: 238 | bbx[0] = 0 239 | if bbx[1] >= 480: 240 | bbx[1] = 479 241 | if bbx[2] < 0: 242 | bbx[2] = 0 243 | if bbx[3] >= 640: 244 | bbx[3] = 639 245 | rmin, rmax, cmin, cmax = bbx[0], bbx[1], bbx[2], bbx[3] 246 | r_b = rmax - rmin 247 | for tt in range(len(border_list)): 248 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 249 | r_b = border_list[tt + 1] 250 | break 251 | c_b = cmax - cmin 252 | for tt in range(len(border_list)): 253 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 254 | c_b = border_list[tt + 1] 255 | break 256 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 257 | rmin = center[0] - int(r_b / 2) 258 | rmax = center[0] + int(r_b / 2) 259 | cmin = center[1] - int(c_b / 2) 260 | cmax = center[1] + int(c_b / 2) 261 | if rmin < 0: 262 | delt = -rmin 263 | rmin = 0 264 | rmax += delt 265 | if cmin < 0: 266 | delt = -cmin 267 | cmin = 0 268 | cmax += delt 269 | if rmax > 480: 270 | delt = rmax - 480 271 | rmax = 480 272 | rmin -= delt 273 | if cmax > 640: 274 | delt = cmax - 640 275 | cmax = 640 276 | cmin -= delt 277 | return rmin, rmax, cmin, cmax 278 | 279 | 280 | def ply_vtx(path): 281 | f = open(path) 282 | assert f.readline().strip() == "ply" 283 | f.readline() 284 | f.readline() 285 | N = int(f.readline().split()[-1]) 286 | while f.readline().strip() != "end_header": 287 | continue 288 | pts = [] 289 | for _ in range(N): 290 | pts.append(np.float32(f.readline().split()[:3])) 291 | return np.array(pts) 292 | -------------------------------------------------------------------------------- /scripts/datasets/linemod/dataset_config/models_info.yml: -------------------------------------------------------------------------------- 1 | 1: {diameter: 102.09865663, min_x: -37.93430000, min_y: -38.79960000, min_z: -45.88450000, size_x: 75.86860000, size_y: 77.59920000, size_z: 91.76900000} 2 | 2: {diameter: 247.50624233, min_x: -107.83500000, min_y: -60.92790000, min_z: -109.70500000, size_x: 215.67000000, size_y: 121.85570000, size_z: 219.41000000} 3 | 3: {diameter: 167.35486092, min_x: -83.21620000, min_y: -82.65910000, min_z: -37.23640000, size_x: 166.43240000, size_y: 165.31820000, size_z: 74.47280000} 4 | 4: {diameter: 172.49224865, min_x: -68.32970000, min_y: -71.51510000, min_z: -50.24850000, size_x: 136.65940000, size_y: 143.03020000, size_z: 100.49700000} 5 | 5: {diameter: 201.40358597, min_x: -50.39580000, min_y: -90.89790000, min_z: -96.86700000, size_x: 100.79160000, size_y: 181.79580000, size_z: 193.73400000} 6 | 6: {diameter: 154.54551808, min_x: -33.50540000, min_y: -63.81650000, min_z: -58.72830000, size_x: 67.01070000, size_y: 127.63300000, size_z: 
117.45660000} 7 | 7: {diameter: 124.26430816, min_x: -58.78990000, min_y: -45.75560000, min_z: -47.31120000, size_x: 117.57980000, size_y: 91.51120000, size_z: 94.62240000} 8 | 8: {diameter: 261.47178102, min_x: -114.73800000, min_y: -37.73570000, min_z: -104.00100000, size_x: 229.47600000, size_y: 75.47140000, size_z: 208.00200000} 9 | 9: {diameter: 108.99920102, min_x: -52.21460000, min_y: -38.70380000, min_z: -42.84850000, size_x: 104.42920000, size_y: 77.40760000, size_z: 85.69700000} 10 | 10: {diameter: 164.62758848, min_x: -75.09230000, min_y: -53.53750000, min_z: -34.62070000, size_x: 150.18460000, size_y: 107.07500000, size_z: 69.24140000} 11 | 11: {diameter: 175.88933422, min_x: -18.36050000, min_y: -38.93300000, min_z: -86.40790000, size_x: 36.72110000, size_y: 77.86600000, size_z: 172.81580000} 12 | 12: {diameter: 145.54287471, min_x: -50.44390000, min_y: -54.24850000, min_z: -45.40000000, size_x: 100.88780000, size_y: 108.49700000, size_z: 90.80000000} 13 | 13: {diameter: 278.07811733, min_x: -129.11300000, min_y: -59.24100000, min_z: -70.56620000, size_x: 258.22600000, size_y: 118.48210000, size_z: 141.13240000} 14 | 14: {diameter: 282.60129399, min_x: -101.57300000, min_y: -58.87630000, min_z: -106.55800000, size_x: 203.14600000, size_y: 117.75250000, size_z: 213.11600000} 15 | 15: {diameter: 212.35825148, min_x: -46.95910000, min_y: -73.71670000, min_z: -92.37370000, size_x: 93.91810000, size_y: 147.43340000, size_z: 184.74740000} -------------------------------------------------------------------------------- /scripts/datasets/ycb/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import torch 6 | import numpy as np 7 | import torchvision.transforms as transforms 8 | import argparse 9 | import time 10 | import random 11 | from lib.transformations import quaternion_from_euler, euler_matrix, random_quaternion, quaternion_matrix 12 | import numpy.ma as ma 13 | import copy 14 | import scipy.misc 15 | import scipy.io as scio 16 | 17 | 18 | class PoseDataset(data.Dataset): 19 | def __init__(self, mode, num_pt, add_noise, root, noise_trans, refine): 20 | if mode == 'train': 21 | self.path = 'datasets/ycb/dataset_config/train_data_list.txt' 22 | elif mode == 'test': 23 | self.path = 'datasets/ycb/dataset_config/test_data_list.txt' 24 | self.num_pt = num_pt 25 | self.root = root 26 | self.add_noise = add_noise 27 | self.noise_trans = noise_trans 28 | 29 | self.list = [] 30 | self.real = [] 31 | self.syn = [] 32 | input_file = open(self.path) 33 | while 1: 34 | input_line = input_file.readline() 35 | if not input_line: 36 | break 37 | if input_line[-1:] == '\n': 38 | input_line = input_line[:-1] 39 | if input_line[:5] == 'data/': 40 | self.real.append(input_line) 41 | else: 42 | self.syn.append(input_line) 43 | self.list.append(input_line) 44 | input_file.close() 45 | 46 | self.length = len(self.list) 47 | self.len_real = len(self.real) 48 | self.len_syn = len(self.syn) 49 | 50 | class_file = open('datasets/ycb/dataset_config/classes.txt') 51 | class_id = 1 52 | self.cld = {} 53 | while 1: 54 | class_input = class_file.readline() 55 | if not class_input: 56 | break 57 | 58 | input_file = open('{0}/models/{1}/points.xyz'.format(self.root, class_input[:-1])) 59 | self.cld[class_id] = [] 60 | while 1: 61 | input_line = input_file.readline() 62 | if not input_line: 63 | break 64 | input_line = input_line[:-1].split(' ') 65 | 
self.cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 66 | self.cld[class_id] = np.array(self.cld[class_id]) 67 | input_file.close() 68 | 69 | class_id += 1 70 | 71 | self.cam_cx_1 = 312.9869 72 | self.cam_cy_1 = 241.3109 73 | self.cam_fx_1 = 1066.778 74 | self.cam_fy_1 = 1067.487 75 | 76 | self.cam_cx_2 = 323.7872 77 | self.cam_cy_2 = 279.6921 78 | self.cam_fx_2 = 1077.836 79 | self.cam_fy_2 = 1078.189 80 | 81 | self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 82 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 83 | 84 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05) 85 | self.noise_img_loc = 0.0 86 | self.noise_img_scale = 7.0 87 | self.minimum_num_pt = 50 88 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 89 | self.symmetry_obj_idx = [12, 15, 18, 19, 20] 90 | self.num_pt_mesh_small = 500 91 | self.num_pt_mesh_large = 2600 92 | self.refine = refine 93 | self.front_num = 2 94 | 95 | print(len(self.list)) 96 | 97 | def __getitem__(self, index): 98 | img = Image.open('{0}/{1}-color.png'.format(self.root, self.list[index])) 99 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(self.root, self.list[index]))) 100 | label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index]))) 101 | meta = scio.loadmat('{0}/{1}-meta.mat'.format(self.root, self.list[index])) 102 | 103 | if self.list[index][:8] != 'data_syn' and int(self.list[index][5:9]) >= 60: 104 | cam_cx = self.cam_cx_2 105 | cam_cy = self.cam_cy_2 106 | cam_fx = self.cam_fx_2 107 | cam_fy = self.cam_fy_2 108 | else: 109 | cam_cx = self.cam_cx_1 110 | cam_cy = self.cam_cy_1 111 | cam_fx = self.cam_fx_1 112 | cam_fy = self.cam_fy_1 113 | 114 | mask_back = ma.getmaskarray(ma.masked_equal(label, 0)) 115 | 116 | add_front = False 117 | if self.add_noise: 118 | for k in range(5): 119 | seed = random.choice(self.syn) 120 | front = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB"))) 121 | front = np.transpose(front, (2, 0, 1)) 122 | f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed))) 123 | front_label = np.unique(f_label).tolist()[1:] 124 | if len(front_label) < self.front_num: 125 | continue 126 | front_label = random.sample(front_label, self.front_num) 127 | for f_i in front_label: 128 | mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i)) 129 | if f_i == front_label[0]: 130 | mask_front = mk 131 | else: 132 | mask_front = mask_front * mk 133 | t_label = label * mask_front 134 | if len(t_label.nonzero()[0]) > 1000: 135 | label = t_label 136 | add_front = True 137 | break 138 | 139 | obj = meta['cls_indexes'].flatten().astype(np.int32) 140 | 141 | while 1: 142 | idx = np.random.randint(0, len(obj)) 143 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 144 | mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx])) 145 | mask = mask_label * mask_depth 146 | if len(mask.nonzero()[0]) > self.minimum_num_pt: 147 | break 148 | 149 | if self.add_noise: 150 | img = self.trancolor(img) 151 | 152 | rmin, rmax, cmin, cmax = get_bbox(mask_label) 153 | img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax] 154 | 155 | if self.list[index][:8] == 'data_syn': 156 | seed = random.choice(self.real) 157 | back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB"))) 158 | back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax] 159 | img_masked 
= back * mask_back[rmin:rmax, cmin:cmax] + img 160 | else: 161 | img_masked = img 162 | 163 | if self.add_noise and add_front: 164 | img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax]) 165 | 166 | if self.list[index][:8] == 'data_syn': 167 | img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape) 168 | 169 | # p_img = np.transpose(img_masked, (1, 2, 0)) 170 | # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img) 171 | # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32)) 172 | 173 | target_r = meta['poses'][:, :, idx][:, 0:3] 174 | target_t = np.array([meta['poses'][:, :, idx][:, 3:4].flatten()]) 175 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)]) 176 | 177 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 178 | if len(choose) > self.num_pt: 179 | c_mask = np.zeros(len(choose), dtype=int) 180 | c_mask[:self.num_pt] = 1 181 | np.random.shuffle(c_mask) 182 | choose = choose[c_mask.nonzero()] 183 | else: 184 | choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap') 185 | 186 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 187 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 188 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 189 | choose = np.array([choose]) 190 | 191 | cam_scale = meta['factor_depth'][0][0] 192 | pt2 = depth_masked / cam_scale 193 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 194 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 195 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 196 | if self.add_noise: 197 | cloud = np.add(cloud, add_t) 198 | 199 | # fw = open('temp/{0}_cld.xyz'.format(index), 'w') 200 | # for it in cloud: 201 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 202 | # fw.close() 203 | 204 | dellist = [j for j in range(0, len(self.cld[obj[idx]]))] 205 | if self.refine: 206 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_large) 207 | else: 208 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_small) 209 | model_points = np.delete(self.cld[obj[idx]], dellist, axis=0) 210 | 211 | # fw = open('temp/{0}_model_points.xyz'.format(index), 'w') 212 | # for it in model_points: 213 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 214 | # fw.close() 215 | 216 | target = np.dot(model_points, target_r.T) 217 | if self.add_noise: 218 | target = np.add(target, target_t + add_t) 219 | else: 220 | target = np.add(target, target_t) 221 | 222 | # fw = open('temp/{0}_tar.xyz'.format(index), 'w') 223 | # for it in target: 224 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 225 | # fw.close() 226 | 227 | return torch.from_numpy(cloud.astype(np.float32)), \ 228 | torch.LongTensor(choose.astype(np.int32)), \ 229 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 230 | torch.from_numpy(target.astype(np.float32)), \ 231 | torch.from_numpy(model_points.astype(np.float32)), \ 232 | torch.LongTensor([int(obj[idx]) - 1]) 233 | 234 | def __len__(self): 235 | return self.length 236 | 237 | def get_sym_list(self): 238 | return self.symmetry_obj_idx 239 | 240 | def get_num_points_mesh(self): 241 | if self.refine: 242 | return self.num_pt_mesh_large 243 | else: 244 | return self.num_pt_mesh_small 245 | 246 | 247 | border_list = 
[-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 248 | img_width = 480 249 | img_length = 640 250 | 251 | def get_bbox(label): 252 | rows = np.any(label, axis=1) 253 | cols = np.any(label, axis=0) 254 | rmin, rmax = np.where(rows)[0][[0, -1]] 255 | cmin, cmax = np.where(cols)[0][[0, -1]] 256 | rmax += 1 257 | cmax += 1 258 | r_b = rmax - rmin 259 | for tt in range(len(border_list)): 260 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 261 | r_b = border_list[tt + 1] 262 | break 263 | c_b = cmax - cmin 264 | for tt in range(len(border_list)): 265 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 266 | c_b = border_list[tt + 1] 267 | break 268 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 269 | rmin = center[0] - int(r_b / 2) 270 | rmax = center[0] + int(r_b / 2) 271 | cmin = center[1] - int(c_b / 2) 272 | cmax = center[1] + int(c_b / 2) 273 | if rmin < 0: 274 | delt = -rmin 275 | rmin = 0 276 | rmax += delt 277 | if cmin < 0: 278 | delt = -cmin 279 | cmin = 0 280 | cmax += delt 281 | if rmax > img_width: 282 | delt = rmax - img_width 283 | rmax = img_width 284 | rmin -= delt 285 | if cmax > img_length: 286 | delt = cmax - img_length 287 | cmax = img_length 288 | cmin -= delt 289 | return rmin, rmax, cmin, cmax 290 | -------------------------------------------------------------------------------- /scripts/datasets/ycb/dataset_config/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /scripts/distortion.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/distortion.npy -------------------------------------------------------------------------------- /scripts/eval.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | ########################################## ros packages ############################################## 4 | import rospy 5 | from sensor_msgs.msg import Image, CameraInfo 6 | from cv_bridge import CvBridge, CvBridgeError 7 | 8 | ######################################################################################################## 9 | 10 | import cv2 11 | import torch 12 | import argparse 13 | import os 14 | from torch.utils.data import DataLoader 15 | from model.build_BiSeNet import BiSeNet 16 | import numpy as np 17 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 18 | from datasets.dataset import ycb_Dataset 19 | from matplotlib import pyplot as plt 20 | from torchvision import transforms 21 | 22 | 23 | def eval(model,dataloader, args ): 24 | print('start test!') 25 | with torch.no_grad(): 26 | model.eval() 27 | precision_record = [] 28 | tq = tqdm.tqdm(total=len(dataloader) * args.batch_size) 29 | tq.set_description('test') 30 | hist = np.zeros((args.num_classes, args.num_classes)) 31 | for i, (data, label) in enumerate(dataloader): 32 | tq.update(args.batch_size) 33 | if torch.cuda.is_available() and args.use_gpu: 34 | data = data.cuda() 35 | label = label.cuda() 36 | predict = model(data).squeeze() 37 | predict = reverse_one_hot(predict) 38 | predict = np.array(predict) 39 | # predict = colour_code_segmentation(np.array(predict), label_info) 40 | 41 | label = label.squeeze() 42 | if args.loss == 'dice': 43 | label = reverse_one_hot(label) 44 | label = np.array(label) 45 | # label = colour_code_segmentation(np.array(label), label_info) 46 | 47 | precision = compute_global_accuracy(predict, label) 48 | hist += fast_hist(label.flatten(), predict.flatten(), args.num_classes) 49 | precision_record.append(precision) 50 | save_img(i,data,predict) 51 | precision = np.mean(precision_record) 52 | miou_list = per_class_iu(hist)[:-1] 53 | miou = np.mean(miou_list) 54 | print('IoU for each class:') 55 | tq.close() 56 | print('precision for test: %.3f' % precision) 57 | print('mIoU for validation: %.3f' % miou) 58 | return precision 59 | def save_img(iteration,img,label): 60 | img = img.cpu() 61 | img = img.numpy() 62 | img = np.transpose(img, [0,2,3,1]) 63 | _,h,w,c = img.shape 64 | img = img.reshape([h,w,c]) 65 | fig, axes = plt.subplots(1,2,figsize = (8,4)) 66 | ax = axes.ravel() 67 | ax[0].imshow(img) 68 | ax[1].imshow(label) 69 | plt.show() 70 | plt.savefig('./ycb/segmentation_result/{}.png'.format(iteration)) 71 | plt.close() 72 | 73 | ###################################################################################################### 74 | ############################################## test ################################################## 75 | ##################################################################################################### 76 | class object_segmentation: 77 | def __init__(self,model): 78 | self.model = model 79 | self.bridge = CvBridge() 80 | self.label_pub = rospy.Publisher('label',Image,queue_size = 10) 81 | self.rgb_sub = rospy.Subscriber('rgb_image',Image, self.seg_callback) 82 | def seg_callback(self, rgb): 83 | try: 84 | with torch.no_grad(): 85 | self.model.eval() 86 | rgb = self.bridge.imgmsg_to_cv2(rgb,'bgr8') 87 | self.to_tensor = transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 90 | ]) 91 | #rgb = np.transpose(rgb, (2,0,1)) 92 | #rgb = np.expand_dims(rgb, axis = 0) 93 | #print(type(rgb)) 94 | #rgb = 
torch.from_numpy(rgb) 95 | rgb = self.to_tensor(rgb) 96 | rgb = rgb.unsqueeze_(0) 97 | rgb = rgb.cuda() 98 | predict = self.model(rgb).squeeze() 99 | predict = reverse_one_hot(predict) 100 | predict = np.array(predict) 101 | np.save('./predict',predict) 102 | self.label_pub.publish(self.bridge.cv2_to_imgmsg(predict,'32SC1')) 103 | print('ss') 104 | except CvBridgeError as e: 105 | print(e) 106 | 107 | 108 | 109 | 110 | 111 | def main(params): 112 | # basic parameters 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument('--checkpoint_path', type=str, default=None, required=True, help='The path to the pretrained weights of model') 115 | parser.add_argument('--crop_height', type=int, default=720, help='Height of cropped/resized input image to network') 116 | parser.add_argument('--crop_width', type=int, default=960, help='Width of cropped/resized input image to network') 117 | parser.add_argument('--data', type=str, default='/path/to/data', help='Path of training data') 118 | parser.add_argument('--batch_size', type=int, default=1, help='Number of images in each batch') 119 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 120 | parser.add_argument('--cuda', type=str, default='0', help='GPU ids used for training') 121 | parser.add_argument('--use_gpu', type=bool, default=True, help='Whether to user gpu for training') 122 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)') 123 | parser.add_argument('--loss', type=str, default='dice', help='loss function, dice or crossentropy') 124 | args = parser.parse_args(params) 125 | 126 | # build model 127 | os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda 128 | model = BiSeNet(args.num_classes, args.context_path) 129 | if torch.cuda.is_available() and args.use_gpu: 130 | model = torch.nn.DataParallel(model).cuda() 131 | 132 | # load pretrained model if exists 133 | print('load model from %s ...' % args.checkpoint_path) 134 | model.module.load_state_dict(torch.load(args.checkpoint_path)) 135 | print('Done!') 136 | 137 | rospy.init_node('obj_seg',anonymous=True) 138 | Seg = object_segmentation(model) 139 | rospy.spin() 140 | 141 | 142 | 143 | if __name__ == '__main__': 144 | params = [ 145 | '--checkpoint_path', './checkpoints_18_sgd/best_dice_loss.pth', 146 | '--data', './CamVid/', 147 | '--cuda', '1', 148 | '--context_path', 'resnet101', 149 | '--num_classes', '21' 150 | ] 151 | main(params) 152 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/eval_linemod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python3 ./tools/eval_linemod.py --dataset_root ./datasets/linemod/Linemod_preprocessed\ 10 | --model trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth\ 11 | --refine_model trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth -------------------------------------------------------------------------------- /scripts/experiments/scripts/eval_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 
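# Note: the clone below is assumed to supply the PoseCNN reference results
# (results_PoseCNN_RSS2018.zip); eval_ycb.py reads the segmentation masks and
# object ROIs from that archive as the input to the YCB evaluation.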
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth 21 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/ros_eval_msg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/ros_eval_ycb_message.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 22 | --num_classes 22 \ 23 | --context_path resnet18 24 | 25 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/ros_eval_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | # --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 18 | #--model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \ 19 | python ./tools/ros_eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 20 | --model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \ 21 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 22 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 23 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 24 | --num_classes 21 \ 25 | --context_path resnet101 26 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 
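# The *.m files copied below are the modified MATLAB evaluation scripts shipped
# with DenseFusion; dropping them into YCB_Video_toolbox replaces the toolbox's
# stock evaluation code so it can score the poses produced by this package.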
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/ros_eval_ycb2.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 22 | --num_classes 21 \ 23 | --context_path resnet18 24 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/train_linemod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python3 ./tools/train.py --dataset linemod\ 10 | --dataset_root ./datasets/linemod/Linemod_preprocessed -------------------------------------------------------------------------------- /scripts/experiments/scripts/train_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python2 ./tools/train.py --dataset ycb\ 10 | --dataset_root ./datasets/ycb/YCB_Video_Dataset 11 | -------------------------------------------------------------------------------- /scripts/lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/extractors.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/extractors.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/build/knn_cuda_kernel.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/build/knn_cuda_kernel.so -------------------------------------------------------------------------------- /scripts/lib/knn/build_ffi.py: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/tonyseek/7821993 2 | import glob 3 | import torch 4 | from os import path as osp 5 | from torch.utils.ffi import create_extension 6 | 7 | abs_path = osp.dirname(osp.realpath(__file__)) 8 | extra_objects = [osp.join(abs_path, 'build/knn_cuda_kernel.so')] 9 | extra_objects += glob.glob('/usr/local/cuda/lib64/*.a') 10 | 11 | ffi = create_extension( 12 | 'knn_pytorch', 13 | headers=['src/knn_pytorch.h'], 14 | sources=['src/knn_pytorch.c'], 15 | define_macros=[('WITH_CUDA', None)], 16 | relative_to=__file__, 17 | with_cuda=True, 18 | extra_objects=extra_objects, 19 | include_dirs=[osp.join(abs_path, 'include')] 20 | ) 21 | 22 | 23 | if __name__ == '__main__': 24 | assert torch.cuda.is_available(), 
'Please install CUDA for GPU support.' 25 | ffi.build() 26 | -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._knn_pytorch import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/_knn_pytorch.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/_knn_pytorch.so -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | /** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA 2 | * The modifications are 3 | * removed texture memory usage 4 | * removed split query KNN computation 5 | * added feature extraction with bilinear interpolation 6 | * 7 | * Last modified by Christopher B. Choy 12/23/2016 8 | */ 9 | 10 | // Includes 11 | #include 12 | #include "cuda.h" 13 | 14 | #include "knn_cuda_kernel.h" 15 | 16 | // Constants used by the program 17 | #define BLOCK_DIM 16 18 | #define DEBUG 0 19 | 20 | /** 21 | * Computes the distance between two matrix A (reference points) and 22 | * B (query points) containing respectively wA and wB points. 
23 | * 24 | * @param A pointer on the matrix A 25 | * @param wA width of the matrix A = number of points in A 26 | * @param B pointer on the matrix B 27 | * @param wB width of the matrix B = number of points in B 28 | * @param dim dimension of points = height of matrices A and B 29 | * @param AB pointer on the matrix containing the wA*wB distances computed 30 | */ 31 | __global__ void cuComputeDistanceGlobal( float* A, int wA, 32 | float* B, int wB, int dim, float* AB){ 33 | 34 | // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B 35 | __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; 36 | __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; 37 | 38 | // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) 39 | __shared__ int begin_A; 40 | __shared__ int begin_B; 41 | __shared__ int step_A; 42 | __shared__ int step_B; 43 | __shared__ int end_A; 44 | 45 | // Thread index 46 | int tx = threadIdx.x; 47 | int ty = threadIdx.y; 48 | 49 | // Other variables 50 | float tmp; 51 | float ssd = 0; 52 | 53 | // Loop parameters 54 | begin_A = BLOCK_DIM * blockIdx.y; 55 | begin_B = BLOCK_DIM * blockIdx.x; 56 | step_A = BLOCK_DIM * wA; 57 | step_B = BLOCK_DIM * wB; 58 | end_A = begin_A + (dim-1) * wA; 59 | 60 | // Conditions 61 | int cond0 = (begin_A + tx < wA); // used to write in shared memory 62 | int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix 63 | int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix 64 | 65 | // Loop over all the sub-matrices of A and B required to compute the block sub-matrix 66 | for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { 67 | // Load the matrices from device memory to shared memory; each thread loads one element of each matrix 68 | if (a/wA + ty < dim){ 69 | shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0; 70 | shared_B[ty][tx] = (cond1)? B[b + wB * ty + tx] : 0; 71 | } 72 | else{ 73 | shared_A[ty][tx] = 0; 74 | shared_B[ty][tx] = 0; 75 | } 76 | 77 | // Synchronize to make sure the matrices are loaded 78 | __syncthreads(); 79 | 80 | // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix 81 | if (cond2 && cond1){ 82 | for (int k = 0; k < BLOCK_DIM; ++k){ 83 | tmp = shared_A[k][ty] - shared_B[k][tx]; 84 | ssd += tmp*tmp; 85 | } 86 | } 87 | 88 | // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration 89 | __syncthreads(); 90 | } 91 | 92 | // Write the block sub-matrix to device memory; each thread writes one element 93 | if (cond2 && cond1) 94 | AB[(begin_A + ty) * wB + begin_B + tx] = ssd; 95 | } 96 | 97 | 98 | /** 99 | * Gathers k-th smallest distances for each column of the distance matrix in the top. 
100 | * 101 | * @param dist distance matrix 102 | * @param ind index matrix 103 | * @param width width of the distance matrix and of the index matrix 104 | * @param height height of the distance matrix and of the index matrix 105 | * @param k number of neighbors to consider 106 | */ 107 | __global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){ 108 | 109 | // Variables 110 | int l, i, j; 111 | float *p_dist; 112 | long *p_ind; 113 | float curr_dist, max_dist; 114 | long curr_row, max_row; 115 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 116 | 117 | if (xIndexcurr_dist){ 132 | i=a; 133 | break; 134 | } 135 | } 136 | for (j=l; j>i; j--){ 137 | p_dist[j*width] = p_dist[(j-1)*width]; 138 | p_ind[j*width] = p_ind[(j-1)*width]; 139 | } 140 | p_dist[i*width] = curr_dist; 141 | p_ind[i*width] = l+1; 142 | } else { 143 | p_ind[l*width] = l+1; 144 | } 145 | max_dist = p_dist[curr_row]; 146 | } 147 | 148 | // Part 2 : insert element in the k-th first lines 149 | max_row = (k-1)*width; 150 | for (l=k; lcurr_dist){ 156 | i=a; 157 | break; 158 | } 159 | } 160 | for (j=k-1; j>i; j--){ 161 | p_dist[j*width] = p_dist[(j-1)*width]; 162 | p_ind[j*width] = p_ind[(j-1)*width]; 163 | } 164 | p_dist[i*width] = curr_dist; 165 | p_ind[i*width] = l+1; 166 | max_dist = p_dist[max_row]; 167 | } 168 | } 169 | } 170 | } 171 | 172 | 173 | /** 174 | * Computes the square root of the first line (width-th first element) 175 | * of the distance matrix. 176 | * 177 | * @param dist distance matrix 178 | * @param width width of the distance matrix 179 | * @param k number of neighbors to consider 180 | */ 181 | __global__ void cuParallelSqrt(float *dist, int width, int k){ 182 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 183 | unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; 184 | if (xIndex>>(ref_dev, ref_nb, 231 | query_dev, query_nb, dim, dist_dev); 232 | 233 | // Kernel 2: Sort each column 234 | cuInsertionSort<<>>(dist_dev, ind_dev, 235 | query_nb, ref_nb, k); 236 | 237 | // Kernel 3: Compute square root of k first elements 238 | // cuParallelSqrt<<>>(dist_dev, query_nb, k); 239 | 240 | #if DEBUG 241 | unsigned int size_of_float = sizeof(float); 242 | unsigned long size_of_long = sizeof(long); 243 | 244 | float* dist_host = new float[query_nb * k]; 245 | long* idx_host = new long[query_nb * k]; 246 | 247 | // Memory copy of output from device to host 248 | cudaMemcpy(&dist_host[0], dist_dev, 249 | query_nb * k *size_of_float, cudaMemcpyDeviceToHost); 250 | 251 | cudaMemcpy(&idx_host[0], ind_dev, 252 | query_nb * k * size_of_long, cudaMemcpyDeviceToHost); 253 | 254 | int i = 0; 255 | for(i = 0; i < 100; i++){ 256 | printf("IDX[%d]: %d\n", i, (int)idx_host[i]); 257 | } 258 | #endif 259 | } 260 | -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _MATHUTIL_CUDA_KERNEL 2 | #define _MATHUTIL_CUDA_KERNEL 3 | 4 | #define IDX2D(i, j, dj) (dj * i + j) 5 | #define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk)) 6 | 7 | #define BLOCK 512 8 | #define MAX_STREAMS 512 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void knn_device(float* ref_dev, int ref_width, 15 | float* query_dev, int query_width, 16 | int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif 23 | 
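The CUDA kernel above and the C binding that follows are reached from Python through scripts/lib/knn/__init__.py, which this repository ships only as a compiled .pyc. The sketch below is therefore an assumed usage example, reconstructed from how scripts/lib/loss.py and scripts/tools/eval_linemod.py call the wrapper; the wrapper's internals are not shown here. Note that the indices written by cuInsertionSort are 1-based, which is why callers subtract 1 before torch.index_select.

import torch
from lib.knn.__init__ import KNearestNeighbor   # wrapper around _knn_pytorch.so (only the .pyc is included in this repo)

knn = KNearestNeighbor(1)                        # k = 1, as used by loss.py / loss_refiner.py
ref = torch.rand(1, 3, 500).cuda()               # reference cloud:  (batch, 3, num_ref)
query = torch.rand(1, 3, 500).cuda()             # query cloud:      (batch, 3, num_query)
inds = knn(ref, query)                           # for each query point, 1-based index of its nearest reference point
nearest = torch.index_select(ref.squeeze(0), 1, inds.view(-1) - 1)   # (3, num_query), 0-based lookup
del knn                                          # loss.py deletes the wrapper after each use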
-------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_pytorch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "knn_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor, 7 | THCudaLongTensor *idx_tensor) { 8 | 9 | THCAssertSameGPU(THCudaTensor_checkGPU(state, 3, idx_tensor, ref_tensor, query_tensor)); 10 | long batch, ref_nb, query_nb, dim, k; 11 | THArgCheck(THCudaTensor_nDimension(state, ref_tensor) == 3 , 0, "ref_tensor: 3D Tensor expected"); 12 | THArgCheck(THCudaTensor_nDimension(state, query_tensor) == 3 , 1, "query_tensor: 3D Tensor expected"); 13 | THArgCheck(THCudaLongTensor_nDimension(state, idx_tensor) == 3 , 3, "idx_tensor: 3D Tensor expected"); 14 | THArgCheck(THCudaTensor_size(state, ref_tensor, 0) == THCudaTensor_size(state, query_tensor,0), 0, "input sizes must match"); 15 | THArgCheck(THCudaTensor_size(state, ref_tensor, 1) == THCudaTensor_size(state, query_tensor,1), 0, "input sizes must match"); 16 | THArgCheck(THCudaTensor_size(state, idx_tensor, 2) == THCudaTensor_size(state, query_tensor,2), 0, "input sizes must match"); 17 | 18 | //ref_tensor = THCudaTensor_newContiguous(state, ref_tensor); 19 | //query_tensor = THCudaTensor_newContiguous(state, query_tensor); 20 | 21 | batch = THCudaLongTensor_size(state, ref_tensor, 0); 22 | dim = THCudaTensor_size(state, ref_tensor, 1); 23 | k = THCudaLongTensor_size(state, idx_tensor, 1); 24 | ref_nb = THCudaTensor_size(state, ref_tensor, 2); 25 | query_nb = THCudaTensor_size(state, query_tensor, 2); 26 | 27 | float *ref_dev = THCudaTensor_data(state, ref_tensor); 28 | float *query_dev = THCudaTensor_data(state, query_tensor); 29 | long *idx_dev = THCudaLongTensor_data(state, idx_tensor); 30 | // scratch buffer for distances 31 | float *dist_dev = (float*)THCudaMalloc(state, ref_nb * query_nb * sizeof(float)); 32 | 33 | for (int b = 0; b < batch; b++) { 34 | knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k, 35 | dist_dev, idx_dev + b * k * query_nb, THCState_getCurrentStream(state)); 36 | } 37 | // free buffer 38 | THCudaFree(state, dist_dev); 39 | //printf("aaaaa\n"); 40 | // check for errors 41 | cudaError_t err = cudaGetLastError(); 42 | if (err != cudaSuccess) { 43 | printf("error in knn: %s\n", cudaGetErrorString(err)); 44 | THError("aborting"); 45 | } 46 | 47 | return 1; 48 | } 49 | -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_pytorch.h: -------------------------------------------------------------------------------- 1 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor, 2 | THCudaLongTensor *idx_tensor); 3 | -------------------------------------------------------------------------------- /scripts/lib/loss.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.loss import _Loss 2 | from torch.autograd import Variable 3 | import torch 4 | import time 5 | import numpy as np 6 | import torch.nn as nn 7 | import random 8 | import torch.backends.cudnn as cudnn 9 | from lib.knn.__init__ import KNearestNeighbor 10 | 11 | 12 | def loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, num_point_mesh, sym_list): 13 | knn = KNearestNeighbor(1) 14 | bs, num_p, _ = pred_c.size() 15 | 16 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1)) 17 | 18 
| base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\ 19 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 20 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 21 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \ 22 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \ 23 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 24 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 25 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 26 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3) 27 | 28 | ori_base = base 29 | base = base.contiguous().transpose(2, 1).contiguous() 30 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 31 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 32 | ori_target = target 33 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3) 34 | ori_t = pred_t 35 | points = points.contiguous().view(bs * num_p, 1, 3) 36 | pred_c = pred_c.contiguous().view(bs * num_p) 37 | 38 | pred = torch.add(torch.bmm(model_points, base), points + pred_t) 39 | 40 | if not refine: 41 | if idx[0].item() in sym_list: 42 | target = target[0].transpose(1, 0).contiguous().view(3, -1) 43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1) 44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 45 | target = torch.index_select(target, 1, inds.view(-1) - 1) 46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 48 | 49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1) 50 | loss = torch.mean((dis * pred_c - w * torch.log(pred_c)), dim=0) 51 | 52 | 53 | pred_c = pred_c.view(bs, num_p) 54 | how_max, which_max = torch.max(pred_c, 1) 55 | dis = dis.view(bs, num_p) 56 | 57 | 58 | t = ori_t[which_max[0]] + points[which_max[0]] 59 | points = points.view(1, bs * num_p, 3) 60 | 61 | ori_base = ori_base[which_max[0]].view(1, 3, 3).contiguous() 62 | ori_t = t.repeat(bs * num_p, 1).contiguous().view(1, bs * num_p, 3) 63 | new_points = torch.bmm((points - ori_t), ori_base).contiguous() 64 | 65 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous() 66 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3) 67 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous() 68 | 69 | # print('------------> ', dis[0][which_max[0]].item(), pred_c[0][which_max[0]].item(), idx[0].item()) 70 | del knn 71 | return loss, dis[0][which_max[0]], new_points.detach(), new_target.detach() 72 | 73 | 74 | class Loss(_Loss): 75 | 76 | def __init__(self, num_points_mesh, sym_list): 77 | super(Loss, self).__init__(True) 78 | self.num_pt_mesh = num_points_mesh 79 | self.sym_list = sym_list 80 | 81 | def forward(self, pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine): 82 | 83 | return loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, self.num_pt_mesh, self.sym_list) 84 | -------------------------------------------------------------------------------- 
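For reference, the nine terms concatenated into base in loss_calculation above are the standard rotation matrix of the normalized per-point quaternion pred_r = (q_0, q_1, q_2, q_3), and the value returned as loss is the confidence-weighted ADD/ADD-S objective used by DenseFusion. Restated in LaTeX (no new math, just the code above written out):

R(q) =
\begin{pmatrix}
1 - 2(q_2^2 + q_3^2) & 2(q_1 q_2 - q_0 q_3) & 2(q_0 q_2 + q_1 q_3) \\
2(q_1 q_2 + q_0 q_3) & 1 - 2(q_1^2 + q_3^2) & 2(q_2 q_3 - q_0 q_1) \\
2(q_1 q_3 - q_0 q_2) & 2(q_0 q_1 + q_2 q_3) & 1 - 2(q_1^2 + q_2^2)
\end{pmatrix}

d_i = \frac{1}{M} \sum_{j=1}^{M} \big\| (R_i x_j + t_i) - y_j \big\|_2 , \qquad
\mathcal{L} = \frac{1}{N} \sum_{i=1}^{N} \big( d_i \, c_i - w \log c_i \big)

Here x_j are the M model points (num_point_mesh), y_j the ground-truth-posed target points, t_i the sampled cloud point plus the predicted per-point offset (points + pred_t), c_i the predicted confidence pred_c, w the balancing weight, and N = bs * num_p. For objects in sym_list, each y_j is replaced by the closest target point found with the kNN call above, which gives the symmetric (ADD-S) variant.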
/scripts/lib/loss.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/loss.pyc -------------------------------------------------------------------------------- /scripts/lib/loss_refiner.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.loss import _Loss 2 | from torch.autograd import Variable 3 | import torch 4 | import time 5 | import numpy as np 6 | import torch.nn as nn 7 | import random 8 | import torch.backends.cudnn as cudnn 9 | from lib.knn.__init__ import KNearestNeighbor 10 | 11 | 12 | def loss_calculation(pred_r, pred_t, target, model_points, idx, points, num_point_mesh, sym_list): 13 | knn = KNearestNeighbor(1) 14 | pred_r = pred_r.view(1, 1, -1) 15 | pred_t = pred_t.view(1, 1, -1) 16 | bs, num_p, _ = pred_r.size() 17 | num_input_points = len(points[0]) 18 | 19 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1)) 20 | 21 | base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\ 22 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 23 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 24 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \ 25 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \ 26 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 27 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 28 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 29 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3) 30 | 31 | ori_base = base 32 | base = base.contiguous().transpose(2, 1).contiguous() 33 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 34 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 35 | ori_target = target 36 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3) 37 | ori_t = pred_t 38 | 39 | pred = torch.add(torch.bmm(model_points, base), pred_t) 40 | 41 | if idx[0].item() in sym_list: 42 | target = target[0].transpose(1, 0).contiguous().view(3, -1) 43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1) 44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 45 | target = torch.index_select(target, 1, inds.view(-1) - 1) 46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 48 | 49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1) 50 | 51 | t = ori_t[0] 52 | points = points.view(1, num_input_points, 3) 53 | 54 | ori_base = ori_base[0].view(1, 3, 3).contiguous() 55 | ori_t = t.repeat(bs * num_input_points, 1).contiguous().view(1, bs * num_input_points, 3) 56 | new_points = torch.bmm((points - ori_t), ori_base).contiguous() 57 | 58 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous() 59 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3) 60 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous() 61 | 62 | # 
print('------------> ', dis.item(), idx[0].item()) 63 | del knn 64 | return dis, new_points.detach(), new_target.detach() 65 | 66 | 67 | class Loss_refine(_Loss): 68 | 69 | def __init__(self, num_points_mesh, sym_list): 70 | super(Loss_refine, self).__init__(True) 71 | self.num_pt_mesh = num_points_mesh 72 | self.sym_list = sym_list 73 | 74 | 75 | def forward(self, pred_r, pred_t, target, model_points, idx, points): 76 | return loss_calculation(pred_r, pred_t, target, model_points, idx, points, self.num_pt_mesh, self.sym_list) 77 | -------------------------------------------------------------------------------- /scripts/lib/network.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.parallel 7 | import torch.backends.cudnn as cudnn 8 | import torch.optim as optim 9 | import torch.utils.data 10 | import torchvision.transforms as transforms 11 | import torchvision.utils as vutils 12 | from torch.autograd import Variable 13 | from PIL import Image 14 | import numpy as np 15 | import pdb 16 | import torch.nn.functional as F 17 | from lib.pspnet import PSPNet 18 | 19 | psp_models = { 20 | 'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'), 21 | 'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'), 22 | 'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'), 23 | 'resnet101': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet101'), 24 | 'resnet152': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet152') 25 | } 26 | 27 | class ModifiedResnet(nn.Module): 28 | 29 | def __init__(self, usegpu=True): 30 | super(ModifiedResnet, self).__init__() 31 | 32 | self.model = psp_models['resnet18'.lower()]() 33 | self.model = nn.DataParallel(self.model) 34 | 35 | def forward(self, x): 36 | x = self.model(x) 37 | return x 38 | 39 | class PoseNetFeat(nn.Module): 40 | def __init__(self, num_points): 41 | super(PoseNetFeat, self).__init__() 42 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 43 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 44 | 45 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 46 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 47 | 48 | self.conv5 = torch.nn.Conv1d(256, 512, 1) 49 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 50 | 51 | self.ap1 = torch.nn.AvgPool1d(num_points) 52 | self.num_points = num_points 53 | def forward(self, x, emb): 54 | x = F.relu(self.conv1(x)) 55 | emb = F.relu(self.e_conv1(emb)) 56 | pointfeat_1 = torch.cat((x, emb), dim=1) 57 | 58 | x = F.relu(self.conv2(x)) 59 | emb = F.relu(self.e_conv2(emb)) 60 | pointfeat_2 = torch.cat((x, emb), dim=1) 61 | 62 | x = F.relu(self.conv5(pointfeat_2)) 63 | x = F.relu(self.conv6(x)) 64 | 65 | ap_x = self.ap1(x) 66 | 67 | ap_x = ap_x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 68 | return torch.cat([pointfeat_1, pointfeat_2, ap_x], 1) #128 + 256 + 1024 69 | 70 | class PoseNet(nn.Module): 71 | def __init__(self, num_points, num_obj): 72 | super(PoseNet, self).__init__() 73 | self.num_points = num_points 74 | self.cnn = ModifiedResnet() 75 | self.feat = PoseNetFeat(num_points) 76 | 77 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1) 78 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1) 79 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1) 80 | 81 | self.conv2_r = 
torch.nn.Conv1d(640, 256, 1) 82 | self.conv2_t = torch.nn.Conv1d(640, 256, 1) 83 | self.conv2_c = torch.nn.Conv1d(640, 256, 1) 84 | 85 | self.conv3_r = torch.nn.Conv1d(256, 128, 1) 86 | self.conv3_t = torch.nn.Conv1d(256, 128, 1) 87 | self.conv3_c = torch.nn.Conv1d(256, 128, 1) 88 | 89 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) #quaternion 90 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) #translation 91 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) #confidence 92 | 93 | self.num_obj = num_obj 94 | 95 | def forward(self, img, x, choose, obj): 96 | out_img = self.cnn(img) 97 | 98 | bs, di, _, _ = out_img.size() 99 | 100 | emb = out_img.view(bs, di, -1) 101 | choose = choose.repeat(1, di, 1) 102 | emb = torch.gather(emb, 2, choose).contiguous() 103 | 104 | x = x.transpose(2, 1).contiguous() 105 | ap_x = self.feat(x, emb) 106 | 107 | rx = F.relu(self.conv1_r(ap_x)) 108 | tx = F.relu(self.conv1_t(ap_x)) 109 | cx = F.relu(self.conv1_c(ap_x)) 110 | 111 | rx = F.relu(self.conv2_r(rx)) 112 | tx = F.relu(self.conv2_t(tx)) 113 | cx = F.relu(self.conv2_c(cx)) 114 | 115 | rx = F.relu(self.conv3_r(rx)) 116 | tx = F.relu(self.conv3_t(tx)) 117 | cx = F.relu(self.conv3_c(cx)) 118 | 119 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points) 120 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points) 121 | cx = torch.sigmoid(self.conv4_c(cx)).view(bs, self.num_obj, 1, self.num_points) 122 | 123 | b = 0 124 | out_rx = torch.index_select(rx[b], 0, obj[b]) 125 | out_tx = torch.index_select(tx[b], 0, obj[b]) 126 | out_cx = torch.index_select(cx[b], 0, obj[b]) 127 | 128 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous() 129 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous() 130 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous() 131 | 132 | return out_rx, out_tx, out_cx, emb.detach() 133 | 134 | 135 | 136 | class PoseRefineNetFeat(nn.Module): 137 | def __init__(self, num_points): 138 | super(PoseRefineNetFeat, self).__init__() 139 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 140 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 141 | 142 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 143 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 144 | 145 | self.conv5 = torch.nn.Conv1d(384, 512, 1) 146 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 147 | 148 | self.ap1 = torch.nn.AvgPool1d(num_points) 149 | self.num_points = num_points 150 | 151 | def forward(self, x, emb): 152 | x = F.relu(self.conv1(x)) 153 | emb = F.relu(self.e_conv1(emb)) 154 | pointfeat_1 = torch.cat([x, emb], dim=1) 155 | 156 | x = F.relu(self.conv2(x)) 157 | emb = F.relu(self.e_conv2(emb)) 158 | pointfeat_2 = torch.cat([x, emb], dim=1) 159 | 160 | pointfeat_3 = torch.cat([pointfeat_1, pointfeat_2], dim=1) 161 | 162 | x = F.relu(self.conv5(pointfeat_3)) 163 | x = F.relu(self.conv6(x)) 164 | 165 | ap_x = self.ap1(x) 166 | 167 | ap_x = ap_x.view(-1, 1024) 168 | return ap_x 169 | 170 | class PoseRefineNet(nn.Module): 171 | def __init__(self, num_points, num_obj): 172 | super(PoseRefineNet, self).__init__() 173 | self.num_points = num_points 174 | self.feat = PoseRefineNetFeat(num_points) 175 | 176 | self.conv1_r = torch.nn.Linear(1024, 512) 177 | self.conv1_t = torch.nn.Linear(1024, 512) 178 | 179 | self.conv2_r = torch.nn.Linear(512, 128) 180 | self.conv2_t = torch.nn.Linear(512, 128) 181 | 182 | self.conv3_r = torch.nn.Linear(128, num_obj*4) #quaternion 183 | self.conv3_t = torch.nn.Linear(128, num_obj*3) #translation 184 | 185 | self.num_obj = num_obj 186 | 187 | def forward(self, x, emb, obj): 188 | 
bs = x.size()[0] 189 | 190 | x = x.transpose(2, 1).contiguous() 191 | ap_x = self.feat(x, emb) 192 | 193 | rx = F.relu(self.conv1_r(ap_x)) 194 | tx = F.relu(self.conv1_t(ap_x)) 195 | 196 | rx = F.relu(self.conv2_r(rx)) 197 | tx = F.relu(self.conv2_t(tx)) 198 | 199 | rx = self.conv3_r(rx).view(bs, self.num_obj, 4) 200 | tx = self.conv3_t(tx).view(bs, self.num_obj, 3) 201 | 202 | b = 0 203 | out_rx = torch.index_select(rx[b], 0, obj[b]) 204 | out_tx = torch.index_select(tx[b], 0, obj[b]) 205 | 206 | return out_rx, out_tx 207 | -------------------------------------------------------------------------------- /scripts/lib/pspnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/pspnet.pyc -------------------------------------------------------------------------------- /scripts/lib/transformations.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/transformations.pyc -------------------------------------------------------------------------------- /scripts/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def flatten(tensor): 6 | """Flattens a given tensor such that the channel axis is first. 7 | The shapes are transformed as follows: 8 | (N, C, D, H, W) -> (C, N * D * H * W) 9 | """ 10 | C = tensor.size(1) 11 | # new axis order 12 | axis_order = (1, 0) + tuple(range(2, tensor.dim())) 13 | # Transpose: (N, C, D, H, W) -> (C, N, D, H, W) 14 | transposed = tensor.permute(axis_order) 15 | # Flatten: (C, N, D, H, W) -> (C, N * D * H * W) 16 | return transposed.contiguous().view(C, -1) 17 | 18 | 19 | class DiceLoss(nn.Module): 20 | def __init__(self): 21 | super().__init__() 22 | self.epsilon = 1e-5 23 | 24 | def forward(self, output, target): 25 | assert output.size() == target.size(), "'input' and 'target' must have the same shape" 26 | output = F.softmax(output, dim=1) 27 | output = flatten(output) 28 | target = flatten(target) 29 | # intersect = (output * target).sum(-1).sum() + self.epsilon 30 | # denominator = ((output + target).sum(-1)).sum() + self.epsilon 31 | 32 | intersect = (output * target).sum(-1) 33 | denominator = (output + target).sum(-1) 34 | dice = intersect / denominator 35 | dice = torch.mean(dice) 36 | return 1 - dice 37 | # return 1 - 2. 
* intersect / denominator 38 | -------------------------------------------------------------------------------- /scripts/matrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/matrix.npy -------------------------------------------------------------------------------- /scripts/model/build_BiSeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from model.build_contextpath import build_contextpath 4 | import warnings 5 | warnings.filterwarnings(action='ignore') 6 | 7 | class ConvBlock(torch.nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1): 9 | super(ConvBlock,self).__init__() 10 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 11 | self.bn = nn.BatchNorm2d(out_channels) 12 | self.relu = nn.ReLU() 13 | 14 | def forward(self, input): 15 | x = self.conv1(input) 16 | return self.relu(self.bn(x)) 17 | 18 | class Spatial_path(torch.nn.Module): 19 | def __init__(self): 20 | super(Spatial_path,self).__init__() 21 | self.convblock1 = ConvBlock(in_channels=3, out_channels=64) 22 | self.convblock2 = ConvBlock(in_channels=64, out_channels=128) 23 | self.convblock3 = ConvBlock(in_channels=128, out_channels=256) 24 | 25 | def forward(self, input): 26 | x = self.convblock1(input) 27 | x = self.convblock2(x) 28 | x = self.convblock3(x) 29 | return x 30 | 31 | class AttentionRefinementModule(torch.nn.Module): 32 | def __init__(self, in_channels, out_channels): 33 | super(AttentionRefinementModule,self).__init__() 34 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) 35 | self.bn = nn.BatchNorm2d(out_channels) 36 | self.sigmoid = nn.Sigmoid() 37 | self.in_channels = in_channels 38 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 39 | 40 | def forward(self, input): 41 | # global average pooling 42 | x = self.avgpool(input) 43 | assert self.in_channels == x.size(1), 'in_channels and out_channels should all be {}'.format(x.size(1)) 44 | x = self.conv(x) 45 | # x = self.sigmoid(self.bn(x)) 46 | x = self.sigmoid(x) 47 | # channels of input and x should be same 48 | x = torch.mul(input, x) 49 | return x 50 | 51 | 52 | class FeatureFusionModule(torch.nn.Module): 53 | def __init__(self, num_classes, in_channels): 54 | super(FeatureFusionModule,self).__init__() 55 | # self.in_channels = input_1.channels + input_2.channels 56 | # resnet101 3328 = 256(from context path) + 1024(from spatial path) + 2048(from spatial path) 57 | # resnet18 1024 = 256(from context path) + 256(from spatial path) + 512(from spatial path) 58 | self.in_channels = in_channels 59 | 60 | self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1) 61 | self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1) 62 | self.relu = nn.ReLU() 63 | self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1) 64 | self.sigmoid = nn.Sigmoid() 65 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 66 | 67 | 68 | def forward(self, input_1, input_2): 69 | x = torch.cat((input_1, input_2), dim=1) 70 | assert self.in_channels == x.size(1), 'in_channels of ConvBlock should be {}'.format(x.size(1)) 71 | feature = self.convblock(x) 72 | x = self.avgpool(feature) 73 | 74 | x = self.relu(self.conv1(x)) 75 | x = self.sigmoid(self.conv2(x)) 76 | x = 
torch.mul(feature, x) 77 | x = torch.add(x, feature) 78 | return x 79 | 80 | class BiSeNet(torch.nn.Module): 81 | def __init__(self, num_classes, context_path): 82 | super(BiSeNet,self).__init__() 83 | # build spatial path 84 | self.saptial_path = Spatial_path() 85 | 86 | # build context path 87 | self.context_path = build_contextpath(name=context_path) 88 | 89 | # build attention refinement module for resnet 101 90 | if context_path == 'resnet101': 91 | self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024) 92 | self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048) 93 | # supervision block 94 | self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1) 95 | self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1) 96 | # build feature fusion module 97 | self.feature_fusion_module = FeatureFusionModule(num_classes, 3328) 98 | 99 | elif context_path == 'resnet18': 100 | # build attention refinement module for resnet 18 101 | self.attention_refinement_module1 = AttentionRefinementModule(256, 256) 102 | self.attention_refinement_module2 = AttentionRefinementModule(512, 512) 103 | # supervision block 104 | self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1) 105 | self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1) 106 | # build feature fusion module 107 | self.feature_fusion_module = FeatureFusionModule(num_classes, 1024) 108 | else: 109 | print('Error: unspport context_path network \n') 110 | 111 | # build final convolution 112 | self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1) 113 | 114 | self.init_weight() 115 | 116 | self.mul_lr = [] 117 | self.mul_lr.append(self.saptial_path) 118 | self.mul_lr.append(self.attention_refinement_module1) 119 | self.mul_lr.append(self.attention_refinement_module2) 120 | self.mul_lr.append(self.supervision1) 121 | self.mul_lr.append(self.supervision2) 122 | self.mul_lr.append(self.feature_fusion_module) 123 | self.mul_lr.append(self.conv) 124 | 125 | def init_weight(self): 126 | for name, m in self.named_modules(): 127 | if 'context_path' not in name: 128 | if isinstance(m, nn.Conv2d): 129 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') 130 | elif isinstance(m, nn.BatchNorm2d): 131 | m.eps = 1e-5 132 | m.momentum = 0.1 133 | nn.init.constant_(m.weight, 1) 134 | nn.init.constant_(m.bias, 0) 135 | 136 | def forward(self, input): 137 | # output of spatial path 138 | sx = self.saptial_path(input) 139 | 140 | # output of context path 141 | cx1, cx2, tail = self.context_path(input) 142 | cx1 = self.attention_refinement_module1(cx1) 143 | cx2 = self.attention_refinement_module2(cx2) 144 | cx2 = torch.mul(cx2, tail) 145 | # upsampling 146 | cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear') 147 | cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear') 148 | cx = torch.cat((cx1, cx2), dim=1) 149 | 150 | if self.training == True: 151 | cx1_sup = self.supervision1(cx1) 152 | cx2_sup = self.supervision2(cx2) 153 | cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear') 154 | cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear') 155 | 156 | # output of feature fusion module 157 | result = self.feature_fusion_module(sx, cx) 158 | 159 | # upsampling 160 | result = torch.nn.functional.interpolate(result, 
scale_factor=8, mode='bilinear') 161 | result = self.conv(result) 162 | 163 | if self.training == True: 164 | return result, cx1_sup, cx2_sup 165 | 166 | return result 167 | 168 | 169 | if __name__ == '__main__': 170 | import os 171 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 172 | model = BiSeNet(21, 'resnet18') 173 | # model = nn.DataParallel(model) 174 | 175 | model = model.cuda() 176 | x = torch.rand(2, 3, 256, 256) 177 | record = model.parameters() 178 | # for key, params in model.named_parameters(): 179 | # if 'bn' in key: 180 | # params.requires_grad = False 181 | from utils import group_weight 182 | # params_list = [] 183 | # for module in model.mul_lr: 184 | # params_list = group_weight(params_list, module, nn.BatchNorm2d, 10) 185 | # params_list = group_weight(params_list, model.context_path, torch.nn.BatchNorm2d, 1) 186 | 187 | print(model.parameters()) 188 | -------------------------------------------------------------------------------- /scripts/model/build_contextpath.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import models 3 | 4 | 5 | class resnet18(torch.nn.Module): 6 | def __init__(self, pretrained=True): 7 | super(resnet18,self).__init__() 8 | self.features = models.resnet18(pretrained=True) 9 | self.conv1 = self.features.conv1 10 | self.bn1 = self.features.bn1 11 | self.relu = self.features.relu 12 | self.maxpool1 = self.features.maxpool 13 | self.layer1 = self.features.layer1 14 | self.layer2 = self.features.layer2 15 | self.layer3 = self.features.layer3 16 | self.layer4 = self.features.layer4 17 | 18 | def forward(self, input): 19 | x = self.conv1(input) 20 | x = self.relu(self.bn1(x)) 21 | x = self.maxpool1(x) 22 | feature1 = self.layer1(x) # 1 / 4 23 | feature2 = self.layer2(feature1) # 1 / 8 24 | feature3 = self.layer3(feature2) # 1 / 16 25 | feature4 = self.layer4(feature3) # 1 / 32 26 | # global average pooling to build tail 27 | tail = torch.mean(feature4, 3, keepdim=True) 28 | tail = torch.mean(tail, 2, keepdim=True) 29 | return feature3, feature4, tail 30 | 31 | 32 | class resnet101(torch.nn.Module): 33 | def __init__(self, pretrained=True): 34 | super(resnet101,self).__init__() 35 | self.features = models.resnet101(pretrained=True) 36 | self.conv1 = self.features.conv1 37 | self.bn1 = self.features.bn1 38 | self.relu = self.features.relu 39 | self.maxpool1 = self.features.maxpool 40 | self.layer1 = self.features.layer1 41 | self.layer2 = self.features.layer2 42 | self.layer3 = self.features.layer3 43 | self.layer4 = self.features.layer4 44 | 45 | def forward(self, input): 46 | x = self.conv1(input) 47 | x = self.relu(self.bn1(x)) 48 | x = self.maxpool1(x) 49 | feature1 = self.layer1(x) # 1 / 4 50 | feature2 = self.layer2(feature1) # 1 / 8 51 | feature3 = self.layer3(feature2) # 1 / 16 52 | feature4 = self.layer4(feature3) # 1 / 32 53 | # global average pooling to build tail 54 | tail = torch.mean(feature4, 3, keepdim=True) 55 | tail = torch.mean(tail, 2, keepdim=True) 56 | return feature3, feature4, tail 57 | 58 | 59 | def build_contextpath(name): 60 | model = { 61 | 'resnet18': resnet18(pretrained=True), 62 | 'resnet101': resnet101(pretrained=True) 63 | } 64 | return model[name] 65 | 66 | 67 | if __name__ == '__main__': 68 | # 69 | model_18 = build_contextpath('resnet18') 70 | model_101 = build_contextpath('resnet101') 71 | x = torch.rand(1, 3, 256, 256) 72 | 73 | y_18 = model_18(x) 74 | y_101 = model_101(x) 75 | 
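The two files above (model/build_BiSeNet.py and model/build_contextpath.py) form the semantic-segmentation front end that the ros_eval_*.sh scripts configure through --checkpoint_path, --num_classes and --context_path. The sketch below shows one way to build and load it with the values from experiments/scripts/ros_eval_ycb.sh; it mirrors the __main__ block of build_BiSeNet.py, but the torch.load / load_state_dict step and the assumption that best_dice_loss.pth holds a plain state_dict are mine, not code copied from tools/ros_eval_ycb.py.

import torch
from model.build_BiSeNet import BiSeNet

# values taken from experiments/scripts/ros_eval_ycb.sh
seg_model = BiSeNet(num_classes=21, context_path='resnet101')
state = torch.load('trained_checkpoints/ycb/best_dice_loss.pth')   # assumed: a plain state_dict
seg_model.load_state_dict(state)
seg_model = seg_model.cuda().eval()            # in eval mode forward() returns only `result`

with torch.no_grad():
    rgb = torch.rand(1, 3, 480, 640).cuda()    # stand-in for one normalized 640x480 camera frame
    logits = seg_model(rgb)                    # (1, 21, 480, 640) after the scale_factor=8 upsampling
    label = logits.argmax(dim=1).squeeze(0)    # per-pixel class ids, 480x640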
-------------------------------------------------------------------------------- /scripts/predict.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/predict.npy -------------------------------------------------------------------------------- /scripts/tools/__pycache__/_init_paths.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/__pycache__/_init_paths.cpython-35.pyc -------------------------------------------------------------------------------- /scripts/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.getcwd()) -------------------------------------------------------------------------------- /scripts/tools/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/_init_paths.pyc -------------------------------------------------------------------------------- /scripts/tools/eval_linemod.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import random 5 | import numpy as np 6 | import yaml 7 | import copy 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim as optim 13 | import torch.utils.data 14 | import torchvision.datasets as dset 15 | import torchvision.transforms as transforms 16 | import torchvision.utils as vutils 17 | from torch.autograd import Variable 18 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod 19 | from lib.network import PoseNet, PoseRefineNet 20 | from lib.loss import Loss 21 | from lib.loss_refiner import Loss_refine 22 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 23 | from lib.knn.__init__ import KNearestNeighbor 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 27 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 28 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 29 | opt = parser.parse_args() 30 | 31 | num_objects = 13 32 | objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] 33 | num_points = 500 34 | iteration = 2 35 | bs = 1 36 | dataset_config_dir = 'datasets/linemod/dataset_config' 37 | output_result_dir = 'experiments/eval_result/linemod' 38 | knn = KNearestNeighbor(1) 39 | 40 | estimator = PoseNet(num_points = num_points, num_obj = num_objects) 41 | estimator.cuda() 42 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_objects) 43 | refiner.cuda() 44 | estimator.load_state_dict(torch.load(opt.model)) 45 | refiner.load_state_dict(torch.load(opt.refine_model)) 46 | estimator.eval() 47 | refiner.eval() 48 | 49 | testdataset = PoseDataset_linemod('eval', num_points, False, opt.dataset_root, 0.0, True) 50 | testdataloader = torch.utils.data.DataLoader(testdataset, batch_size=1, shuffle=False, num_workers=10) 51 | 52 | sym_list = testdataset.get_sym_list() 53 | 
num_points_mesh = testdataset.get_num_points_mesh() 54 | criterion = Loss(num_points_mesh, sym_list) 55 | criterion_refine = Loss_refine(num_points_mesh, sym_list) 56 | 57 | diameter = [] 58 | meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r') 59 | meta = yaml.load(meta_file) 60 | for obj in objlist: 61 | diameter.append(meta[obj]['diameter'] / 1000.0 * 0.1) 62 | print(diameter) 63 | 64 | success_count = [0 for i in range(num_objects)] 65 | num_count = [0 for i in range(num_objects)] 66 | fw = open('{0}/eval_result_logs.txt'.format(output_result_dir), 'w') 67 | 68 | for i, data in enumerate(testdataloader, 0): 69 | points, choose, img, target, model_points, idx = data 70 | if len(points.size()) == 2: 71 | print('No.{0} NOT Pass! Lost detection!'.format(i)) 72 | fw.write('No.{0} NOT Pass! Lost detection!\n'.format(i)) 73 | continue 74 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 75 | Variable(choose).cuda(), \ 76 | Variable(img).cuda(), \ 77 | Variable(target).cuda(), \ 78 | Variable(model_points).cuda(), \ 79 | Variable(idx).cuda() 80 | 81 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) 82 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 83 | pred_c = pred_c.view(bs, num_points) 84 | how_max, which_max = torch.max(pred_c, 1) 85 | pred_t = pred_t.view(bs * num_points, 1, 3) 86 | 87 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 88 | my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 89 | my_pred = np.append(my_r, my_t) 90 | 91 | for ite in range(0, iteration): 92 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 93 | my_mat = quaternion_matrix(my_r) 94 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 95 | my_mat[0:3, 3] = my_t 96 | 97 | new_points = torch.bmm((points - T), R).contiguous() 98 | pred_r, pred_t = refiner(new_points, emb, idx) 99 | pred_r = pred_r.view(1, 1, -1) 100 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 101 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 102 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 103 | my_mat_2 = quaternion_matrix(my_r_2) 104 | my_mat_2[0:3, 3] = my_t_2 105 | 106 | my_mat_final = np.dot(my_mat, my_mat_2) 107 | my_r_final = copy.deepcopy(my_mat_final) 108 | my_r_final[0:3, 3] = 0 109 | my_r_final = quaternion_from_matrix(my_r_final, True) 110 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 111 | 112 | my_pred = np.append(my_r_final, my_t_final) 113 | my_r = my_r_final 114 | my_t = my_t_final 115 | 116 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 117 | 118 | model_points = model_points[0].cpu().detach().numpy() 119 | my_r = quaternion_matrix(my_r)[:3, :3] 120 | pred = np.dot(model_points, my_r.T) + my_t 121 | target = target[0].cpu().detach().numpy() 122 | 123 | if idx[0].item() in sym_list: 124 | pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous() 125 | target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous() 126 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 127 | target = torch.index_select(target, 1, inds.view(-1) - 1) 128 | dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item() 129 | else: 130 | dis = np.mean(np.linalg.norm(pred - target, 
axis=1)) 131 | 132 | if dis < diameter[idx[0].item()]: 133 | success_count[idx[0].item()] += 1 134 | print('No.{0} Pass! Distance: {1}'.format(i, dis)) 135 | fw.write('No.{0} Pass! Distance: {1}\n'.format(i, dis)) 136 | else: 137 | print('No.{0} NOT Pass! Distance: {1}'.format(i, dis)) 138 | fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(i, dis)) 139 | num_count[idx[0].item()] += 1 140 | 141 | for i in range(num_objects): 142 | print('Object {0} success rate: {1}'.format(objlist[i], float(success_count[i]) / num_count[i])) 143 | fw.write('Object {0} success rate: {1}\n'.format(objlist[i], float(success_count[i]) / num_count[i])) 144 | print('ALL success rate: {0}'.format(float(sum(success_count)) / sum(num_count))) 145 | fw.write('ALL success rate: {0}\n'.format(float(sum(success_count)) / sum(num_count))) 146 | fw.close() 147 | -------------------------------------------------------------------------------- /scripts/tools/eval_ycb.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import copy 5 | import random 6 | import numpy as np 7 | from PIL import Image 8 | import scipy.io as scio 9 | import scipy.misc 10 | import numpy.ma as ma 11 | import math 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.parallel 15 | import torch.backends.cudnn as cudnn 16 | import torch.optim as optim 17 | import torch.utils.data 18 | import torchvision.datasets as dset 19 | import torchvision.transforms as transforms 20 | import torchvision.utils as vutils 21 | import torch.nn.functional as F 22 | from torch.autograd import Variable 23 | from datasets.ycb.dataset import PoseDataset 24 | from lib.network import PoseNet, PoseRefineNet 25 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 29 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 30 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 31 | opt = parser.parse_args() 32 | 33 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 34 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 35 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 36 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 37 | cam_cx = 312.9869 38 | cam_cy = 241.3109 39 | cam_fx = 1066.778 40 | cam_fy = 1067.487 41 | cam_scale = 10000.0 42 | num_obj = 21 43 | img_width = 480 44 | img_length = 640 45 | num_points = 1000 46 | num_points_mesh = 500 47 | iteration = 2 48 | bs = 1 49 | dataset_config_dir = 'datasets/ycb/dataset_config' 50 | ycb_toolbox_dir = 'YCB_Video_toolbox' 51 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 52 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 53 | 54 | def get_bbox(posecnn_rois): 55 | rmin = int(posecnn_rois[idx][3]) + 1 56 | rmax = int(posecnn_rois[idx][5]) - 1 57 | cmin = int(posecnn_rois[idx][2]) + 1 58 | cmax = int(posecnn_rois[idx][4]) - 1 59 | r_b = rmax - rmin 60 | for tt in range(len(border_list)): 61 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 62 | r_b = border_list[tt + 1] 63 | break 64 | c_b = cmax - cmin 65 | for tt in range(len(border_list)): 66 | if c_b > border_list[tt] and c_b < 
border_list[tt + 1]: 67 | c_b = border_list[tt + 1] 68 | break 69 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 70 | rmin = center[0] - int(r_b / 2) 71 | rmax = center[0] + int(r_b / 2) 72 | cmin = center[1] - int(c_b / 2) 73 | cmax = center[1] + int(c_b / 2) 74 | if rmin < 0: 75 | delt = -rmin 76 | rmin = 0 77 | rmax += delt 78 | if cmin < 0: 79 | delt = -cmin 80 | cmin = 0 81 | cmax += delt 82 | if rmax > img_width: 83 | delt = rmax - img_width 84 | rmax = img_width 85 | rmin -= delt 86 | if cmax > img_length: 87 | delt = cmax - img_length 88 | cmax = img_length 89 | cmin -= delt 90 | return rmin, rmax, cmin, cmax 91 | 92 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 93 | estimator.cuda() 94 | estimator.load_state_dict(torch.load(opt.model)) 95 | estimator.eval() 96 | 97 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 98 | refiner.cuda() 99 | refiner.load_state_dict(torch.load(opt.refine_model)) 100 | refiner.eval() 101 | 102 | testlist = [] 103 | input_file = open('{0}/test_data_list.txt'.format(dataset_config_dir)) 104 | while 1: 105 | input_line = input_file.readline() 106 | if not input_line: 107 | break 108 | if input_line[-1:] == '\n': 109 | input_line = input_line[:-1] 110 | testlist.append(input_line) 111 | input_file.close() 112 | print(len(testlist)) 113 | 114 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 115 | class_id = 1 116 | cld = {} 117 | while 1: 118 | class_input = class_file.readline() 119 | if not class_input: 120 | break 121 | class_input = class_input[:-1] 122 | 123 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 124 | cld[class_id] = [] 125 | while 1: 126 | input_line = input_file.readline() 127 | if not input_line: 128 | break 129 | input_line = input_line[:-1] 130 | input_line = input_line.split(' ') 131 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 132 | input_file.close() 133 | cld[class_id] = np.array(cld[class_id]) 134 | class_id += 1 135 | 136 | for now in range(0, 2949): 137 | img = Image.open('{0}/{1}-color.png'.format(opt.dataset_root, testlist[now])) 138 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(opt.dataset_root, testlist[now]))) 139 | posecnn_meta = scio.loadmat('{0}/results_PoseCNN_RSS2018/{1}.mat'.format(ycb_toolbox_dir, '%06d' % now)) 140 | label = np.array(posecnn_meta['labels']) 141 | posecnn_rois = np.array(posecnn_meta['rois']) 142 | 143 | lst = posecnn_rois[:, 1:2].flatten() 144 | my_result_wo_refine = [] 145 | my_result = [] 146 | 147 | for idx in range(len(lst)): 148 | itemid = lst[idx] 149 | try: 150 | rmin, rmax, cmin, cmax = get_bbox(posecnn_rois) 151 | 152 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 153 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 154 | mask = mask_label * mask_depth 155 | 156 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 157 | if len(choose) > num_points: 158 | c_mask = np.zeros(len(choose), dtype=int) 159 | c_mask[:num_points] = 1 160 | np.random.shuffle(c_mask) 161 | choose = choose[c_mask.nonzero()] 162 | else: 163 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 164 | 165 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 166 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 167 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 168 | choose = 
np.array([choose]) 169 | 170 | pt2 = depth_masked / cam_scale 171 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 172 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 173 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 174 | 175 | img_masked = np.array(img)[:, :, :3] 176 | img_masked = np.transpose(img_masked, (2, 0, 1)) 177 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 178 | 179 | cloud = torch.from_numpy(cloud.astype(np.float32)) 180 | choose = torch.LongTensor(choose.astype(np.int32)) 181 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 182 | index = torch.LongTensor([itemid - 1]) 183 | 184 | cloud = Variable(cloud).cuda() 185 | choose = Variable(choose).cuda() 186 | img_masked = Variable(img_masked).cuda() 187 | index = Variable(index).cuda() 188 | 189 | cloud = cloud.view(1, num_points, 3) 190 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 191 | 192 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 193 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 194 | 195 | pred_c = pred_c.view(bs, num_points) 196 | how_max, which_max = torch.max(pred_c, 1) 197 | pred_t = pred_t.view(bs * num_points, 1, 3) 198 | points = cloud.view(bs * num_points, 1, 3) 199 | 200 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 201 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 202 | my_pred = np.append(my_r, my_t) 203 | my_result_wo_refine.append(my_pred.tolist()) 204 | 205 | for ite in range(0, iteration): 206 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 207 | my_mat = quaternion_matrix(my_r) 208 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 209 | my_mat[0:3, 3] = my_t 210 | 211 | new_cloud = torch.bmm((cloud - T), R).contiguous() 212 | pred_r, pred_t = refiner(new_cloud, emb, index) 213 | pred_r = pred_r.view(1, 1, -1) 214 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 215 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 216 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 217 | my_mat_2 = quaternion_matrix(my_r_2) 218 | 219 | my_mat_2[0:3, 3] = my_t_2 220 | 221 | my_mat_final = np.dot(my_mat, my_mat_2) 222 | my_r_final = copy.deepcopy(my_mat_final) 223 | my_r_final[0:3, 3] = 0 224 | my_r_final = quaternion_from_matrix(my_r_final, True) 225 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 226 | 227 | my_pred = np.append(my_r_final, my_t_final) 228 | my_r = my_r_final 229 | my_t = my_t_final 230 | 231 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 232 | 233 | my_result.append(my_pred.tolist()) 234 | except ZeroDivisionError: 235 | print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now)) 236 | my_result_wo_refine.append([0.0 for i in range(7)]) 237 | my_result.append([0.0 for i in range(7)]) 238 | 239 | scio.savemat('{0}/{1}.mat'.format(result_wo_refine_dir, '%04d' % now), {'poses':my_result_wo_refine}) 240 | scio.savemat('{0}/{1}.mat'.format(result_refine_dir, '%04d' % now), {'poses':my_result}) 241 | print("Finish No.{0} keyframe".format(now)) 242 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | from matplotlib import pyplot as plt 42 | import time 43 | 44 | 45 | 46 | ########################################################################################## 47 | 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 50 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 51 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 52 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model') 53 | parser.add_argument('--num_classes', type=int, default=22, help='num of object classes (with void)') 54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 55 | 56 | 57 | opt = parser.parse_args() 58 | import numpy.ma as ma 59 | 60 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 61 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 62 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 63 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 64 | cam_cx = 312.9869 65 | cam_cy = 241.3109 66 | cam_fx = 1066.778 67 | cam_fy = 1067.487 68 | #cam_scale = 10000.0 69 | cam_scale = 1000.0 70 | num_obj = 21 71 | img_width = 480 72 | img_length = 640 73 | num_points = 1000 74 | num_points_mesh = 500 75 | iteration = 2 76 | bs = 1 77 | dataset_config_dir = 'datasets/ycb/dataset_config' 78 | ycb_toolbox_dir = 'YCB_Video_toolbox' 79 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 80 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 81 | cam_mat = np.load('matrix.npy') 82 | dist = np.load('distortion.npy') 83 | #dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0]) 84 | 85 | ######################################################################################### 86 | 87 | def isRotationMatrix(R) : 88 | Rt = np.transpose(R) 89 | shouldBeIdentity = 
np.dot(Rt, R) 90 | I = np.identity(3, dtype = R.dtype) 91 | n = np.linalg.norm(I - shouldBeIdentity) 92 | return n < 1e-6 93 | 94 | 95 | def rotationMatrixToEulerAngles(R) : 96 | 97 | assert(isRotationMatrix(R)) 98 | 99 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 100 | 101 | singular = sy < 1e-6 102 | 103 | if not singular : 104 | x = math.atan2(R[2,1] , R[2,2]) 105 | y = math.atan2(-R[2,0], sy) 106 | z = math.atan2(R[1,0], R[0,0]) 107 | else : 108 | x = math.atan2(-R[1,2], R[1,1]) 109 | y = math.atan2(-R[2,0], sy) 110 | z = 0 111 | 112 | return np.array([x, y, z]) 113 | 114 | ################################################################################################ 115 | 116 | # get bbox coordinate 117 | def get_bbox(label): 118 | rows = np.any(label, axis=1) 119 | cols = np.any(label, axis=0) 120 | rmin, rmax = np.where(rows)[0][[0, -1]] 121 | cmin, cmax = np.where(cols)[0][[0, -1]] 122 | rmax += 1 123 | cmax += 1 124 | r_b = rmax - rmin 125 | for tt in range(len(border_list)): 126 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 127 | r_b = border_list[tt + 1] 128 | break 129 | c_b = cmax - cmin 130 | for tt in range(len(border_list)): 131 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 132 | c_b = border_list[tt + 1] 133 | break 134 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 135 | rmin = center[0] - int(r_b / 2) 136 | rmax = center[0] + int(r_b / 2) 137 | cmin = center[1] - int(c_b / 2) 138 | cmax = center[1] + int(c_b / 2) 139 | if rmin < 0: 140 | delt = -rmin 141 | rmin = 0 142 | rmax += delt 143 | if cmin < 0: 144 | delt = -cmin 145 | cmin = 0 146 | cmax += delt 147 | if rmax > img_width: 148 | delt = rmax - img_width 149 | rmax = img_width 150 | rmin -= delt 151 | if cmax > img_length: 152 | delt = cmax - img_length 153 | cmax = img_length 154 | cmin -= delt 155 | return rmin, rmax, cmin, cmax 156 | 157 | ############################ with detection algorithm ############################# 158 | # def get_bbox(rois,idx): 159 | # # rmin = int(posecnn_rois[idx][2]) + 1 160 | # # rmax = int(posecnn_rois[idx][4]) - 1 161 | # # cmin = int(posecnn_rois[idx][1]) + 1 162 | # # cmax = int(posecnn_rois[idx][3]) - 1 163 | # rmin = int(rois[idx].xmin) + 1 164 | # rmax = int(rois[idx].xmax) - 1 165 | # cmin = int(rois[idx].ymin) + 1 166 | # cmax = int(rois[idx].ymax) - 1 167 | # r_b = rmax - rmin 168 | # for tt in range(len(border_list)): 169 | # if r_b > border_list[tt] and r_b < border_list[tt + 1]: 170 | # r_b = border_list[tt + 1] 171 | # break 172 | # c_b = cmax - cmin 173 | # for tt in range(len(border_list)): 174 | # if c_b > border_list[tt] and c_b < border_list[tt + 1]: 175 | # c_b = border_list[tt + 1] 176 | # break 177 | # center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 178 | # rmin = center[0] - int(r_b / 2) 179 | # rmax = center[0] + int(r_b / 2) 180 | # cmin = center[1] - int(c_b / 2) 181 | # cmax = center[1] + int(c_b / 2) 182 | # if rmin < 0: 183 | # delt = -rmin 184 | # rmin = 0 185 | # rmax += delt 186 | # if cmin < 0: 187 | # delt = -cmin 188 | # cmin = 0 189 | # cmax += delt 190 | # if rmax > img_width: 191 | # delt = rmax - img_width 192 | # rmax = img_width 193 | # rmin -= delt 194 | # if cmax > img_length: 195 | # delt = cmax - img_length 196 | # cmax = img_length 197 | # cmin -= delt 198 | # return rmin, rmax, cmin, cmax 199 | 200 | 201 | #################################################################################################### 202 | ################################### load BiSeNet parameters 
######################################## 203 | #################################################################################################### 204 | print('load BiseNet') 205 | start_time = time.time() 206 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 207 | bise_model = bise_model.cuda() 208 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 209 | global bise_model 210 | print('Done!') 211 | print("Load time : {}".format(time.time() - start_time)) 212 | 213 | ##################################################################################################### 214 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 215 | ##################################################################################################### 216 | print('load densefusion network') 217 | start_time = time.time() 218 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 219 | estimator.cuda() 220 | estimator.load_state_dict(torch.load(opt.model)) 221 | estimator.eval() 222 | ############################################################################ 223 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 224 | refiner.cuda() 225 | refiner.load_state_dict(torch.load(opt.refine_model)) 226 | refiner.eval() 227 | print('Done') 228 | print("Load time : {}".format(time.time() - start_time)) 229 | ##################################################################################################### 230 | # class list upload 231 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 232 | class_id = 1 233 | cld = {} 234 | while 1: 235 | class_input = class_file.readline() 236 | if not class_input: 237 | break 238 | class_input = class_input[:-1] 239 | 240 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 241 | cld[class_id] = [] 242 | while 1: 243 | input_line = input_file.readline() 244 | if not input_line: 245 | break 246 | input_line = input_line[:-1] 247 | input_line = input_line.split(' ') 248 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 249 | input_file.close() 250 | cld[class_id] = np.array(cld[class_id]) 251 | class_id += 1 252 | ######################################################################################################## 253 | def seg_predict(image): 254 | global bise_model 255 | try: 256 | with torch.no_grad(): 257 | bise_model.eval() 258 | h,w,_ = image.shape 259 | to_tensor = transforms.Compose([ 260 | transforms.ToTensor(), 261 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 262 | ]) 263 | 264 | image = to_tensor(image) 265 | image = image.unsqueeze_(0) 266 | image = image.cuda() 267 | predict = bise_model(image).squeeze() 268 | predict = reverse_one_hot(predict) 269 | predict = np.array(predict) 270 | print(np.unique(predict)) 271 | predict = np.resize(predict,[h,w]) 272 | pub_label = np.uint8(predict) 273 | cv2.imwrite('./segmentation_image.png', pub_label) 274 | 275 | return predict, pub_label 276 | except CvBridgeError as e: 277 | print(e) 278 | 279 | 280 | 281 | 282 | 283 | def pose_predict(img, depth,rois): 284 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 285 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 286 | class_list = ['002_master_chef_can', 287 | '003_cracker_box', 288 | '004_sugar_box', 289 | '005_tomato_soup_can', 290 | '006_mustard_bottle', 291 | '007_tuna_fish_can', 292 | '008_pudding_box', 293 | '009_gelatin_box', 294 | '010_potted_meat_can', 295 
| '011_banana',
296 | '019_pitcher_base',
297 | '025_mug',
298 | '021_bleach_cleanser',
299 | '024_bowl',
300 | '035_power_drill',
301 | '036_wood_block',
302 | '037_scissors',
303 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick']
304 | try:
305 | object_number = len(rois)
306 |
307 | #lst = posecnn_rois[:,0:1].flatten()
308 | #lst = np.unique(label)
309 | my_result_wo_refine = []
310 | my_result = []
311 | for idx in range(object_number):
312 | #itemid = lst[idx]
313 | itemid = class_list.index(rois[idx].Class) +1
314 | #itemid = class_list.index(rois[idx].Class) +3
315 | print(object_number,itemid, rois[idx])
316 |
317 | try:
318 | label, pub_label = seg_predict(img)
319 | pub_label =pub_label * 50
320 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1'))
321 | ####################### with Detection algorithm #################################
322 | # rmin, rmax, cmin,cmax = get_bbox(rois,idx)
323 | #####################################################################################
324 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
325 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
326 | mask = mask_label * mask_depth
327 | rmin, rmax, cmin, cmax = get_bbox(mask_label)
328 |
329 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
330 | if len(choose) > num_points:
331 | c_mask = np.zeros(len(choose), dtype=int)
332 | c_mask[:num_points] = 1
333 | np.random.shuffle(c_mask)
334 | choose = choose[c_mask.nonzero()]
335 | else:
336 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
337 |
338 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
339 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
340 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
341 | choose = np.array([choose])
342 |
343 | pt2 = depth_masked / cam_scale
344 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
345 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
346 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
347 |
348 | img_masked = np.array(img)[:, :, :3]
349 | img_masked = np.transpose(img_masked, (2, 0, 1))
350 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
351 |
352 | cloud = torch.from_numpy(cloud.astype(np.float32))
353 | choose = torch.LongTensor(choose.astype(np.int32))
354 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
355 | index = torch.LongTensor([itemid - 1])
356 |
357 | cloud = Variable(cloud).cuda()
358 | choose = Variable(choose).cuda()
359 | img_masked = Variable(img_masked).cuda()
360 | index = Variable(index).cuda()
361 | cloud = cloud.view(1, num_points, 3)
362 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
363 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
364 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
365 | pred_c = pred_c.view(bs, num_points)
366 | how_max, which_max = torch.max(pred_c, 1)
367 | pred_t = pred_t.view(bs * num_points, 1, 3)
368 | points = cloud.view(bs * num_points, 1, 3)
369 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
370 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
371 | my_pred = np.append(my_r, my_t)
372 | # making pose matrix (the rotation is derived from the predicted quaternion here, since 'dof' is only built further down)
373 | rot_to_angle = rotationMatrixToEulerAngles(quaternion_matrix(my_r)[:3, :3])
374 | rot_to_angle = rot_to_angle.reshape(1,3)
375 | my_t = my_t.reshape(1,3)
376 | rot_t = 
np.concatenate([rot_to_angle,my_t], axis= 0) 377 | 378 | # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 379 | # [0, 0, 1]])) 380 | #tl = np.array([100,100,100]) 381 | #cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696], 382 | # [0, 0, 1]])) 383 | 384 | 385 | 386 | for ite in range(0, iteration): 387 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 388 | my_mat = quaternion_matrix(my_r) 389 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 390 | my_mat[0:3, 3] = my_t 391 | 392 | new_cloud = torch.bmm((cloud - T), R).contiguous() 393 | pred_r, pred_t = refiner(new_cloud, emb, index) 394 | pred_r = pred_r.view(1, 1, -1) 395 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 396 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 397 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 398 | my_mat_2 = quaternion_matrix(my_r_2) 399 | 400 | 401 | my_mat_2[0:3, 3] = my_t_2 402 | my_mat_final = np.dot(my_mat, my_mat_2) 403 | my_r_final = copy.deepcopy(my_mat_final) 404 | my_r_final[0:3, 3] = 0 405 | my_r_final = quaternion_from_matrix(my_r_final, True) 406 | 407 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 408 | 409 | my_pred = np.append(my_r_final, my_t_final) 410 | my_r = my_r_final 411 | my_t = my_t_final 412 | open_cv_image = img.copy() 413 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) 414 | dof = quaternion_matrix(my_r) 415 | dof[0:3,3] = my_t 416 | 417 | 418 | object_poses = { 419 | 'tx':my_t[0][0], 420 | 'ty':my_t[0][1], 421 | 'tz':my_t[0][2], 422 | 'qx':my_r[0], 423 | 'qy':my_r[1], 424 | 'qz':my_r[2], 425 | 'qw':my_r[3]} 426 | my_result.append(object_poses) 427 | open_cv_image = img.copy() 428 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) 429 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 13 = mug 430 | open_cv_image = draw(open_cv_image,imgpts, itemid) 431 | 432 | 433 | except ZeroDivisionError: 434 | open_cv_image = None 435 | print('Fail') 436 | except CvBridgeError as e: 437 | print(e) 438 | return my_result, open_cv_image 439 | 440 | def draw(img, imgpts, label): 441 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 442 | for point in imgpts: 443 | 444 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 445 | return img 446 | 447 | 448 | 449 | def image_callback(rgb): 450 | bridge = CvBridge() 451 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8') 452 | 453 | global cv_image 454 | global bridge 455 | 456 | 457 | def depth_callback(depth): 458 | cv_depth = bridge.imgmsg_to_cv2(depth,'16UC1') 459 | cv2.imwrite('./depth.png', cv_depth) 460 | 461 | 462 | global cv_depth 463 | 464 | def rois_callback(rois): 465 | detect_res = rois.bounding_boxes 466 | global detect_res 467 | 468 | def pose_server(): 469 | rospy.init_node('pose_estimation_server') 470 | s = rospy.Service('/cvipl/pose_server', AddTwoInts, implimentation_seg) 471 | # only Segmentation 472 | 473 | # only Pose Estimation 474 | rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback) 475 | depth_sub = 
rospy.Subscriber('/camera/aligned_depth_to_color/image_raw',Image, depth_callback) 476 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback) 477 | 478 | 479 | def implimentation_seg(req): 480 | global cv_image 481 | global cv_depth 482 | global detect_res 483 | 484 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 485 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 486 | if req.a == 2 : 487 | print(cv_image.shape,cv_depth.shape,detect_res) 488 | seg_result = seg_predict(cv_image) 489 | 490 | elif req.a == 3 : 491 | print(req.a) 492 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res) 493 | pose_array = PoseArray() 494 | pose_msg = Pose() 495 | print(pose_estimation) 496 | 497 | for i in range(len(pose_estimation)): 498 | pose_msg.position.x = pose_estimation[i]['tx'] 499 | pose_msg.position.y = pose_estimation[i]['ty'] 500 | pose_msg.position.z = pose_estimation[i]['tz'] 501 | 502 | pose_msg.orientation.x = pose_estimation[i]['qx'] 503 | pose_msg.orientation.y = pose_estimation[i]['qy'] 504 | pose_msg.orientation.z = pose_estimation[i]['qz'] 505 | pose_msg.orientation.w = pose_estimation[i]['qw'] 506 | 507 | pose_array.poses.append(pose_msg) 508 | pose_pub.publish(pose_array) 509 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 510 | 511 | 512 | 513 | def main(): 514 | pose_server() 515 | rospy.spin() 516 | 517 | if __name__ == '__main__': 518 | main() 519 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb_message.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | from matplotlib import pyplot as plt 42 | import time 43 | 44 | 45 | 46 | ########################################################################################## 47 | 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir') 50 | parser.add_argument('--model', type=str, default = 
'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model')
51 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model')
52 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model')
53 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)')
54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
55 | parser.add_argument('--image_subscriber', type=str, default='/camera/color/image_raw')
56 | parser.add_argument('--depth_subscriber', type=str, default='/camera/depth/image_rect_raw')
57 |
58 |
59 |
60 | opt = parser.parse_args()
61 |
62 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
63 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
64 | xmap = np.array([[j for i in range(640)] for j in range(480)])
65 | ymap = np.array([[i for i in range(640)] for j in range(480)])
66 | cam_cx = 312.9869
67 | cam_cy = 241.3109
68 | cam_fx = 1066.778
69 | cam_fy = 1067.487
70 | cam_scale = 10000.0
71 | num_obj = 21
72 | img_width = 480
73 | img_length = 640
74 | num_points = 1000
75 | num_points_mesh = 500
76 | iteration = 2
77 | bs = 1
78 | dataset_config_dir = 'datasets/ycb/dataset_config'
79 | ycb_toolbox_dir = 'YCB_Video_toolbox'
80 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
81 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
82 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0])
83 |
84 |
85 | def image_callback(rgb):
86 | bridge = CvBridge()
87 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8')
88 | global cv_image
89 |
90 |
91 | def depth_callback(depth):
92 | bridge = CvBridge()
93 | cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1')
94 | global cv_depth
95 |
96 | def rois_callback(rois):
97 |
98 | detect_res = rois.bounding_boxes
99 | global detect_res
100 | implimentation_seg()
101 |
102 |
103 | rgb_sub = rospy.Subscriber(opt.image_subscriber,Image, image_callback)
104 | depth_sub = rospy.Subscriber(opt.depth_subscriber,Image, depth_callback)
105 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback)
106 | #########################################################################################
107 |
108 | def isRotationMatrix(R) :
109 | Rt = np.transpose(R)
110 | shouldBeIdentity = np.dot(Rt, R)
111 | I = np.identity(3, dtype = R.dtype)
112 | n = np.linalg.norm(I - shouldBeIdentity)
113 | return n < 1e-6
114 |
115 |
116 | def rotationMatrixToEulerAngles(R) :
117 |
118 | assert(isRotationMatrix(R))
119 |
120 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
121 |
122 | singular = sy < 1e-6
123 |
124 | if not singular :
125 | x = math.atan2(R[2,1] , R[2,2])
126 | y = math.atan2(-R[2,0], sy)
127 | z = math.atan2(R[1,0], R[0,0])
128 | else :
129 | x = math.atan2(-R[1,2], R[1,1])
130 | y = math.atan2(-R[2,0], sy)
131 | z = 0
132 |
133 | return np.array([x, y, z])
134 |
135 | ################################################################################################
136 | """
137 | ##################################################################################################
138 | # get bbox coordinate
139 | def get_bbox(label):
140 | rows = np.any(label, axis=1)
141 | cols = np.any(label, axis=0) 142 | rmin, rmax = 143 | np.where(rows)[0][[0, -1]] 144 | cmin, cmax = np.where(cols)[0][[0, -1]] 145 | rmax += 1 146 | cmax += 1 147 | r_b = rmax - rmin 148 | for tt in range(len(border_list)): 149 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 150 | r_b = border_list[tt + 1] 151 | break 152 | c_b = cmax - cmin 153 | for tt in range(len(border_list)): 154 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 155 | c_b = border_list[tt + 1] 156 | break 157 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 158 | rmin = center[0] - int(r_b / 2) 159 | rmax = center[0] + int(r_b / 2) 160 | cmin = center[1] - int(c_b / 2) 161 | cmax = center[1] + int(c_b / 2) 162 | if rmin < 0: 163 | delt = -rmin 164 | rmin = 0 165 | rmax += delt 166 | if cmin < 0: 167 | delt = -cmin 168 | cmin = 0 169 | cmax += delt 170 | if rmax > img_width: 171 | delt = rmax - img_width 172 | rmax = img_width 173 | rmin -= delt 174 | if cmax > img_length: 175 | delt = cmax - img_length 176 | cmax = img_length 177 | cmin -= delt 178 | return rmin, rmax, cmin, cmax 179 | """ 180 | def get_bbox(rois,idx): 181 | # rmin = int(posecnn_rois[idx][2]) + 1 182 | # rmax = int(posecnn_rois[idx][4]) - 1 183 | # cmin = int(posecnn_rois[idx][1]) + 1 184 | # cmax = int(posecnn_rois[idx][3]) - 1 185 | rmin = int(rois[idx].xmin) + 1 186 | rmax = int(rois[idx].xmax) - 1 187 | cmin = int(rois[idx].ymin) + 1 188 | cmax = int(rois[idx].ymax) - 1 189 | r_b = rmax - rmin 190 | for tt in range(len(border_list)): 191 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 192 | r_b = border_list[tt + 1] 193 | break 194 | c_b = cmax - cmin 195 | for tt in range(len(border_list)): 196 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 197 | c_b = border_list[tt + 1] 198 | break 199 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 200 | rmin = center[0] - int(r_b / 2) 201 | rmax = center[0] + int(r_b / 2) 202 | cmin = center[1] - int(c_b / 2) 203 | cmax = center[1] + int(c_b / 2) 204 | if rmin < 0: 205 | delt = -rmin 206 | rmin = 0 207 | rmax += delt 208 | if cmin < 0: 209 | delt = -cmin 210 | cmin = 0 211 | cmax += delt 212 | if rmax > img_width: 213 | delt = rmax - img_width 214 | rmax = img_width 215 | rmin -= delt 216 | if cmax > img_length: 217 | delt = cmax - img_length 218 | cmax = img_length 219 | cmin -= delt 220 | return rmin, rmax, cmin, cmax 221 | #################################################################################################### 222 | ################################### load BiSeNet parameters ######################################## 223 | #################################################################################################### 224 | print('load BiseNet') 225 | start_time = time.time() 226 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 227 | bise_model = bise_model.cuda() 228 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 229 | global bise_model 230 | print('Done!') 231 | print("Load time : {}".format(time.time() - start_time)) 232 | 233 | ##################################################################################################### 234 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 235 | ##################################################################################################### 236 | print('load densefusion network') 237 | start_time = time.time() 238 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 239 | estimator.cuda() 
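# Note on the two networks loaded in this block: 'estimator' is DenseFusion's PoseNet; given the
# normalized RGB crop, the masked depth point cloud, the chosen pixel indices and the object's
# class index, it predicts a per-point rotation quaternion, translation offset and confidence,
# and the most confident point is kept as the pose hypothesis. 'refiner' (PoseRefineNet, loaded
# just below) implements the iterative refinement stage; in this particular script pose_predict()
# never invokes it, so the published pose is the unrefined PoseNet estimate.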
240 | estimator.load_state_dict(torch.load(opt.model)) 241 | estimator.eval() 242 | ############################################################################ 243 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 244 | refiner.cuda() 245 | refiner.load_state_dict(torch.load(opt.refine_model)) 246 | refiner.eval() 247 | print('Done') 248 | print("Load time : {}".format(time.time() - start_time)) 249 | ##################################################################################################### 250 | # class list upload 251 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 252 | class_id = 1 253 | cld = {} 254 | while 1: 255 | class_input = class_file.readline() 256 | if not class_input: 257 | break 258 | class_input = class_input[:-1] 259 | 260 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 261 | cld[class_id] = [] 262 | while 1: 263 | input_line = input_file.readline() 264 | if not input_line: 265 | break 266 | input_line = input_line[:-1] 267 | input_line = input_line.split(' ') 268 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 269 | input_file.close() 270 | cld[class_id] = np.array(cld[class_id]) 271 | class_id += 1 272 | ######################################################################################################## 273 | def seg_predict(image): 274 | global bise_model 275 | try: 276 | with torch.no_grad(): 277 | bise_model.eval() 278 | h,w,_ = image.shape 279 | to_tensor = transforms.Compose([ 280 | transforms.ToTensor(), 281 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 282 | ]) 283 | 284 | image = to_tensor(image) 285 | image = image.unsqueeze_(0) 286 | image = image.cuda() 287 | predict = bise_model(image).squeeze() 288 | predict = reverse_one_hot(predict) 289 | predict = np.array(predict) 290 | predict = np.resize(predict,[h,w]) 291 | print(np.unique(predict)) 292 | zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR) 293 | cv2.imwrite('./segmentation_image.png', zzzz) 294 | 295 | return predict 296 | except CvBridgeError as e: 297 | print(e) 298 | 299 | 300 | 301 | 302 | 303 | def pose_predict(img, depth,rois): 304 | class_list = ['002_master_chef_can', 305 | '003_cracker_box', 306 | '004_sugar_box', 307 | '005_tomato_soup_can', 308 | '006_mustard_bottle', 309 | '007_tuna_fish_can', 310 | '008_pudding_box', 311 | '009_gelatin_box', 312 | '010_potted_meat_can', 313 | '011_banana', 314 | '019_pitcher_base', 315 | '025_mug', 316 | '021_bleach_cleanser', 317 | '024_bowl', 318 | '035_power_drill', 319 | '036_wood_block', 320 | '037_scissors', 321 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick'] 322 | try: 323 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 324 | object_number = len(rois) 325 | bridge = CvBridge() 326 | 327 | #lst = posecnn_rois[:,0:1].flatten() 328 | #lst = np.unique(label) 329 | my_result_wo_refine = [] 330 | my_result = [] 331 | for idx in range(object_number): 332 | #itemid = lst[idx] 333 | #itemid = class_list.index(rois[idx].Class) +1 334 | itemid = class_list.index(rois[idx].Class) +3 335 | 336 | try: 337 | label = seg_predict(img) 338 | cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label) 339 | rmin, rmax, cmin,cmax = get_bbox(rois,idx) 340 | # bounding box cutting 341 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 342 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 343 | 
#mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 344 | #mask = mask_label * mask_depth 345 | # only image 346 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 347 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 348 | mask = mask_label * mask_depth 349 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 350 | 351 | 352 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 353 | print(choose) 354 | if len(choose) > num_points: 355 | c_mask = np.zeros(len(choose), dtype=int) 356 | c_mask[:num_points] = 1 357 | np.random.shuffle(c_mask) 358 | choose = choose[c_mask.nonzero()] 359 | else: 360 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 361 | 362 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 363 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 364 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 365 | choose = np.array([choose]) 366 | 367 | pt2 = depth_masked / cam_scale 368 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 369 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 370 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 371 | img_masked = np.array(img)[:, :, :3] 372 | img_masked = np.transpose(img_masked, (2, 0, 1)) 373 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 374 | 375 | cloud = torch.from_numpy(cloud.astype(np.float32)) 376 | choose = torch.LongTensor(choose.astype(np.int32)) 377 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 378 | index = torch.LongTensor([itemid - 1]) 379 | 380 | cloud = Variable(cloud).cuda() 381 | choose = Variable(choose).cuda() 382 | img_masked = Variable(img_masked).cuda() 383 | index = Variable(index).cuda() 384 | cloud = cloud.view(1, num_points, 3) 385 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 386 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 387 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 388 | pred_c = pred_c.view(bs, num_points) 389 | how_max, which_max = torch.max(pred_c, 1) 390 | pred_t = pred_t.view(bs * num_points, 1, 3) 391 | points = cloud.view(bs * num_points, 1, 3) 392 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 393 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 394 | my_pred = np.append(my_r, my_t) 395 | # making pose matrix 396 | dof = quaternion_matrix(my_r) 397 | dof[0:3,3] = my_t 398 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 399 | rot_to_angle = rot_to_angle.reshape(1,3) 400 | my_t = my_t.reshape(1,3) 401 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 402 | object_poses = { 403 | 'tx':my_t[0][0], 404 | 'ty':my_t[0][1], 405 | 'tz':my_t[0][2], 406 | 'qx':my_r[0], 407 | 'qy':my_r[1], 408 | 'qz':my_r[2], 409 | 'qw':my_r[3]} 410 | my_result.append(object_poses) 411 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 412 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 413 | [0, 0, 1]])) 414 | imgpts, jac = cv2.projectPoints(cld[14], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup 415 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 416 | my_result_wo_refine.append(my_pred.tolist()) 417 | except ZeroDivisionError: 418 | # my_result_wo_refine.append([0.0 for i in range(7)]) 419 | # my_result.append([0.0 for i in range(7)]) 420 | open_cv_image = None 421 | print('Fail') 422 | except CvBridgeError as e: 423 | print(e) 
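# pose_predict() returns 'my_result', a list with one dict per detected bounding box holding the
# camera-frame translation ('tx', 'ty', 'tz', i.e. depth / cam_scale) and the quaternion
# components ('qx', 'qy', 'qz', 'qw') taken directly from the PoseNet prediction, together with
# 'open_cv_image', the BGR frame with the projected 3D model points drawn on it, or None when
# the pose estimate for a box failed.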
424 | 425 | return my_result, open_cv_image 426 | 427 | def draw(img, imgpts, label): 428 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 429 | for point in imgpts: 430 | 431 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 432 | return img 433 | 434 | 435 | 436 | 437 | 438 | 439 | def implimentation_seg(): 440 | global cv_image 441 | global cv_depth 442 | global detect_res 443 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 444 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 445 | bridge = CvBridge() 446 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 447 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res) 448 | pose_array = PoseArray() 449 | pose_msg = Pose() 450 | print(pose_estimation) 451 | 452 | for i in range(len(pose_estimation)): 453 | pose_msg.position.x = pose_estimation[i]['tx'] 454 | pose_msg.position.y = pose_estimation[i]['ty'] 455 | pose_msg.position.z = pose_estimation[i]['tz'] 456 | pose_msg.orientation.x = pose_estimation[i]['qx'] 457 | pose_msg.orientation.y = pose_estimation[i]['qy'] 458 | pose_msg.orientation.z = pose_estimation[i]['qz'] 459 | pose_msg.orientation.w = pose_estimation[i]['qw'] 460 | 461 | pose_array.poses.append(pose_msg) 462 | pose_pub.publish(pose_array) 463 | if fit_image is not None: 464 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 465 | 466 | 467 | 468 | def main(): 469 | 470 | rospy.init_node('pose_estimation_server') 471 | rospy.spin() 472 | 473 | if __name__ == '__main__': 474 | main() 475 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb_publisher.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | 4 | 5 | ############# ros packages ##################### 6 | import rospy 7 | from sensor_msgs.msg import Image, CameraInfo 8 | from cv_bridge import CvBridge, CvBridgeError 9 | #from be.srv import AddTwoInts, AddTwoIntsResponse 10 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 11 | from geometry_msgs.msg import Pose, PoseArray 12 | import tf 13 | import message_filters 14 | import cv2 15 | 16 | ############ python pakcages ################### 17 | import _init_paths 18 | import argparse 19 | import sys 20 | import os 21 | import os.path as osp 22 | root_dir = osp.dirname(osp.dirname(__file__)) 23 | 24 | sys.path.append(root_dir) 25 | 26 | import copy 27 | import random 28 | import numpy as np 29 | import scipy.io as scio 30 | import scipy.misc 31 | import numpy.ma as ma 32 | import math 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.parallel 36 | import torch.backends.cudnn as cudnn 37 | import torch.optim as optim 38 | import torch.utils.data 39 | import torchvision.datasets as dset 40 | import torchvision.transforms as transforms 41 | import torchvision.utils as vutils 42 | import torch.nn.functional as F 43 | from torch.autograd import Variable 44 | from datasets.ycb.dataset import PoseDataset 45 | from lib.network import PoseNet, PoseRefineNet 46 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 47 | from model.build_BiSeNet import BiSeNet 48 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 49 | from matplotlib import pyplot as plt 50 | import time 51 | ################################################### 52 | 53 | 54 | ########################################################################################## 55 | 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir') 58 | parser.add_argument('--model', type=str, default = 'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model') 59 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model') 60 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model') 61 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)') 62 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 63 | parser.add_argument('--image_subscriber', type=str,default='/camera/color/image_raw') 64 | parser.add_argument('--depth_subscriber', type=str,default='/camera/depth/image_rect_raw') 65 | 66 | 67 | 68 | opt = parser.parse_args() 69 | 70 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 71 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 72 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 73 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 74 | #cam_cx = 312.9869 75 | #cam_cy = 241.3109 76 | #cam_fx = 1066.778 77 | #cam_fy = 1067.487 78 | cam_cx = 331.52874755859375 79 | cam_cy = 249.5271453857422 80 | cam_fx = 610.6751708984375 81 | cam_fy = 610.5318603515625 82 | cam_scale = 1000.0 83 | num_obj = 21 84 | img_width = 480 85 | img_length = 640 86 | 
num_points = 1000 87 | num_points_mesh = 500 88 | iteration = 2 89 | bs = 1 90 | dataset_config_dir = 'datasets/ycb/dataset_config' 91 | ycb_toolbox_dir = 'YCB_Video_toolbox' 92 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 93 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 94 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0]) 95 | 96 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 97 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 98 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 99 | 100 | bridge = CvBridge() 101 | 102 | def image_callback(rgb): 103 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8') 104 | global cv_image 105 | global img_flg 106 | img_flg = True 107 | print('img_flg: ', img_flg) 108 | 109 | def depth_callback(depth): 110 | cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1') 111 | global cv_depth 112 | 113 | if img_flg is True: 114 | implimentation_seg() 115 | 116 | def rois_callback(rois): 117 | detect_res = rois.bounding_boxes 118 | global detect_res 119 | print("get bbox") 120 | 121 | 122 | 123 | ######################################################################################### 124 | 125 | def isRotationMatrix(R) : 126 | Rt = np.transpose(R) 127 | shouldBeIdentity = np.dot(Rt, R) 128 | I = np.identity(3, dtype = R.dtype) 129 | n = np.linalg.norm(I - shouldBeIdentity) 130 | return n < 1e-6 131 | 132 | 133 | def rotationMatrixToEulerAngles(R) : 134 | 135 | assert(isRotationMatrix(R)) 136 | 137 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 138 | 139 | singular = sy < 1e-6 140 | 141 | if not singular : 142 | x = math.atan2(R[2,1] , R[2,2]) 143 | y = math.atan2(-R[2,0], sy) 144 | z = math.atan2(R[1,0], R[0,0]) 145 | else : 146 | x = math.atan2(-R[1,2], R[1,1]) 147 | y = math.atan2(-R[2,0], sy) 148 | z = 0 149 | 150 | return np.array([x, y, z]) 151 | 152 | ################################################################################################ 153 | 154 | ################################################################################################## 155 | # get bbox coordinate 156 | def get_bbox(label): 157 | rows = np.any(label, axis=1) 158 | cols = np.any(label, axis=0) 159 | rmin, rmax = np.where(rows)[0][[0, -1]] 160 | cmin, cmax = np.where(cols)[0][[0, -1]] 161 | rmax += 1 162 | cmax += 1 163 | r_b = rmax - rmin 164 | for tt in range(len(border_list)): 165 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 166 | r_b = border_list[tt + 1] 167 | break 168 | c_b = cmax - cmin 169 | for tt in range(len(border_list)): 170 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 171 | c_b = border_list[tt + 1] 172 | break 173 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 174 | rmin = center[0] - int(r_b / 2) 175 | rmax = center[0] + int(r_b / 2) 176 | cmin = center[1] - int(c_b / 2) 177 | cmax = center[1] + int(c_b / 2) 178 | if rmin < 0: 179 | delt = -rmin 180 | rmin = 0 181 | rmax += delt 182 | if cmin < 0: 183 | delt = -cmin 184 | cmin = 0 185 | cmax += delt 186 | if rmax > img_width: 187 | delt = rmax - img_width 188 | rmax = img_width 189 | rmin -= delt 190 | if cmax > img_length: 191 | delt = cmax - img_length 192 | cmax = img_length 193 | cmin -= delt 194 | return rmin, rmax, cmin, cmax 195 | 196 | '''def get_bbox(rois,idx): 197 | # rmin = int(posecnn_rois[idx][2]) + 1 198 | # rmax = int(posecnn_rois[idx][4]) - 1 199 | # cmin = int(posecnn_rois[idx][1]) + 1 200 | # cmax = int(posecnn_rois[idx][3]) - 1 
201 | rmin = int(rois[idx].xmin) + 1 202 | rmax = int(rois[idx].xmax) - 1 203 | cmin = int(rois[idx].ymin) + 1 204 | cmax = int(rois[idx].ymax) - 1 205 | r_b = rmax - rmin 206 | for tt in range(len(border_list)): 207 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 208 | r_b = border_list[tt + 1] 209 | break 210 | c_b = cmax - cmin 211 | for tt in range(len(border_list)): 212 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 213 | c_b = border_list[tt + 1] 214 | break 215 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 216 | rmin = center[0] - int(r_b / 2) 217 | rmax = center[0] + int(r_b / 2) 218 | cmin = center[1] - int(c_b / 2) 219 | cmax = center[1] + int(c_b / 2) 220 | if rmin < 0: 221 | delt = -rmin 222 | rmin = 0 223 | rmax += delt 224 | if cmin < 0: 225 | delt = -cmin 226 | cmin = 0 227 | cmax += delt 228 | if rmax > img_width: 229 | delt = rmax - img_width 230 | rmax = img_width 231 | rmin -= delt 232 | if cmax > img_length: 233 | delt = cmax - img_length 234 | cmax = img_length 235 | cmin -= delt 236 | return rmin, rmax, cmin, cmax''' 237 | #################################################################################################### 238 | ################################### load BiSeNet parameters ######################################## 239 | #################################################################################################### 240 | print('load BiseNet') 241 | start_time = time.time() 242 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 243 | bise_model = bise_model.cuda() 244 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 245 | global bise_model 246 | print('Done!') 247 | print("Load time : {}".format(time.time() - start_time)) 248 | 249 | ##################################################################################################### 250 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 251 | ##################################################################################################### 252 | print('load densefusion network') 253 | start_time = time.time() 254 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 255 | estimator.cuda() 256 | estimator.load_state_dict(torch.load(opt.model)) 257 | estimator.eval() 258 | ############################################################################ 259 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 260 | refiner.cuda() 261 | refiner.load_state_dict(torch.load(opt.refine_model)) 262 | refiner.eval() 263 | print('Done!') 264 | print("Load time : {}".format(time.time() - start_time)) 265 | ##################################################################################################### 266 | # class list upload 267 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 268 | class_id = 1 269 | cld = {} 270 | while 1: 271 | class_input = class_file.readline() 272 | if not class_input: 273 | break 274 | class_input = class_input[:-1] 275 | 276 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 277 | cld[class_id] = [] 278 | while 1: 279 | input_line = input_file.readline() 280 | if not input_line: 281 | break 282 | input_line = input_line[:-1] 283 | input_line = input_line.split(' ') 284 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 285 | input_file.close() 286 | cld[class_id] = np.array(cld[class_id]) 287 | class_id += 1 288 | 
######################################################################################################## 289 | def seg_predict(image): 290 | global bise_model 291 | try: 292 | with torch.no_grad(): 293 | bise_model.eval() 294 | h,w,_ = image.shape 295 | to_tensor = transforms.Compose([ 296 | transforms.ToTensor(), 297 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 298 | ]) 299 | 300 | image = to_tensor(image) 301 | image = image.unsqueeze_(0) 302 | image = image.cuda() 303 | predict = bise_model(image).squeeze() 304 | predict = reverse_one_hot(predict) 305 | predict = np.array(predict) 306 | predict = np.resize(predict,[h,w]) 307 | pub_label = np.uint8(predict) 308 | #zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR) 309 | #cv2.imwrite('./segmentation_image.png', zzzz) 310 | 311 | return predict, pub_label 312 | except CvBridgeError as e: 313 | print(e) 314 | 315 | def pose_predict(img, depth): 316 | class_list = ['002_master_chef_can', 317 | '003_cracker_box', 318 | '004_sugar_box', 319 | '005_tomato_soup_can', 320 | '006_mustard_bottle', 321 | '007_tuna_fish_can', 322 | '008_pudding_box', 323 | '009_gelatin_box', 324 | '010_potted_meat_can', 325 | '011_banana', 326 | '019_pitcher_base', 327 | '025_mug', 328 | '021_bleach_cleanser', 329 | '024_bowl', 330 | '035_power_drill', 331 | '036_wood_block', 332 | '037_scissors', 333 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick'] 334 | try: 335 | bridge = CvBridge() 336 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 337 | label, pub_label = seg_predict(img) 338 | label = label-1 # to match labeling results to class list 339 | pub_label =pub_label * 50 340 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1')) 341 | 342 | object_number = len(np.unique(label)) 343 | print('unique lable: ', np.unique(label)) 344 | 345 | my_result_wo_refine = [] 346 | my_result = [] 347 | open_cv_image = np.zeros_like(img) 348 | 349 | for idx in range(object_number): 350 | if idx == 0: continue 351 | 352 | itemid = np.unique(label)[idx] 353 | print('itemid: ', itemid) 354 | 355 | try: 356 | #cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label) 357 | rmin, rmax, cmin,cmax = get_bbox(label) 358 | # bounding box cutting 359 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 360 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 361 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 362 | #mask = mask_label * mask_depth 363 | # only image 364 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 365 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 366 | mask = mask_label * mask_depth 367 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 368 | 369 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 370 | 371 | if len(choose) > num_points: 372 | 373 | c_mask = np.zeros(len(choose), dtype=int) 374 | c_mask[:num_points] = 1 375 | np.random.shuffle(c_mask) 376 | choose = choose[c_mask.nonzero()] 377 | else: 378 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 379 | 380 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 381 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 382 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 383 | choose = np.array([choose]) 384 | 385 | pt2 = depth_masked / cam_scale 386 | pt0 = (ymap_masked - cam_cx) 
* pt2 / cam_fx 387 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 388 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 389 | img_masked = np.array(img)[:, :, :3] 390 | img_masked = np.transpose(img_masked, (2, 0, 1)) 391 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 392 | 393 | cloud = torch.from_numpy(cloud.astype(np.float32)) 394 | choose = torch.LongTensor(choose.astype(np.int32)) 395 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 396 | index = torch.LongTensor([itemid]) 397 | 398 | cloud = Variable(cloud).cuda() 399 | choose = Variable(choose).cuda() 400 | img_masked = Variable(img_masked).cuda() 401 | index = Variable(index).cuda() 402 | cloud = cloud.view(1, num_points, 3) 403 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 404 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 405 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 406 | pred_c = pred_c.view(bs, num_points) 407 | how_max, which_max = torch.max(pred_c, 1) 408 | pred_t = pred_t.view(bs * num_points, 1, 3) 409 | points = cloud.view(bs * num_points, 1, 3) 410 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 411 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 412 | my_pred = np.append(my_r, my_t) 413 | # making pose matrix 414 | dof = quaternion_matrix(my_r) 415 | dof[0:3,3] = my_t 416 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 417 | rot_to_angle = rot_to_angle.reshape(1,3) 418 | my_t = my_t.reshape(1,3) 419 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 420 | object_poses = { 421 | 'tx':my_t[0][0], 422 | 'ty':my_t[0][1], 423 | 'tz':my_t[0][2], 424 | 'qx':my_r[0], 425 | 'qy':my_r[1], 426 | 'qz':my_r[2], 427 | 'qw':my_r[3]} 428 | my_result.append(object_poses) 429 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 430 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 431 | [0, 0, 1]])) 432 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup 433 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 434 | my_result_wo_refine.append(my_pred.tolist()) 435 | except ZeroDivisionError: 436 | # my_result_wo_refine.append([0.0 for i in range(7)]) 437 | # my_result.append([0.0 for i in range(7)]) 438 | open_cv_image = None 439 | print('Fail') 440 | except CvBridgeError as e: 441 | print(e) 442 | 443 | return my_result, open_cv_image 444 | 445 | def draw(img, imgpts, label): 446 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 447 | for point in imgpts: 448 | 449 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 450 | return img 451 | 452 | 453 | def implimentation_seg(): 454 | global cv_image 455 | global cv_depth 456 | 457 | bridge = CvBridge() 458 | 459 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth) 460 | print('pose_estimation: ', pose_estimation) 461 | pose_array = PoseArray() 462 | pose_msg = Pose() 463 | 464 | for i in range(len(pose_estimation)): 465 | pose_msg.position.x = pose_estimation[i]['tx'] 466 | pose_msg.position.y = pose_estimation[i]['ty'] 467 | pose_msg.position.z = pose_estimation[i]['tz'] 468 | pose_msg.orientation.x = pose_estimation[i]['qx'] 469 | pose_msg.orientation.y = pose_estimation[i]['qy'] 470 | 
pose_msg.orientation.z = pose_estimation[i]['qz'] 471 | pose_msg.orientation.w = pose_estimation[i]['qw'] 472 | 473 | pose_array.poses.append(pose_msg) 474 | 475 | pose_pub.publish(pose_array) 476 | if fit_image is not None: 477 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 478 | 479 | 480 | def main(): 481 | 482 | rospy.init_node('pose_estimation_server') 483 | rgb_sub = rospy.Subscriber(opt.image_subscriber,Image, image_callback) 484 | depth_sub = rospy.Subscriber(opt.depth_subscriber,Image, depth_callback) 485 | rois_sub = rospy.Subscriber('/bbox',BoundingBoxes, rois_callback) 486 | rospy.spin() 487 | 488 | if __name__ == '__main__': 489 | main() 490 | -------------------------------------------------------------------------------- /scripts/tools/temp.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | import time 42 | 43 | ########################################################################################## 44 | 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 47 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 48 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 49 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model') 50 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)') 51 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 52 | 53 | 54 | opt = parser.parse_args() 55 | 56 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 57 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 58 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 59 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 60 | cam_cx = 312.9869 61 | cam_cy = 241.3109 62 | cam_fx = 1066.778 63 
| cam_fy = 1067.487 64 | cam_scale = 10000.0 65 | num_obj = 21 66 | img_width = 480 67 | img_length = 640 68 | num_points = 1000 69 | num_points_mesh = 500 70 | iteration = 2 71 | bs = 1 72 | dataset_config_dir = 'datasets/ycb/dataset_config' 73 | ycb_toolbox_dir = 'YCB_Video_toolbox' 74 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 75 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 76 | 77 | ######################################################################################### 78 | 79 | def isRotationMatrix(R) : 80 | Rt = np.transpose(R) 81 | shouldBeIdentity = np.dot(Rt, R) 82 | I = np.identity(3, dtype = R.dtype) 83 | n = np.linalg.norm(I - shouldBeIdentity) 84 | return n < 1e-6 85 | 86 | 87 | def rotationMatrixToEulerAngles(R) : 88 | 89 | assert(isRotationMatrix(R)) 90 | 91 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 92 | 93 | singular = sy < 1e-6 94 | 95 | if not singular : 96 | x = math.atan2(R[2,1] , R[2,2]) 97 | y = math.atan2(-R[2,0], sy) 98 | z = math.atan2(R[1,0], R[0,0]) 99 | else : 100 | x = math.atan2(-R[1,2], R[1,1]) 101 | y = math.atan2(-R[2,0], sy) 102 | z = 0 103 | 104 | return np.array([x, y, z]) 105 | 106 | ################################################################################################ 107 | """ 108 | ################################################################################################## 109 | # get bbox coordinate 110 | def get_bbox(label): 111 | rows = np.any(label, axis=1) 112 | cols = np.any(label, axis=0) 113 | rmin, rmax = 114 | np.where(rows)[0][[0, -1]] 115 | cmin, cmax = np.where(cols)[0][[0, -1]] 116 | rmax += 1 117 | cmax += 1 118 | r_b = rmax - rmin 119 | for tt in range(len(border_list)): 120 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 121 | r_b = border_list[tt + 1] 122 | break 123 | c_b = cmax - cmin 124 | for tt in range(len(border_list)): 125 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 126 | c_b = border_list[tt + 1] 127 | break 128 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 129 | rmin = center[0] - int(r_b / 2) 130 | rmax = center[0] + int(r_b / 2) 131 | cmin = center[1] - int(c_b / 2) 132 | cmax = center[1] + int(c_b / 2) 133 | if rmin < 0: 134 | delt = -rmin 135 | rmin = 0 136 | rmax += delt 137 | if cmin < 0: 138 | delt = -cmin 139 | cmin = 0 140 | cmax += delt 141 | if rmax > img_width: 142 | delt = rmax - img_width 143 | rmax = img_width 144 | rmin -= delt 145 | if cmax > img_length: 146 | delt = cmax - img_length 147 | cmax = img_length 148 | cmin -= delt 149 | return rmin, rmax, cmin, cmax 150 | """ 151 | def get_bbox(rois,idx): 152 | # rmin = int(posecnn_rois[idx][2]) + 1 153 | # rmax = int(posecnn_rois[idx][4]) - 1 154 | # cmin = int(posecnn_rois[idx][1]) + 1 155 | # cmax = int(posecnn_rois[idx][3]) - 1 156 | rmin = int(rois[idx].xmin) + 1 157 | rmax = int(rois[idx].xmax) - 1 158 | cmin = int(rois[idx].ymin) + 1 159 | cmax = int(rois[idx].ymax) - 1 160 | r_b = rmax - rmin 161 | for tt in range(len(border_list)): 162 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 163 | r_b = border_list[tt + 1] 164 | break 165 | c_b = cmax - cmin 166 | for tt in range(len(border_list)): 167 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 168 | c_b = border_list[tt + 1] 169 | break 170 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 171 | rmin = center[0] - int(r_b / 2) 172 | rmax = center[0] + int(r_b / 2) 173 | cmin = center[1] - int(c_b / 2) 174 | cmax = center[1] + int(c_b 
/ 2) 175 | if rmin < 0: 176 | delt = -rmin 177 | rmin = 0 178 | rmax += delt 179 | if cmin < 0: 180 | delt = -cmin 181 | cmin = 0 182 | cmax += delt 183 | if rmax > img_width: 184 | delt = rmax - img_width 185 | rmax = img_width 186 | rmin -= delt 187 | if cmax > img_length: 188 | delt = cmax - img_length 189 | cmax = img_length 190 | cmin -= delt 191 | return rmin, rmax, cmin, cmax 192 | #################################################################################################### 193 | ################################### load BiSeNet parameters ######################################## 194 | #################################################################################################### 195 | print('load BiSeNet') 196 | start_time = time.time() 197 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 198 | bise_model = bise_model.cuda() 199 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 200 | global bise_model 201 | print('Done!') 202 | print("Load time : {}".format(time.time() - start_time)) 203 | 204 | ##################################################################################################### 205 | ######################## load DenseFusion network and 3D object models ############################# 206 | ##################################################################################################### 207 | print('load densefusion network') 208 | start_time = time.time() 209 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 210 | estimator.cuda() 211 | estimator.load_state_dict(torch.load(opt.model)) 212 | estimator.eval() 213 | ############################################################################ 214 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 215 | refiner.cuda() 216 | refiner.load_state_dict(torch.load(opt.refine_model)) 217 | refiner.eval() 218 | print('Done') 219 | print("Load time : {}".format(time.time() - start_time)) 220 | ##################################################################################################### 221 | # class list upload 222 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 223 | class_id = 1 224 | cld = {} 225 | while 1: 226 | class_input = class_file.readline() 227 | if not class_input: 228 | break 229 | class_input = class_input[:-1] 230 | 231 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 232 | cld[class_id] = [] 233 | while 1: 234 | input_line = input_file.readline() 235 | if not input_line: 236 | break 237 | input_line = input_line[:-1] 238 | input_line = input_line.split(' ') 239 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 240 | input_file.close() 241 | cld[class_id] = np.array(cld[class_id]) 242 | class_id += 1 243 | ######################################################################################################## | # run BiSeNet on an RGB image and return the per-pixel class-id map 244 | def seg_predict(image): 245 | global bise_model 246 | try: 247 | with torch.no_grad(): 248 | bise_model.eval() 249 | h,w,_ = image.shape 250 | to_tensor = transforms.Compose([ 251 | transforms.ToTensor(), 252 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 253 | ]) 254 | 255 | image = to_tensor(image) 256 | image = image.unsqueeze_(0) 257 | image = image.cuda() 258 | predict = bise_model(image).squeeze() 259 | predict = reverse_one_hot(predict) 260 | predict = np.array(predict) 261 | predict = np.resize(predict,[h,w]) 262 | print(np.unique(predict)) | return predict 263 | except CvBridgeError as e: 264 | print(e) 265 | 266 | class
object_pose_estimation: 267 | def __init__(self): 268 | self.bridge = CvBridge() 269 | # rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback) 270 | # depth_sub = rospy.Subscriber('',Image, depth_callback) 271 | # rois_sub = rospy.Subscriber('',BoundingBoxes, rois_callback) 272 | self.rgb_sub = message_filters.Subscriber('/camera/color/image_raw',Image) 273 | self.depth_sub = message_filters.Subscriber('/camera/depth/image_rect_raw',Image) 274 | self.rois_sub = message_filters.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes) 275 | self.pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 276 | self.ts = message_filters.TimeSynchronizer([self.rgb_sub, self.depth_sub,self.rois_sub], queue_size = 10) 277 | self.ts.registerCallback(self.estimation_callback) 278 | 279 | 280 | def estimation_callback(self, rgb,depth,rois): 281 | try: 282 | img = self.bridge.imgmsg_to_cv2(rgb,'bgr8') 283 | depth = self.bridge.imgmsg_to_cv2(depth,'32SC1') 284 | rois = rois.bounding_boxes 285 | print(img, depth, rois) 286 | class_list = ['002_master_chef_can', 287 | '003_cracker_box', 288 | '004_sugar_box', 289 | '005_tomato_soup_can', 290 | '006_mustard_bottle', 291 | '007_tuna_fish_can', 292 | '008_pudding_box', 293 | '009_gelatin_box', 294 | '010_potted_meat_can', 295 | '011_banana',#'019_pitcher_base', 296 | '025_mug', 297 | '021_bleach_cleanser', 298 | '024_bowl', 299 | '035_power_drill', 300 | '036_wood_block', 301 | '037_scissors', 302 | '040_large_marker', 303 | '051_large_clamp', 304 | '052_extra_large_clamp', 305 | '061_foam_brick'] 306 | object_number = len(rois) 307 | #lst = posecnn_rois[:,0:1].flatten() 308 | #lst = np.unique(label) 309 | my_result_wo_refine = [] 310 | my_result = [] 311 | for idx in range(object_number): 312 | #itemid = lst[idx] 313 | itemid = class_list.index(rois[idx].Class) + 1 314 | print(itemid, rois[idx]) 315 | 316 | try: 317 | label = seg_predict(img) 318 | rmin, rmax, cmin, cmax = get_bbox(rois, idx) 319 | # bounding box cutting 320 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 321 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 322 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 323 | #mask = mask_label * mask_depth 324 | # only image 325 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 326 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 327 | mask = mask_label * mask_depth 328 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 329 | 330 | 331 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 332 | if len(choose) > num_points: 333 | c_mask = np.zeros(len(choose), dtype=int) 334 | c_mask[:num_points] = 1 335 | np.random.shuffle(c_mask) 336 | choose = choose[c_mask.nonzero()] 337 | else: 338 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 339 | 340 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 341 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 342 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 343 | choose = np.array([choose]) 344 | 345 | pt2 = depth_masked / cam_scale 346 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 347 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 348 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 349 | 350 | img_masked = np.array(img)[:, :, :3] 351 | img_masked = np.transpose(img_masked, (2, 0, 1)) 352 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
353 | 354 | cloud = torch.from_numpy(cloud.astype(np.float32)) 355 | choose = torch.LongTensor(choose.astype(np.int32)) 356 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 357 | index = torch.LongTensor([itemid - 1]) 358 | 359 | cloud = Variable(cloud).cuda() 360 | choose = Variable(choose).cuda() 361 | img_masked = Variable(img_masked).cuda() 362 | index = Variable(index).cuda() 363 | cloud = cloud.view(1, num_points, 3) 364 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 365 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 366 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 367 | pred_c = pred_c.view(bs, num_points) 368 | how_max, which_max = torch.max(pred_c, 1) 369 | pred_t = pred_t.view(bs * num_points, 1, 3) 370 | points = cloud.view(bs * num_points, 1, 3) 371 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 372 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 373 | my_pred = np.append(my_r, my_t) 374 | # making pose matrix 375 | dof = quaternion_matrix(my_r) 376 | dof[0:3,3] = my_t 377 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 378 | rot_to_angle = rot_to_angle.reshape(1,3) 379 | my_t = my_t.reshape(1,3) 380 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 381 | object_poses = { 382 | 'tx':my_t[0][0], 383 | 'ty':my_t[0][1], 384 | 'tz':my_t[0][2], | # my_r follows the [w, x, y, z] convention of lib.transformations 385 | 'qw':my_r[0], 386 | 'qx':my_r[1], 387 | 'qy':my_r[2], 388 | 'qz':my_r[3]} 389 | my_result.append(object_poses) 390 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 391 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 392 | [0, 0, 1]])) | dist = np.zeros((5, 1))   # distortion coefficients are assumed to be zero here 393 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) 394 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 395 | my_result_wo_refine.append(my_pred.tolist()) 396 | pose_array = PoseArray() 397 | pose_msg = Pose() | pose_msg.position.x = object_poses['tx'] | pose_msg.position.y = object_poses['ty'] | pose_msg.position.z = object_poses['tz'] | pose_msg.orientation.x = object_poses['qx'] | pose_msg.orientation.y = object_poses['qy'] | pose_msg.orientation.z = object_poses['qz'] | pose_msg.orientation.w = object_poses['qw'] | pose_array.poses.append(pose_msg) 398 | self.pose_pub.publish(pose_array) 399 | # pose_fit_image.publish(self.bridge.cv2_to_imgmsg(open_cv_image, 'bgr8'))   # disabled: no image publisher is defined in this class 400 | 401 | """ 402 | for ite in range(0, iteration): 403 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 404 | my_mat = quaternion_matrix(my_r) 405 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 406 | my_mat[0:3, 3] = my_t 407 | 408 | new_cloud = torch.bmm((cloud - T), R).contiguous() 409 | pred_r, pred_t = refiner(new_cloud, emb, index) 410 | pred_r = pred_r.view(1, 1, -1) 411 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 412 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 413 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 414 | my_mat_2 = quaternion_matrix(my_r_2) 415 | 416 | 417 | my_mat_2[0:3, 3] = my_t_2 418 | my_mat_final = np.dot(my_mat, my_mat_2) 419 | my_r_final = copy.deepcopy(my_mat_final) 420 | my_r_final[0:3, 3] = 0 421 | my_r_final = quaternion_from_matrix(my_r_final, True) 422 | 423 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 424 | 425 | my_pred = np.append(my_r_final, my_t_final) 426 | my_r = my_r_final 427 | my_t = my_t_final 428 | """ 429 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 430 | #my_result.append(my_pred.tolist()) 431 | except ZeroDivisionError: 432 | # my_result_wo_refine.append([0.0 for i in range(7)]) 433 | # my_result.append([0.0 for i in range(7)]) 434 | print('Fail') 435 |
except CvBridgeError as e: 436 | print(e) 437 | 438 | 439 | def draw(img, imgpts, label): 440 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 441 | for point in imgpts: 442 | 443 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 444 | return img 445 | 446 | def main(): 447 | rospy.init_node('pose_estimator',anonymous= True) 448 | Pose = object_pose_estimation() 449 | rospy.spin() 450 | 451 | if __name__ == '__main__': 452 | main() 453 | -------------------------------------------------------------------------------- /scripts/tools/test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | import cv2 3 | import rospy 4 | from sensor_msgs.msg import Image, CameraInfo 5 | from cv_bridge import CvBridge, CvBridgeError 6 | 7 | class ImageIo: 8 | def __init__(self): 9 | self.rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, self.rgb_callback) 10 | def rgb_callback(self,rgb): 11 | bridge = CvBridge() 12 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 13 | img = bridge.imgmsg_to_cv2(rgb,"bgr8") 14 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 15 | image = cv2.cvtColor(img.copy(), cv2.COLOR_RGB2GRAY) 16 | label_pub.publish(bridge.cv2_to_imgmsg(image,encoding="8UC1")) 17 | 18 | 19 | 20 | 21 | 22 | def main(): 23 | IO = ImageIo() 24 | 25 | if __name__ == '__main__': 26 | rospy.init_node('zzz',anonymous = True) 27 | main() 28 | rospy.spin() -------------------------------------------------------------------------------- /scripts/tools/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # DenseFusion 6D Object Pose Estimation by Iterative Dense Fusion 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Chen 5 | # -------------------------------------------------------- 6 | 7 | import _init_paths 8 | import argparse 9 | import os 10 | import random 11 | import time 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.parallel 16 | import torch.backends.cudnn as cudnn 17 | import torch.optim as optim 18 | import torch.utils.data 19 | import torchvision.datasets as dset 20 | import torchvision.transforms as transforms 21 | import torchvision.utils as vutils 22 | from torch.autograd import Variable 23 | from datasets.ycb.dataset import PoseDataset as PoseDataset_ycb 24 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod 25 | from lib.network import PoseNet, PoseRefineNet 26 | from lib.loss import Loss 27 | from lib.loss_refiner import Loss_refine 28 | from lib.utils import setup_logger 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--dataset', type=str, default = 'ycb', help='ycb or linemod') 32 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir (''YCB_Video_Dataset'' or ''Linemod_preprocessed'')') 33 | parser.add_argument('--batch_size', type=int, default = 8, help='batch size') 34 | parser.add_argument('--workers', type=int, default = 10, help='number of data loading workers') 35 | parser.add_argument('--lr', default=0.0001, help='learning rate') 36 | parser.add_argument('--lr_rate', default=0.3, help='learning rate decay 
rate') 37 | parser.add_argument('--w', default=0.015, help='learning rate') 38 | parser.add_argument('--w_rate', default=0.3, help='learning rate decay rate') 39 | parser.add_argument('--decay_margin', default=0.016, help='margin to decay lr & w') 40 | parser.add_argument('--refine_margin', default=0.013, help='margin to start the training of iterative refinement') 41 | parser.add_argument('--noise_trans', default=0.03, help='range of the random noise of translation added to the training data') 42 | parser.add_argument('--iteration', type=int, default = 2, help='number of refinement iterations') 43 | parser.add_argument('--nepoch', type=int, default=500, help='max number of epochs to train') 44 | parser.add_argument('--resume_posenet', type=str, default = '', help='resume PoseNet model') 45 | parser.add_argument('--resume_refinenet', type=str, default = '', help='resume PoseRefineNet model') 46 | parser.add_argument('--start_epoch', type=int, default = 1, help='which epoch to start') 47 | opt = parser.parse_args() 48 | 49 | 50 | def main(): 51 | opt.manualSeed = random.randint(1, 10000) 52 | random.seed(opt.manualSeed) 53 | torch.manual_seed(opt.manualSeed) 54 | 55 | if opt.dataset == 'ycb': 56 | opt.num_objects = 21 #number of object classes in the dataset 57 | opt.num_points = 1000 #number of points on the input pointcloud 58 | opt.outf = 'trained_models/ycb' #folder to save trained models 59 | opt.log_dir = 'experiments/logs/ycb' #folder to save logs 60 | opt.repeat_epoch = 1 #number of repeat times for one epoch training 61 | elif opt.dataset == 'linemod': 62 | opt.num_objects = 13 63 | opt.num_points = 500 64 | opt.outf = 'trained_models/linemod' 65 | opt.log_dir = 'experiments/logs/linemod' 66 | opt.repeat_epoch = 20 67 | else: 68 | print('Unknown dataset') 69 | return 70 | 71 | estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) 72 | estimator.cuda() 73 | refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) 74 | refiner.cuda() 75 | 76 | if opt.resume_posenet != '': 77 | estimator.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_posenet))) 78 | 79 | if opt.resume_refinenet != '': 80 | refiner.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_refinenet))) 81 | opt.refine_start = True 82 | opt.decay_start = True 83 | opt.lr *= opt.lr_rate 84 | opt.w *= opt.w_rate 85 | opt.batch_size = int(opt.batch_size / opt.iteration) 86 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) 87 | else: 88 | opt.refine_start = False 89 | opt.decay_start = False 90 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) 91 | 92 | if opt.dataset == 'ycb': 93 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 94 | elif opt.dataset == 'linemod': 95 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 96 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers) 97 | if opt.dataset == 'ycb': 98 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 99 | elif opt.dataset == 'linemod': 100 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 101 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) 102 | 103 | opt.sym_list = dataset.get_sym_list() 104 | opt.num_points_mesh = 
dataset.get_num_points_mesh() 105 | 106 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list)) 107 | 108 | criterion = Loss(opt.num_points_mesh, opt.sym_list) 109 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) 110 | 111 | best_test = np.Inf 112 | 113 | if opt.start_epoch == 1: 114 | for log in os.listdir(opt.log_dir): 115 | os.remove(os.path.join(opt.log_dir, log)) 116 | st_time = time.time() 117 | 118 | for epoch in range(opt.start_epoch, opt.nepoch): 119 | logger = setup_logger('epoch%d' % epoch, os.path.join(opt.log_dir, 'epoch_%d_log.txt' % epoch)) 120 | logger.info('Train time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Training started')) 121 | train_count = 0 122 | train_dis_avg = 0.0 123 | if opt.refine_start: 124 | estimator.eval() 125 | refiner.train() 126 | else: 127 | estimator.train() 128 | optimizer.zero_grad() 129 | 130 | for rep in range(opt.repeat_epoch): 131 | for i, data in enumerate(dataloader, 0): 132 | points, choose, img, target, model_points, idx = data 133 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 134 | Variable(choose).cuda(), \ 135 | Variable(img).cuda(), \ 136 | Variable(target).cuda(), \ 137 | Variable(model_points).cuda(), \ 138 | Variable(idx).cuda() 139 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) 140 | loss, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) 141 | 142 | if opt.refine_start: 143 | for ite in range(0, opt.iteration): 144 | pred_r, pred_t = refiner(new_points, emb, idx) 145 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) 146 | dis.backward() 147 | else: 148 | loss.backward() 149 | 150 | train_dis_avg += dis.item() 151 | train_count += 1 152 | 153 | if train_count % opt.batch_size == 0: 154 | logger.info('Train time {0} Epoch {1} Batch {2} Frame {3} Avg_dis:{4}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, int(train_count / opt.batch_size), train_count, train_dis_avg / opt.batch_size)) 155 | optimizer.step() 156 | optimizer.zero_grad() 157 | train_dis_avg = 0 158 | 159 | if train_count != 0 and train_count % 1000 == 0: 160 | if opt.refine_start: 161 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_current.pth'.format(opt.outf)) 162 | else: 163 | torch.save(estimator.state_dict(), '{0}/pose_model_current.pth'.format(opt.outf)) 164 | 165 | print('>>>>>>>>----------epoch {0} train finish---------<<<<<<<<'.format(epoch)) 166 | 167 | 168 | logger = setup_logger('epoch%d_test' % epoch, os.path.join(opt.log_dir, 'epoch_%d_test_log.txt' % epoch)) 169 | logger.info('Test time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Testing started')) 170 | test_dis = 0.0 171 | test_count = 0 172 | estimator.eval() 173 | refiner.eval() 174 | 175 | for j, data in enumerate(testdataloader, 0): 176 | points, choose, img, target, model_points, idx = data 177 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 178 | Variable(choose).cuda(), \ 179 | Variable(img).cuda(), \ 180 | Variable(target).cuda(), \ 181 | Variable(model_points).cuda(), \ 182 | Variable(idx).cuda() 183 | pred_r, pred_t, pred_c, emb 
= estimator(img, points, choose, idx) 184 | _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) 185 | 186 | if opt.refine_start: 187 | for ite in range(0, opt.iteration): 188 | pred_r, pred_t = refiner(new_points, emb, idx) 189 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) 190 | 191 | test_dis += dis.item() 192 | logger.info('Test time {0} Test Frame No.{1} dis:{2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), test_count, dis)) 193 | 194 | test_count += 1 195 | 196 | test_dis = test_dis / test_count 197 | logger.info('Test time {0} Epoch {1} TEST FINISH Avg dis: {2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, test_dis)) 198 | if test_dis <= best_test: 199 | best_test = test_dis 200 | if opt.refine_start: 201 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis)) 202 | else: 203 | torch.save(estimator.state_dict(), '{0}/pose_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis)) 204 | print(epoch, '>>>>>>>>----------BEST TEST MODEL SAVED---------<<<<<<<<') 205 | 206 | if best_test < opt.decay_margin and not opt.decay_start: 207 | opt.decay_start = True 208 | opt.lr *= opt.lr_rate 209 | opt.w *= opt.w_rate 210 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) 211 | 212 | if best_test < opt.refine_margin and not opt.refine_start: 213 | opt.refine_start = True 214 | opt.batch_size = int(opt.batch_size / opt.iteration) 215 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) 216 | 217 | if opt.dataset == 'ycb': 218 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 219 | elif opt.dataset == 'linemod': 220 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 221 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers) 222 | if opt.dataset == 'ycb': 223 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 224 | elif opt.dataset == 'linemod': 225 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 226 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) 227 | 228 | opt.sym_list = dataset.get_sym_list() 229 | opt.num_points_mesh = dataset.get_num_points_mesh() 230 | 231 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list)) 232 | 233 | criterion = Loss(opt.num_points_mesh, opt.sym_list) 234 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) 235 | 236 | if __name__ == '__main__': 237 | main() 238 | -------------------------------------------------------------------------------- /scripts/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/utils.pyc -------------------------------------------------------------------------------- /srv/CameraRequests.srv: 
-------------------------------------------------------------------------------- 1 | int64 a 2 | --- 3 | geometry_msgs/PoseArray pose_array 4 | 5 | --------------------------------------------------------------------------------
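
The CameraRequests service above takes a single int64 request field (a) and answers with a geometry_msgs/PoseArray. As a rough illustration only, here is a minimal rospy client sketch, assuming the srv module is generated into the densefusion package and that some node advertises the service under the hypothetical name /camera_requests (no such server is shown in this snapshot):

#! /usr/bin/env python
# Hypothetical client for the CameraRequests service defined above.
# '/camera_requests' is an assumed service name, not one used elsewhere in this repository.
import rospy
from densefusion.srv import CameraRequests

def request_poses(a=1):
    rospy.wait_for_service('/camera_requests')
    try:
        camera_requests = rospy.ServiceProxy('/camera_requests', CameraRequests)
        resp = camera_requests(a)      # request field: int64 a
        return resp.pose_array         # response field: geometry_msgs/PoseArray
    except rospy.ServiceException as e:
        rospy.logerr('CameraRequests call failed: %s', e)
        return None

if __name__ == '__main__':
    rospy.init_node('camera_requests_client')
    poses = request_poses()
    if poses is not None:
        print('received {} poses'.format(len(poses.poses)))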