├── CMakeLists.txt ├── README.md ├── datasets └── ycb │ ├── __init__.pyc │ ├── dataset.pyc │ └── dataset_config │ ├── classes.txt │ ├── test_data_list.txt │ └── train_data_list.txt ├── package.xml ├── scripts ├── 1 ├── LICENSE ├── README.md ├── assets │ ├── compare.png │ ├── pullfig.png │ ├── result_linemod.png │ └── result_ycb.png ├── datasets │ ├── linemod │ │ ├── dataset.py │ │ └── dataset_config │ │ │ └── models_info.yml │ └── ycb │ │ ├── dataset.py │ │ └── dataset_config │ │ ├── classes.txt │ │ ├── test_data_list.txt │ │ └── train_data_list.txt ├── distortion.npy ├── eval.py ├── experiments │ └── scripts │ │ ├── eval_linemod.sh │ │ ├── eval_ycb.sh │ │ ├── ros_eval_msg.sh │ │ ├── ros_eval_ycb.sh │ │ ├── test.sh │ │ ├── train_linemod.sh │ │ └── train_ycb.sh ├── lib │ ├── __init__.pyc │ ├── extractors.pyc │ ├── knn │ │ ├── __init__.pyc │ │ ├── build │ │ │ └── knn_cuda_kernel.so │ │ ├── build_ffi.py │ │ ├── knn_pytorch │ │ │ ├── __init__.py │ │ │ ├── __init__.pyc │ │ │ ├── __pycache__ │ │ │ │ ├── __init__.cpython-35.pyc │ │ │ │ └── __init__.cpython-36.pyc │ │ │ └── _knn_pytorch.so │ │ └── src │ │ │ ├── knn_cuda_kernel.cu │ │ │ ├── knn_cuda_kernel.h │ │ │ ├── knn_pytorch.c │ │ │ └── knn_pytorch.h │ ├── loss.py │ ├── loss.pyc │ ├── loss_refiner.py │ ├── network.py │ ├── pspnet.pyc │ └── transformations.pyc ├── loss.py ├── matrix.npy ├── model │ ├── build_BiSeNet.py │ └── build_contextpath.py ├── predict.npy ├── tools │ ├── __pycache__ │ │ └── _init_paths.cpython-35.pyc │ ├── _init_paths.py │ ├── _init_paths.pyc │ ├── eval_linemod.py │ ├── eval_ycb.py │ ├── ros_eval_ycb.py │ ├── ros_eval_ycb_message.py │ ├── ros_eval_ycb_publisher.py │ ├── temp.py │ ├── test.py │ └── train.py └── utils.pyc └── srv └── CameraRequests.srv /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.8.3) 2 | project(densefusion) 3 | 4 | ## Compile as C++11, supported in ROS Kinetic and newer 5 | # add_compile_options(-std=c++11) 6 | 7 | ## Find catkin macros and libraries 8 | ## if COMPONENTS list like find_package(catkin REQUIRED COMPONENTS xyz) 9 | ## is used, also find other catkin packages 10 | find_package(catkin REQUIRED COMPONENTS 11 | rospy 12 | std_msgs 13 | message_generation 14 | ) 15 | 16 | ## System dependencies are found with CMake's conventions 17 | # find_package(Boost REQUIRED COMPONENTS system) 18 | 19 | 20 | ## Uncomment this if the package has a setup.py. This macro ensures 21 | ## modules and global scripts declared therein get installed 22 | ## See http://ros.org/doc/api/catkin/html/user_guide/setup_dot_py.html 23 | # catkin_python_setup() 24 | 25 | ################################################ 26 | ## Declare ROS messages, services and actions ## 27 | ################################################ 28 | 29 | ## To declare and build messages, services or actions from within this 30 | ## package, follow these steps: 31 | ## * Let MSG_DEP_SET be the set of packages whose message types you use in 32 | ## your messages/services/actions (e.g. std_msgs, actionlib_msgs, ...). 
33 | ## * In the file package.xml: 34 | ## * add a build_depend tag for "message_generation" 35 | ## * add a build_depend and a exec_depend tag for each package in MSG_DEP_SET 36 | ## * If MSG_DEP_SET isn't empty the following dependency has been pulled in 37 | ## but can be declared for certainty nonetheless: 38 | ## * add a exec_depend tag for "message_runtime" 39 | ## * In this file (CMakeLists.txt): 40 | ## * add "message_generation" and every package in MSG_DEP_SET to 41 | ## find_package(catkin REQUIRED COMPONENTS ...) 42 | ## * add "message_runtime" and every package in MSG_DEP_SET to 43 | ## catkin_package(CATKIN_DEPENDS ...) 44 | ## * uncomment the add_*_files sections below as needed 45 | ## and list every .msg/.srv/.action file to be processed 46 | ## * uncomment the generate_messages entry below 47 | ## * add every package in MSG_DEP_SET to generate_messages(DEPENDENCIES ...) 48 | 49 | ## Generate messages in the 'msg' folder 50 | # add_message_files( 51 | # FILES 52 | # Message1.msg 53 | # Message2.msg 54 | # ) 55 | 56 | ## Generate services in the 'srv' folder 57 | add_service_files( 58 | FILES 59 | CameraRequests.srv 60 | ) 61 | 62 | ## Generate actions in the 'action' folder 63 | # add_action_files( 64 | # FILES 65 | # Action1.action 66 | # Action2.action 67 | # ) 68 | 69 | ## Generate added messages and services with any dependencies listed here 70 | generate_messages( 71 | DEPENDENCIES 72 | std_msgs 73 | ) 74 | 75 | ################################################ 76 | ## Declare ROS dynamic reconfigure parameters ## 77 | ################################################ 78 | 79 | ## To declare and build dynamic reconfigure parameters within this 80 | ## package, follow these steps: 81 | ## * In the file package.xml: 82 | ## * add a build_depend and a exec_depend tag for "dynamic_reconfigure" 83 | ## * In this file (CMakeLists.txt): 84 | ## * add "dynamic_reconfigure" to 85 | ## find_package(catkin REQUIRED COMPONENTS ...) 
86 | ## * uncomment the "generate_dynamic_reconfigure_options" section below 87 | ## and list every .cfg file to be processed 88 | 89 | ## Generate dynamic reconfigure parameters in the 'cfg' folder 90 | # generate_dynamic_reconfigure_options( 91 | # cfg/DynReconf1.cfg 92 | # cfg/DynReconf2.cfg 93 | # ) 94 | 95 | ################################### 96 | ## catkin specific configuration ## 97 | ################################### 98 | ## The catkin_package macro generates cmake config files for your package 99 | ## Declare things to be passed to dependent projects 100 | ## INCLUDE_DIRS: uncomment this if your package contains header files 101 | ## LIBRARIES: libraries you create in this project that dependent projects also need 102 | ## CATKIN_DEPENDS: catkin_packages dependent projects also need 103 | ## DEPENDS: system dependencies of this project that dependent projects also need 104 | catkin_package( 105 | # INCLUDE_DIRS include 106 | # LIBRARIES densefusion 107 | # CATKIN_DEPENDS rospy std_msgs 108 | # DEPENDS system_lib 109 | ) 110 | 111 | ########### 112 | ## Build ## 113 | ########### 114 | 115 | ## Specify additional locations of header files 116 | ## Your package locations should be listed before other locations 117 | include_directories( 118 | # include 119 | ${catkin_INCLUDE_DIRS} 120 | ) 121 | 122 | ## Declare a C++ library 123 | # add_library(${PROJECT_NAME} 124 | # src/${PROJECT_NAME}/densefusion.cpp 125 | # ) 126 | 127 | ## Add cmake target dependencies of the library 128 | ## as an example, code may need to be generated before libraries 129 | ## either from message generation or dynamic reconfigure 130 | # add_dependencies(${PROJECT_NAME} ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 131 | 132 | ## Declare a C++ executable 133 | ## With catkin_make all packages are built within a single CMake context 134 | ## The recommended prefix ensures that target names across packages don't collide 135 | # add_executable(${PROJECT_NAME}_node src/densefusion_node.cpp) 136 | 137 | ## Rename C++ executable without prefix 138 | ## The above recommended prefix causes long target names, the following renames the 139 | ## target back to the shorter version for ease of user use 140 | ## e.g. "rosrun someones_pkg node" instead of "rosrun someones_pkg someones_pkg_node" 141 | # set_target_properties(${PROJECT_NAME}_node PROPERTIES OUTPUT_NAME node PREFIX "") 142 | 143 | ## Add cmake target dependencies of the executable 144 | ## same as for the library above 145 | # add_dependencies(${PROJECT_NAME}_node ${${PROJECT_NAME}_EXPORTED_TARGETS} ${catkin_EXPORTED_TARGETS}) 146 | 147 | ## Specify libraries to link a library or executable target against 148 | # target_link_libraries(${PROJECT_NAME}_node 149 | # ${catkin_LIBRARIES} 150 | # ) 151 | 152 | ############# 153 | ## Install ## 154 | ############# 155 | 156 | # all install targets should use catkin DESTINATION variables 157 | # See http://ros.org/doc/api/catkin/html/adv_user_guide/variables.html 158 | 159 | ## Mark executable scripts (Python etc.) 
for installation 160 | ## in contrast to setup.py, you can choose the destination 161 | # install(PROGRAMS 162 | # scripts/my_python_script 163 | # DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 164 | # ) 165 | 166 | ## Mark executables and/or libraries for installation 167 | # install(TARGETS ${PROJECT_NAME} ${PROJECT_NAME}_node 168 | # ARCHIVE DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 169 | # LIBRARY DESTINATION ${CATKIN_PACKAGE_LIB_DESTINATION} 170 | # RUNTIME DESTINATION ${CATKIN_PACKAGE_BIN_DESTINATION} 171 | # ) 172 | 173 | ## Mark cpp header files for installation 174 | # install(DIRECTORY include/${PROJECT_NAME}/ 175 | # DESTINATION ${CATKIN_PACKAGE_INCLUDE_DESTINATION} 176 | # FILES_MATCHING PATTERN "*.h" 177 | # PATTERN ".svn" EXCLUDE 178 | # ) 179 | 180 | ## Mark other files for installation (e.g. launch and bag files, etc.) 181 | # install(FILES 182 | # # myfile1 183 | # # myfile2 184 | # DESTINATION ${CATKIN_PACKAGE_SHARE_DESTINATION} 185 | # ) 186 | 187 | ############# 188 | ## Testing ## 189 | ############# 190 | 191 | ## Add gtest based cpp test target and link libraries 192 | # catkin_add_gtest(${PROJECT_NAME}-test test/test_densefusion.cpp) 193 | # if(TARGET ${PROJECT_NAME}-test) 194 | # target_link_libraries(${PROJECT_NAME}-test ${PROJECT_NAME}) 195 | # endif() 196 | 197 | ## Add folders to be run by python nosetests 198 | # catkin_add_nosetests(test) 199 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # DenseFusion_ROS 2 | 3 | This repository is based on https://github.com/j96w/DenseFusion and https://github.com/ooooverflow/BiSeNet. 4 | 5 | If you are a person using Docker, https://hub.docker.com/repository/docker/choo2969/ros-densefusion 6 | 7 | segmentation weight file [link](https://drive.google.com/drive/folders/1fRie5jwj9Liuwvs64_Mru8wUCy65Os0_?usp=sharing) 8 | densefusion weight file [link](https://github.com/j96w/DenseFusion) 9 | 10 | ~~~ 11 | $ docker pull choo2969/ros-densefusion 12 | ~~~ 13 | 14 | 15 | ## Requirements 16 | --- 17 | - ROS (Kinetic) 18 | - Python2.7 19 | - Pytorch 0.4.1 20 | - PIL 21 | - scipy 22 | - numpy 23 | - pyyaml 24 | - logging 25 | - matplotlib 26 | - CUDA 27 | 28 | 29 | 30 | ## Start 31 | --- 32 | we have tested on Ubuntu 16.04 with ROS Kinetic and NVIDIA Titan XP and Geforce 1080 Ti 33 | 1. Start camera node (D435) 34 | 35 | - Step1. Run your own camera, If your camera is not a D435 or D415, you will need to edit the RGB image and Depth Subscriber. Edit image_subscriber and depth_subscriber with your camera node 36 | ~~~ 37 | vim path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh 38 | ~~~ 39 | 40 | - Step2. Edit the cam_cx,cam_cy,cam_fx,cam_fy values 41 | ~~~ 42 | vim path/densefusion/scripts/tool/ros_eval_ycb_message.py 43 | ~~~ 44 | 45 | 2. 
Start 46 | ~~~ 47 | sh path/densefusion/scripts/experiments/scripts/ros_eval_msg.sh 48 | ~~~ 49 | Running this will launch the service server that runs 6D pose estimation. 50 | -------------------------------------------------------------------------------- /datasets/ycb/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/__init__.pyc -------------------------------------------------------------------------------- /datasets/ycb/dataset.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/datasets/ycb/dataset.pyc -------------------------------------------------------------------------------- /datasets/ycb/dataset_config/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | densefusion 4 | 0.0.0 5 | The densefusion package 6 | 7 | 8 | 9 | 10 | root 11 | 12 | 13 | 14 | 15 | 16 | TODO 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | catkin 53 | rospy 54 | std_msgs 55 | message_generation 56 | 57 | rospy 58 | std_msgs 59 | 60 | rospy 61 | std_msgs 62 | message_runtime 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | -------------------------------------------------------------------------------- /scripts/1: -------------------------------------------------------------------------------- 1 | # ~/.bashrc: executed by bash(1) for non-login shells. 2 | # see /usr/share/doc/bash/examples/startup-files (in the package bash-doc) 3 | # for examples 4 | 5 | # If not running interactively, don't do anything 6 | [ -z "$PS1" ] && return 7 | 8 | # don't put duplicate lines in the history. See bash(1) for more options 9 | # ... or force ignoredups and ignorespace 10 | HISTCONTROL=ignoredups:ignorespace 11 | 12 | # append to the history file, don't overwrite it 13 | shopt -s histappend 14 | 15 | # for setting history length see HISTSIZE and HISTFILESIZE in bash(1) 16 | HISTSIZE=1000 17 | HISTFILESIZE=2000 18 | 19 | # check the window size after each command and, if necessary, 20 | # update the values of LINES and COLUMNS. 
21 | shopt -s checkwinsize 22 | 23 | # make less more friendly for non-text input files, see lesspipe(1) 24 | [ -x /usr/bin/lesspipe ] && eval "$(SHELL=/bin/sh lesspipe)" 25 | 26 | # set variable identifying the chroot you work in (used in the prompt below) 27 | if [ -z "$debian_chroot" ] && [ -r /etc/debian_chroot ]; then 28 | debian_chroot=$(cat /etc/debian_chroot) 29 | fi 30 | 31 | # set a fancy prompt (non-color, unless we know we "want" color) 32 | case "$TERM" in 33 | xterm-color) color_prompt=yes;; 34 | esac 35 | 36 | # uncomment for a colored prompt, if the terminal has the capability; turned 37 | # off by default to not distract the user: the focus in a terminal window 38 | # should be on the output of commands, not on the prompt 39 | #force_color_prompt=yes 40 | 41 | if [ -n "$force_color_prompt" ]; then 42 | if [ -x /usr/bin/tput ] && tput setaf 1 >&/dev/null; then 43 | # We have color support; assume it's compliant with Ecma-48 44 | # (ISO/IEC-6429). (Lack of such support is extremely rare, and such 45 | # a case would tend to support setf rather than setaf.) 46 | color_prompt=yes 47 | else 48 | color_prompt= 49 | fi 50 | fi 51 | 52 | if [ "$color_prompt" = yes ]; then 53 | PS1='${debian_chroot:+($debian_chroot)}\[\033[01;32m\]\u@\h\[\033[00m\]:\[\033[01;34m\]\w\[\033[00m\]\$ ' 54 | else 55 | PS1='${debian_chroot:+($debian_chroot)}\u@\h:\w\$ ' 56 | fi 57 | unset color_prompt force_color_prompt 58 | 59 | # If this is an xterm set the title to user@host:dir 60 | case "$TERM" in 61 | xterm*|rxvt*) 62 | PS1="\[\e]0;${debian_chroot:+($debian_chroot)}\u@\h: \w\a\]$PS1" 63 | ;; 64 | *) 65 | ;; 66 | esac 67 | 68 | # enable color support of ls and also add handy aliases 69 | if [ -x /usr/bin/dircolors ]; then 70 | test -r ~/.dircolors && eval "$(dircolors -b ~/.dircolors)" || eval "$(dircolors -b)" 71 | alias ls='ls --color=auto' 72 | #alias dir='dir --color=auto' 73 | #alias vdir='vdir --color=auto' 74 | 75 | alias grep='grep --color=auto' 76 | alias fgrep='fgrep --color=auto' 77 | alias egrep='egrep --color=auto' 78 | fi 79 | 80 | # some more ls aliases 81 | alias ll='ls -alF' 82 | alias la='ls -A' 83 | alias l='ls -CF' 84 | 85 | # Alias definitions. 86 | # You may want to put all your additions into a separate file like 87 | # ~/.bash_aliases, instead of adding them here directly. 88 | # See /usr/share/doc/bash-doc/examples in the bash-doc package. 89 | 90 | if [ -f ~/.bash_aliases ]; then 91 | . ~/.bash_aliases 92 | fi 93 | 94 | # enable programmable completion features (you don't need to enable 95 | # this, if it's already enabled in /etc/bash.bashrc and /etc/profile 96 | # sources /etc/bash.bashrc). 97 | #if [ -f /etc/bash_completion ] && ! shopt -oq posix; then 98 | # . 
/etc/bash_completion 99 | #fi 100 | alias eb='nano ~/.bashrc' 101 | alias sb='source ~/.bashrc' 102 | alias gs='git status' 103 | alias gp='git pull' 104 | alias cw='cd ~/catkin_ws' 105 | alias cs='cd ~/catkin_ws/src' 106 | alias cm='cd ~/catkin_ws && catkin_make' 107 | source ~/catkin_ws/devel/setup.bash 108 | export ROS_MASTER_URI=http://192.168.1.15:11311 109 | export ROS_HOSTNAME=172.17.0.5 110 | -------------------------------------------------------------------------------- /scripts/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Jeremy Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /scripts/README.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/assets/compare.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/compare.png -------------------------------------------------------------------------------- /scripts/assets/pullfig.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/pullfig.png -------------------------------------------------------------------------------- /scripts/assets/result_linemod.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_linemod.png -------------------------------------------------------------------------------- /scripts/assets/result_ycb.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/assets/result_ycb.png -------------------------------------------------------------------------------- /scripts/datasets/linemod/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import errno 
6 | import torch 7 | import json 8 | import codecs 9 | import numpy as np 10 | import sys 11 | import torchvision.transforms as transforms 12 | import argparse 13 | import json 14 | import time 15 | import random 16 | import numpy.ma as ma 17 | import copy 18 | import scipy.misc 19 | import scipy.io as scio 20 | import yaml 21 | import cv2 22 | 23 | 24 | class PoseDataset(data.Dataset): 25 | def __init__(self, mode, num, add_noise, root, noise_trans, refine): 26 | self.objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] 27 | self.mode = mode 28 | 29 | self.list_rgb = [] 30 | self.list_depth = [] 31 | self.list_label = [] 32 | self.list_obj = [] 33 | self.list_rank = [] 34 | self.meta = {} 35 | self.pt = {} 36 | self.root = root 37 | self.noise_trans = noise_trans 38 | self.refine = refine 39 | 40 | item_count = 0 41 | for item in self.objlist: 42 | if self.mode == 'train': 43 | input_file = open('{0}/data/{1}/train.txt'.format(self.root, '%02d' % item)) 44 | else: 45 | input_file = open('{0}/data/{1}/test.txt'.format(self.root, '%02d' % item)) 46 | while 1: 47 | item_count += 1 48 | input_line = input_file.readline() 49 | if self.mode == 'test' and item_count % 10 != 0: 50 | continue 51 | if not input_line: 52 | break 53 | if input_line[-1:] == '\n': 54 | input_line = input_line[:-1] 55 | self.list_rgb.append('{0}/data/{1}/rgb/{2}.png'.format(self.root, '%02d' % item, input_line)) 56 | self.list_depth.append('{0}/data/{1}/depth/{2}.png'.format(self.root, '%02d' % item, input_line)) 57 | if self.mode == 'eval': 58 | self.list_label.append('{0}/segnet_results/{1}_label/{2}_label.png'.format(self.root, '%02d' % item, input_line)) 59 | else: 60 | self.list_label.append('{0}/data/{1}/mask/{2}.png'.format(self.root, '%02d' % item, input_line)) 61 | 62 | self.list_obj.append(item) 63 | self.list_rank.append(int(input_line)) 64 | 65 | meta_file = open('{0}/data/{1}/gt.yml'.format(self.root, '%02d' % item), 'r') 66 | self.meta[item] = yaml.load(meta_file) 67 | self.pt[item] = ply_vtx('{0}/models/obj_{1}.ply'.format(self.root, '%02d' % item)) 68 | 69 | print("Object {0} buffer loaded".format(item)) 70 | 71 | self.length = len(self.list_rgb) 72 | 73 | self.cam_cx = 325.26110 74 | self.cam_cy = 242.04899 75 | self.cam_fx = 572.41140 76 | self.cam_fy = 573.57043 77 | 78 | self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 79 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 80 | 81 | self.num = num 82 | self.add_noise = add_noise 83 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05) 84 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 85 | self.border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 86 | self.num_pt_mesh_large = 500 87 | self.num_pt_mesh_small = 500 88 | self.symmetry_obj_idx = [7, 8] 89 | 90 | def __getitem__(self, index): 91 | img = Image.open(self.list_rgb[index]) 92 | ori_img = np.array(img) 93 | depth = np.array(Image.open(self.list_depth[index])) 94 | label = np.array(Image.open(self.list_label[index])) 95 | obj = self.list_obj[index] 96 | rank = self.list_rank[index] 97 | 98 | if obj == 2: 99 | for i in range(0, len(self.meta[obj][rank])): 100 | if self.meta[obj][rank][i]['obj_id'] == 2: 101 | meta = self.meta[obj][rank][i] 102 | break 103 | else: 104 | meta = self.meta[obj][rank][0] 105 | 106 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 107 | if self.mode == 'eval': 108 | mask_label = ma.getmaskarray(ma.masked_equal(label, 
np.array(255))) 109 | else: 110 | mask_label = ma.getmaskarray(ma.masked_equal(label, np.array([255, 255, 255])))[:, :, 0] 111 | 112 | mask = mask_label * mask_depth 113 | 114 | if self.add_noise: 115 | img = self.trancolor(img) 116 | 117 | img = np.array(img)[:, :, :3] 118 | img = np.transpose(img, (2, 0, 1)) 119 | img_masked = img 120 | 121 | if self.mode == 'eval': 122 | rmin, rmax, cmin, cmax = get_bbox(mask_to_bbox(mask_label)) 123 | else: 124 | rmin, rmax, cmin, cmax = get_bbox(meta['obj_bb']) 125 | 126 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 127 | #p_img = np.transpose(img_masked, (1, 2, 0)) 128 | #scipy.misc.imsave('evaluation_result/{0}_input.png'.format(index), p_img) 129 | 130 | target_r = np.resize(np.array(meta['cam_R_m2c']), (3, 3)) 131 | target_t = np.array(meta['cam_t_m2c']) 132 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)]) 133 | 134 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 135 | if len(choose) == 0: 136 | cc = torch.LongTensor([0]) 137 | return(cc, cc, cc, cc, cc, cc) 138 | 139 | if len(choose) > self.num: 140 | c_mask = np.zeros(len(choose), dtype=int) 141 | c_mask[:self.num] = 1 142 | np.random.shuffle(c_mask) 143 | choose = choose[c_mask.nonzero()] 144 | else: 145 | choose = np.pad(choose, (0, self.num - len(choose)), 'wrap') 146 | 147 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 148 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 149 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 150 | choose = np.array([choose]) 151 | 152 | cam_scale = 1.0 153 | pt2 = depth_masked / cam_scale 154 | pt0 = (ymap_masked - self.cam_cx) * pt2 / self.cam_fx 155 | pt1 = (xmap_masked - self.cam_cy) * pt2 / self.cam_fy 156 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 157 | cloud = cloud / 1000.0 158 | 159 | if self.add_noise: 160 | cloud = np.add(cloud, add_t) 161 | 162 | #fw = open('evaluation_result/{0}_cld.xyz'.format(index), 'w') 163 | #for it in cloud: 164 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 165 | #fw.close() 166 | 167 | model_points = self.pt[obj] / 1000.0 168 | dellist = [j for j in range(0, len(model_points))] 169 | dellist = random.sample(dellist, len(model_points) - self.num_pt_mesh_small) 170 | model_points = np.delete(model_points, dellist, axis=0) 171 | 172 | #fw = open('evaluation_result/{0}_model_points.xyz'.format(index), 'w') 173 | #for it in model_points: 174 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 175 | #fw.close() 176 | 177 | target = np.dot(model_points, target_r.T) 178 | if self.add_noise: 179 | target = np.add(target, target_t / 1000.0 + add_t) 180 | out_t = target_t / 1000.0 + add_t 181 | else: 182 | target = np.add(target, target_t / 1000.0) 183 | out_t = target_t / 1000.0 184 | 185 | #fw = open('evaluation_result/{0}_tar.xyz'.format(index), 'w') 186 | #for it in target: 187 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 188 | #fw.close() 189 | 190 | return torch.from_numpy(cloud.astype(np.float32)), \ 191 | torch.LongTensor(choose.astype(np.int32)), \ 192 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 193 | torch.from_numpy(target.astype(np.float32)), \ 194 | torch.from_numpy(model_points.astype(np.float32)), \ 195 | torch.LongTensor([self.objlist.index(obj)]) 196 | 197 | def __len__(self): 198 | return self.length 199 | 200 | def get_sym_list(self): 201 | return 
self.symmetry_obj_idx 202 | 203 | def get_num_points_mesh(self): 204 | if self.refine: 205 | return self.num_pt_mesh_large 206 | else: 207 | return self.num_pt_mesh_small 208 | 209 | 210 | 211 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 212 | img_width = 480 213 | img_length = 640 214 | 215 | 216 | def mask_to_bbox(mask): 217 | mask = mask.astype(np.uint8) 218 | contours, _ = cv2.findContours(mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 219 | 220 | 221 | x = 0 222 | y = 0 223 | w = 0 224 | h = 0 225 | for contour in contours: 226 | tmp_x, tmp_y, tmp_w, tmp_h = cv2.boundingRect(contour) 227 | if tmp_w * tmp_h > w * h: 228 | x = tmp_x 229 | y = tmp_y 230 | w = tmp_w 231 | h = tmp_h 232 | return [x, y, w, h] 233 | 234 | 235 | def get_bbox(bbox): 236 | bbx = [bbox[1], bbox[1] + bbox[3], bbox[0], bbox[0] + bbox[2]] 237 | if bbx[0] < 0: 238 | bbx[0] = 0 239 | if bbx[1] >= 480: 240 | bbx[1] = 479 241 | if bbx[2] < 0: 242 | bbx[2] = 0 243 | if bbx[3] >= 640: 244 | bbx[3] = 639 245 | rmin, rmax, cmin, cmax = bbx[0], bbx[1], bbx[2], bbx[3] 246 | r_b = rmax - rmin 247 | for tt in range(len(border_list)): 248 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 249 | r_b = border_list[tt + 1] 250 | break 251 | c_b = cmax - cmin 252 | for tt in range(len(border_list)): 253 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 254 | c_b = border_list[tt + 1] 255 | break 256 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 257 | rmin = center[0] - int(r_b / 2) 258 | rmax = center[0] + int(r_b / 2) 259 | cmin = center[1] - int(c_b / 2) 260 | cmax = center[1] + int(c_b / 2) 261 | if rmin < 0: 262 | delt = -rmin 263 | rmin = 0 264 | rmax += delt 265 | if cmin < 0: 266 | delt = -cmin 267 | cmin = 0 268 | cmax += delt 269 | if rmax > 480: 270 | delt = rmax - 480 271 | rmax = 480 272 | rmin -= delt 273 | if cmax > 640: 274 | delt = cmax - 640 275 | cmax = 640 276 | cmin -= delt 277 | return rmin, rmax, cmin, cmax 278 | 279 | 280 | def ply_vtx(path): 281 | f = open(path) 282 | assert f.readline().strip() == "ply" 283 | f.readline() 284 | f.readline() 285 | N = int(f.readline().split()[-1]) 286 | while f.readline().strip() != "end_header": 287 | continue 288 | pts = [] 289 | for _ in range(N): 290 | pts.append(np.float32(f.readline().split()[:3])) 291 | return np.array(pts) 292 | -------------------------------------------------------------------------------- /scripts/datasets/linemod/dataset_config/models_info.yml: -------------------------------------------------------------------------------- 1 | 1: {diameter: 102.09865663, min_x: -37.93430000, min_y: -38.79960000, min_z: -45.88450000, size_x: 75.86860000, size_y: 77.59920000, size_z: 91.76900000} 2 | 2: {diameter: 247.50624233, min_x: -107.83500000, min_y: -60.92790000, min_z: -109.70500000, size_x: 215.67000000, size_y: 121.85570000, size_z: 219.41000000} 3 | 3: {diameter: 167.35486092, min_x: -83.21620000, min_y: -82.65910000, min_z: -37.23640000, size_x: 166.43240000, size_y: 165.31820000, size_z: 74.47280000} 4 | 4: {diameter: 172.49224865, min_x: -68.32970000, min_y: -71.51510000, min_z: -50.24850000, size_x: 136.65940000, size_y: 143.03020000, size_z: 100.49700000} 5 | 5: {diameter: 201.40358597, min_x: -50.39580000, min_y: -90.89790000, min_z: -96.86700000, size_x: 100.79160000, size_y: 181.79580000, size_z: 193.73400000} 6 | 6: {diameter: 154.54551808, min_x: -33.50540000, min_y: -63.81650000, min_z: -58.72830000, size_x: 67.01070000, size_y: 127.63300000, size_z: 
117.45660000} 7 | 7: {diameter: 124.26430816, min_x: -58.78990000, min_y: -45.75560000, min_z: -47.31120000, size_x: 117.57980000, size_y: 91.51120000, size_z: 94.62240000} 8 | 8: {diameter: 261.47178102, min_x: -114.73800000, min_y: -37.73570000, min_z: -104.00100000, size_x: 229.47600000, size_y: 75.47140000, size_z: 208.00200000} 9 | 9: {diameter: 108.99920102, min_x: -52.21460000, min_y: -38.70380000, min_z: -42.84850000, size_x: 104.42920000, size_y: 77.40760000, size_z: 85.69700000} 10 | 10: {diameter: 164.62758848, min_x: -75.09230000, min_y: -53.53750000, min_z: -34.62070000, size_x: 150.18460000, size_y: 107.07500000, size_z: 69.24140000} 11 | 11: {diameter: 175.88933422, min_x: -18.36050000, min_y: -38.93300000, min_z: -86.40790000, size_x: 36.72110000, size_y: 77.86600000, size_z: 172.81580000} 12 | 12: {diameter: 145.54287471, min_x: -50.44390000, min_y: -54.24850000, min_z: -45.40000000, size_x: 100.88780000, size_y: 108.49700000, size_z: 90.80000000} 13 | 13: {diameter: 278.07811733, min_x: -129.11300000, min_y: -59.24100000, min_z: -70.56620000, size_x: 258.22600000, size_y: 118.48210000, size_z: 141.13240000} 14 | 14: {diameter: 282.60129399, min_x: -101.57300000, min_y: -58.87630000, min_z: -106.55800000, size_x: 203.14600000, size_y: 117.75250000, size_z: 213.11600000} 15 | 15: {diameter: 212.35825148, min_x: -46.95910000, min_y: -73.71670000, min_z: -92.37370000, size_x: 93.91810000, size_y: 147.43340000, size_z: 184.74740000} -------------------------------------------------------------------------------- /scripts/datasets/ycb/dataset.py: -------------------------------------------------------------------------------- 1 | import torch.utils.data as data 2 | from PIL import Image 3 | import os 4 | import os.path 5 | import torch 6 | import numpy as np 7 | import torchvision.transforms as transforms 8 | import argparse 9 | import time 10 | import random 11 | from lib.transformations import quaternion_from_euler, euler_matrix, random_quaternion, quaternion_matrix 12 | import numpy.ma as ma 13 | import copy 14 | import scipy.misc 15 | import scipy.io as scio 16 | 17 | 18 | class PoseDataset(data.Dataset): 19 | def __init__(self, mode, num_pt, add_noise, root, noise_trans, refine): 20 | if mode == 'train': 21 | self.path = 'datasets/ycb/dataset_config/train_data_list.txt' 22 | elif mode == 'test': 23 | self.path = 'datasets/ycb/dataset_config/test_data_list.txt' 24 | self.num_pt = num_pt 25 | self.root = root 26 | self.add_noise = add_noise 27 | self.noise_trans = noise_trans 28 | 29 | self.list = [] 30 | self.real = [] 31 | self.syn = [] 32 | input_file = open(self.path) 33 | while 1: 34 | input_line = input_file.readline() 35 | if not input_line: 36 | break 37 | if input_line[-1:] == '\n': 38 | input_line = input_line[:-1] 39 | if input_line[:5] == 'data/': 40 | self.real.append(input_line) 41 | else: 42 | self.syn.append(input_line) 43 | self.list.append(input_line) 44 | input_file.close() 45 | 46 | self.length = len(self.list) 47 | self.len_real = len(self.real) 48 | self.len_syn = len(self.syn) 49 | 50 | class_file = open('datasets/ycb/dataset_config/classes.txt') 51 | class_id = 1 52 | self.cld = {} 53 | while 1: 54 | class_input = class_file.readline() 55 | if not class_input: 56 | break 57 | 58 | input_file = open('{0}/models/{1}/points.xyz'.format(self.root, class_input[:-1])) 59 | self.cld[class_id] = [] 60 | while 1: 61 | input_line = input_file.readline() 62 | if not input_line: 63 | break 64 | input_line = input_line[:-1].split(' ') 65 | 
self.cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 66 | self.cld[class_id] = np.array(self.cld[class_id]) 67 | input_file.close() 68 | 69 | class_id += 1 70 | 71 | self.cam_cx_1 = 312.9869 72 | self.cam_cy_1 = 241.3109 73 | self.cam_fx_1 = 1066.778 74 | self.cam_fy_1 = 1067.487 75 | 76 | self.cam_cx_2 = 323.7872 77 | self.cam_cy_2 = 279.6921 78 | self.cam_fx_2 = 1077.836 79 | self.cam_fy_2 = 1078.189 80 | 81 | self.xmap = np.array([[j for i in range(640)] for j in range(480)]) 82 | self.ymap = np.array([[i for i in range(640)] for j in range(480)]) 83 | 84 | self.trancolor = transforms.ColorJitter(0.2, 0.2, 0.2, 0.05) 85 | self.noise_img_loc = 0.0 86 | self.noise_img_scale = 7.0 87 | self.minimum_num_pt = 50 88 | self.norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 89 | self.symmetry_obj_idx = [12, 15, 18, 19, 20] 90 | self.num_pt_mesh_small = 500 91 | self.num_pt_mesh_large = 2600 92 | self.refine = refine 93 | self.front_num = 2 94 | 95 | print(len(self.list)) 96 | 97 | def __getitem__(self, index): 98 | img = Image.open('{0}/{1}-color.png'.format(self.root, self.list[index])) 99 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(self.root, self.list[index]))) 100 | label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, self.list[index]))) 101 | meta = scio.loadmat('{0}/{1}-meta.mat'.format(self.root, self.list[index])) 102 | 103 | if self.list[index][:8] != 'data_syn' and int(self.list[index][5:9]) >= 60: 104 | cam_cx = self.cam_cx_2 105 | cam_cy = self.cam_cy_2 106 | cam_fx = self.cam_fx_2 107 | cam_fy = self.cam_fy_2 108 | else: 109 | cam_cx = self.cam_cx_1 110 | cam_cy = self.cam_cy_1 111 | cam_fx = self.cam_fx_1 112 | cam_fy = self.cam_fy_1 113 | 114 | mask_back = ma.getmaskarray(ma.masked_equal(label, 0)) 115 | 116 | add_front = False 117 | if self.add_noise: 118 | for k in range(5): 119 | seed = random.choice(self.syn) 120 | front = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB"))) 121 | front = np.transpose(front, (2, 0, 1)) 122 | f_label = np.array(Image.open('{0}/{1}-label.png'.format(self.root, seed))) 123 | front_label = np.unique(f_label).tolist()[1:] 124 | if len(front_label) < self.front_num: 125 | continue 126 | front_label = random.sample(front_label, self.front_num) 127 | for f_i in front_label: 128 | mk = ma.getmaskarray(ma.masked_not_equal(f_label, f_i)) 129 | if f_i == front_label[0]: 130 | mask_front = mk 131 | else: 132 | mask_front = mask_front * mk 133 | t_label = label * mask_front 134 | if len(t_label.nonzero()[0]) > 1000: 135 | label = t_label 136 | add_front = True 137 | break 138 | 139 | obj = meta['cls_indexes'].flatten().astype(np.int32) 140 | 141 | while 1: 142 | idx = np.random.randint(0, len(obj)) 143 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 144 | mask_label = ma.getmaskarray(ma.masked_equal(label, obj[idx])) 145 | mask = mask_label * mask_depth 146 | if len(mask.nonzero()[0]) > self.minimum_num_pt: 147 | break 148 | 149 | if self.add_noise: 150 | img = self.trancolor(img) 151 | 152 | rmin, rmax, cmin, cmax = get_bbox(mask_label) 153 | img = np.transpose(np.array(img)[:, :, :3], (2, 0, 1))[:, rmin:rmax, cmin:cmax] 154 | 155 | if self.list[index][:8] == 'data_syn': 156 | seed = random.choice(self.real) 157 | back = np.array(self.trancolor(Image.open('{0}/{1}-color.png'.format(self.root, seed)).convert("RGB"))) 158 | back = np.transpose(back, (2, 0, 1))[:, rmin:rmax, cmin:cmax] 159 | img_masked 
= back * mask_back[rmin:rmax, cmin:cmax] + img 160 | else: 161 | img_masked = img 162 | 163 | if self.add_noise and add_front: 164 | img_masked = img_masked * mask_front[rmin:rmax, cmin:cmax] + front[:, rmin:rmax, cmin:cmax] * ~(mask_front[rmin:rmax, cmin:cmax]) 165 | 166 | if self.list[index][:8] == 'data_syn': 167 | img_masked = img_masked + np.random.normal(loc=0.0, scale=7.0, size=img_masked.shape) 168 | 169 | # p_img = np.transpose(img_masked, (1, 2, 0)) 170 | # scipy.misc.imsave('temp/{0}_input.png'.format(index), p_img) 171 | # scipy.misc.imsave('temp/{0}_label.png'.format(index), mask[rmin:rmax, cmin:cmax].astype(np.int32)) 172 | 173 | target_r = meta['poses'][:, :, idx][:, 0:3] 174 | target_t = np.array([meta['poses'][:, :, idx][:, 3:4].flatten()]) 175 | add_t = np.array([random.uniform(-self.noise_trans, self.noise_trans) for i in range(3)]) 176 | 177 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 178 | if len(choose) > self.num_pt: 179 | c_mask = np.zeros(len(choose), dtype=int) 180 | c_mask[:self.num_pt] = 1 181 | np.random.shuffle(c_mask) 182 | choose = choose[c_mask.nonzero()] 183 | else: 184 | choose = np.pad(choose, (0, self.num_pt - len(choose)), 'wrap') 185 | 186 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 187 | xmap_masked = self.xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 188 | ymap_masked = self.ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 189 | choose = np.array([choose]) 190 | 191 | cam_scale = meta['factor_depth'][0][0] 192 | pt2 = depth_masked / cam_scale 193 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 194 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 195 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 196 | if self.add_noise: 197 | cloud = np.add(cloud, add_t) 198 | 199 | # fw = open('temp/{0}_cld.xyz'.format(index), 'w') 200 | # for it in cloud: 201 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 202 | # fw.close() 203 | 204 | dellist = [j for j in range(0, len(self.cld[obj[idx]]))] 205 | if self.refine: 206 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_large) 207 | else: 208 | dellist = random.sample(dellist, len(self.cld[obj[idx]]) - self.num_pt_mesh_small) 209 | model_points = np.delete(self.cld[obj[idx]], dellist, axis=0) 210 | 211 | # fw = open('temp/{0}_model_points.xyz'.format(index), 'w') 212 | # for it in model_points: 213 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 214 | # fw.close() 215 | 216 | target = np.dot(model_points, target_r.T) 217 | if self.add_noise: 218 | target = np.add(target, target_t + add_t) 219 | else: 220 | target = np.add(target, target_t) 221 | 222 | # fw = open('temp/{0}_tar.xyz'.format(index), 'w') 223 | # for it in target: 224 | # fw.write('{0} {1} {2}\n'.format(it[0], it[1], it[2])) 225 | # fw.close() 226 | 227 | return torch.from_numpy(cloud.astype(np.float32)), \ 228 | torch.LongTensor(choose.astype(np.int32)), \ 229 | self.norm(torch.from_numpy(img_masked.astype(np.float32))), \ 230 | torch.from_numpy(target.astype(np.float32)), \ 231 | torch.from_numpy(model_points.astype(np.float32)), \ 232 | torch.LongTensor([int(obj[idx]) - 1]) 233 | 234 | def __len__(self): 235 | return self.length 236 | 237 | def get_sym_list(self): 238 | return self.symmetry_obj_idx 239 | 240 | def get_num_points_mesh(self): 241 | if self.refine: 242 | return self.num_pt_mesh_large 243 | else: 244 | return self.num_pt_mesh_small 245 | 246 | 247 | border_list = 
[-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 248 | img_width = 480 249 | img_length = 640 250 | 251 | def get_bbox(label): 252 | rows = np.any(label, axis=1) 253 | cols = np.any(label, axis=0) 254 | rmin, rmax = np.where(rows)[0][[0, -1]] 255 | cmin, cmax = np.where(cols)[0][[0, -1]] 256 | rmax += 1 257 | cmax += 1 258 | r_b = rmax - rmin 259 | for tt in range(len(border_list)): 260 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 261 | r_b = border_list[tt + 1] 262 | break 263 | c_b = cmax - cmin 264 | for tt in range(len(border_list)): 265 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 266 | c_b = border_list[tt + 1] 267 | break 268 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 269 | rmin = center[0] - int(r_b / 2) 270 | rmax = center[0] + int(r_b / 2) 271 | cmin = center[1] - int(c_b / 2) 272 | cmax = center[1] + int(c_b / 2) 273 | if rmin < 0: 274 | delt = -rmin 275 | rmin = 0 276 | rmax += delt 277 | if cmin < 0: 278 | delt = -cmin 279 | cmin = 0 280 | cmax += delt 281 | if rmax > img_width: 282 | delt = rmax - img_width 283 | rmax = img_width 284 | rmin -= delt 285 | if cmax > img_length: 286 | delt = cmax - img_length 287 | cmax = img_length 288 | cmin -= delt 289 | return rmin, rmax, cmin, cmax 290 | -------------------------------------------------------------------------------- /scripts/datasets/ycb/dataset_config/classes.txt: -------------------------------------------------------------------------------- 1 | 002_master_chef_can 2 | 003_cracker_box 3 | 004_sugar_box 4 | 005_tomato_soup_can 5 | 006_mustard_bottle 6 | 007_tuna_fish_can 7 | 008_pudding_box 8 | 009_gelatin_box 9 | 010_potted_meat_can 10 | 011_banana 11 | 019_pitcher_base 12 | 021_bleach_cleanser 13 | 024_bowl 14 | 025_mug 15 | 035_power_drill 16 | 036_wood_block 17 | 037_scissors 18 | 040_large_marker 19 | 051_large_clamp 20 | 052_extra_large_clamp 21 | 061_foam_brick 22 | -------------------------------------------------------------------------------- /scripts/distortion.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/distortion.npy -------------------------------------------------------------------------------- /scripts/eval.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | ########################################## ros packages ############################################## 4 | import rospy 5 | from sensor_msgs.msg import Image, CameraInfo 6 | from cv_bridge import CvBridge, CvBridgeError 7 | 8 | ######################################################################################################## 9 | 10 | import cv2 11 | import torch 12 | import argparse 13 | import os 14 | from torch.utils.data import DataLoader 15 | from model.build_BiSeNet import BiSeNet 16 | import numpy as np 17 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 18 | from datasets.dataset import ycb_Dataset 19 | from matplotlib import pyplot as plt 20 | from torchvision import transforms 21 | 22 | 23 | def eval(model,dataloader, args ): 24 | print('start test!') 25 | with torch.no_grad(): 26 | model.eval() 27 | precision_record = [] 28 | tq = tqdm.tqdm(total=len(dataloader) * args.batch_size) 29 | tq.set_description('test') 30 | hist = np.zeros((args.num_classes, args.num_classes)) 31 | for i, (data, label) in enumerate(dataloader): 32 | tq.update(args.batch_size) 33 | if torch.cuda.is_available() and args.use_gpu: 34 | data = data.cuda() 35 | label = label.cuda() 36 | predict = model(data).squeeze() 37 | predict = reverse_one_hot(predict) 38 | predict = np.array(predict) 39 | # predict = colour_code_segmentation(np.array(predict), label_info) 40 | 41 | label = label.squeeze() 42 | if args.loss == 'dice': 43 | label = reverse_one_hot(label) 44 | label = np.array(label) 45 | # label = colour_code_segmentation(np.array(label), label_info) 46 | 47 | precision = compute_global_accuracy(predict, label) 48 | hist += fast_hist(label.flatten(), predict.flatten(), args.num_classes) 49 | precision_record.append(precision) 50 | save_img(i,data,predict) 51 | precision = np.mean(precision_record) 52 | miou_list = per_class_iu(hist)[:-1] 53 | miou = np.mean(miou_list) 54 | print('IoU for each class:') 55 | tq.close() 56 | print('precision for test: %.3f' % precision) 57 | print('mIoU for validation: %.3f' % miou) 58 | return precision 59 | def save_img(iteration,img,label): 60 | img = img.cpu() 61 | img = img.numpy() 62 | img = np.transpose(img, [0,2,3,1]) 63 | _,h,w,c = img.shape 64 | img = img.reshape([h,w,c]) 65 | fig, axes = plt.subplots(1,2,figsize = (8,4)) 66 | ax = axes.ravel() 67 | ax[0].imshow(img) 68 | ax[1].imshow(label) 69 | plt.show() 70 | plt.savefig('./ycb/segmentation_result/{}.png'.format(iteration)) 71 | plt.close() 72 | 73 | ###################################################################################################### 74 | ############################################## test ################################################## 75 | ##################################################################################################### 76 | class object_segmentation: 77 | def __init__(self,model): 78 | self.model = model 79 | self.bridge = CvBridge() 80 | self.label_pub = rospy.Publisher('label',Image,queue_size = 10) 81 | self.rgb_sub = rospy.Subscriber('rgb_image',Image, self.seg_callback) 82 | def seg_callback(self, rgb): 83 | try: 84 | with torch.no_grad(): 85 | self.model.eval() 86 | rgb = self.bridge.imgmsg_to_cv2(rgb,'bgr8') 87 | self.to_tensor = transforms.Compose([ 88 | transforms.ToTensor(), 89 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 90 | ]) 91 | #rgb = np.transpose(rgb, (2,0,1)) 92 | #rgb = np.expand_dims(rgb, axis = 0) 93 | #print(type(rgb)) 94 | #rgb = 
torch.from_numpy(rgb) 95 | rgb = self.to_tensor(rgb) 96 | rgb = rgb.unsqueeze_(0) 97 | rgb = rgb.cuda() 98 | predict = self.model(rgb).squeeze() 99 | predict = reverse_one_hot(predict) 100 | predict = np.array(predict) 101 | np.save('./predict',predict) 102 | self.label_pub.publish(self.bridge.cv2_to_imgmsg(predict,'32SC1')) 103 | print('ss') 104 | except CvBridgeError as e: 105 | print(e) 106 | 107 | 108 | 109 | 110 | 111 | def main(params): 112 | # basic parameters 113 | parser = argparse.ArgumentParser() 114 | parser.add_argument('--checkpoint_path', type=str, default=None, required=True, help='The path to the pretrained weights of model') 115 | parser.add_argument('--crop_height', type=int, default=720, help='Height of cropped/resized input image to network') 116 | parser.add_argument('--crop_width', type=int, default=960, help='Width of cropped/resized input image to network') 117 | parser.add_argument('--data', type=str, default='/path/to/data', help='Path of training data') 118 | parser.add_argument('--batch_size', type=int, default=1, help='Number of images in each batch') 119 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 120 | parser.add_argument('--cuda', type=str, default='0', help='GPU ids used for training') 121 | parser.add_argument('--use_gpu', type=bool, default=True, help='Whether to user gpu for training') 122 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)') 123 | parser.add_argument('--loss', type=str, default='dice', help='loss function, dice or crossentropy') 124 | args = parser.parse_args(params) 125 | 126 | # build model 127 | os.environ['CUDA_VISIBLE_DEVICES'] = args.cuda 128 | model = BiSeNet(args.num_classes, args.context_path) 129 | if torch.cuda.is_available() and args.use_gpu: 130 | model = torch.nn.DataParallel(model).cuda() 131 | 132 | # load pretrained model if exists 133 | print('load model from %s ...' % args.checkpoint_path) 134 | model.module.load_state_dict(torch.load(args.checkpoint_path)) 135 | print('Done!') 136 | 137 | rospy.init_node('obj_seg',anonymous=True) 138 | Seg = object_segmentation(model) 139 | rospy.spin() 140 | 141 | 142 | 143 | if __name__ == '__main__': 144 | params = [ 145 | '--checkpoint_path', './checkpoints_18_sgd/best_dice_loss.pth', 146 | '--data', './CamVid/', 147 | '--cuda', '1', 148 | '--context_path', 'resnet101', 149 | '--num_classes', '21' 150 | ] 151 | main(params) 152 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/eval_linemod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python3 ./tools/eval_linemod.py --dataset_root ./datasets/linemod/Linemod_preprocessed\ 10 | --model trained_checkpoints/linemod/pose_model_9_0.01310166542980859.pth\ 11 | --refine_model trained_checkpoints/linemod/pose_refine_model_493_0.006761023565178073.pth -------------------------------------------------------------------------------- /scripts/experiments/scripts/eval_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 
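# Note: the clone below is assumed to supply the PoseCNN reference results
# (results_PoseCNN_RSS2018.zip); eval_ycb.py reads the segmentation masks and
# object ROIs from that archive as the input to the YCB evaluation.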
11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth 21 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/ros_eval_msg.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/ros_eval_ycb_message.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 22 | --num_classes 22 \ 23 | --context_path resnet18 24 | 25 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/ros_eval_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | # --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 18 | #--model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \ 19 | python ./tools/ros_eval_ycb.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 20 | --model trained_checkpoints/ycb/pose_model_13_0.01985655868300905.pth \ 21 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 22 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 23 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 24 | --num_classes 21 \ 25 | --context_path resnet101 26 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | if [ ! -d YCB_Video_toolbox ];then 10 | echo 'Downloading the YCB_Video_toolbox...' 11 | git clone https://github.com/yuxng/YCB_Video_toolbox.git 12 | cd YCB_Video_toolbox 13 | unzip results_PoseCNN_RSS2018.zip 14 | cd .. 
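# The *.m files copied below are the modified MATLAB evaluation scripts shipped
# with DenseFusion; dropping them into YCB_Video_toolbox replaces the toolbox's
# stock evaluation code so it can score the poses produced by this package.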
15 | cp replace_ycb_toolbox/*.m YCB_Video_toolbox/ 16 | fi 17 | 18 | python ./tools/ros_eval_ycb2.py --dataset_root ./datasets/ycb/YCB_Video_Dataset\ 19 | --model trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth\ 20 | --refine_model trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth \ 21 | --checkpoint_path trained_checkpoints/ycb/best_dice_loss.pth \ 22 | --num_classes 21 \ 23 | --context_path resnet18 24 | -------------------------------------------------------------------------------- /scripts/experiments/scripts/train_linemod.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python3 ./tools/train.py --dataset linemod\ 10 | --dataset_root ./datasets/linemod/Linemod_preprocessed -------------------------------------------------------------------------------- /scripts/experiments/scripts/train_ycb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -x 4 | set -e 5 | 6 | export PYTHONUNBUFFERED="True" 7 | export CUDA_VISIBLE_DEVICES=0 8 | 9 | python2 ./tools/train.py --dataset ycb\ 10 | --dataset_root ./datasets/ycb/YCB_Video_Dataset 11 | -------------------------------------------------------------------------------- /scripts/lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/extractors.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/extractors.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/build/knn_cuda_kernel.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/build/knn_cuda_kernel.so -------------------------------------------------------------------------------- /scripts/lib/knn/build_ffi.py: -------------------------------------------------------------------------------- 1 | # https://gist.github.com/tonyseek/7821993 2 | import glob 3 | import torch 4 | from os import path as osp 5 | from torch.utils.ffi import create_extension 6 | 7 | abs_path = osp.dirname(osp.realpath(__file__)) 8 | extra_objects = [osp.join(abs_path, 'build/knn_cuda_kernel.so')] 9 | extra_objects += glob.glob('/usr/local/cuda/lib64/*.a') 10 | 11 | ffi = create_extension( 12 | 'knn_pytorch', 13 | headers=['src/knn_pytorch.h'], 14 | sources=['src/knn_pytorch.c'], 15 | define_macros=[('WITH_CUDA', None)], 16 | relative_to=__file__, 17 | with_cuda=True, 18 | extra_objects=extra_objects, 19 | include_dirs=[osp.join(abs_path, 'include')] 20 | ) 21 | 22 | 23 | if __name__ == '__main__': 24 | assert torch.cuda.is_available(), 
'Please install CUDA for GPU support.' 25 | ffi.build() 26 | -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | from torch.utils.ffi import _wrap_function 3 | from ._knn_pytorch import lib as _lib, ffi as _ffi 4 | 5 | __all__ = [] 6 | def _import_symbols(locals): 7 | for symbol in dir(_lib): 8 | fn = getattr(_lib, symbol) 9 | if callable(fn): 10 | locals[symbol] = _wrap_function(fn, _ffi) 11 | else: 12 | locals[symbol] = fn 13 | __all__.append(symbol) 14 | 15 | _import_symbols(locals()) 16 | -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__init__.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-35.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/__pycache__/__init__.cpython-36.pyc -------------------------------------------------------------------------------- /scripts/lib/knn/knn_pytorch/_knn_pytorch.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/knn/knn_pytorch/_knn_pytorch.so -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_cuda_kernel.cu: -------------------------------------------------------------------------------- 1 | /** Modifed version of knn-CUDA from https://github.com/vincentfpgarcia/kNN-CUDA 2 | * The modifications are 3 | * removed texture memory usage 4 | * removed split query KNN computation 5 | * added feature extraction with bilinear interpolation 6 | * 7 | * Last modified by Christopher B. Choy 12/23/2016 8 | */ 9 | 10 | // Includes 11 | #include 12 | #include "cuda.h" 13 | 14 | #include "knn_cuda_kernel.h" 15 | 16 | // Constants used by the program 17 | #define BLOCK_DIM 16 18 | #define DEBUG 0 19 | 20 | /** 21 | * Computes the distance between two matrix A (reference points) and 22 | * B (query points) containing respectively wA and wB points. 
23 | * 24 | * @param A pointer on the matrix A 25 | * @param wA width of the matrix A = number of points in A 26 | * @param B pointer on the matrix B 27 | * @param wB width of the matrix B = number of points in B 28 | * @param dim dimension of points = height of matrices A and B 29 | * @param AB pointer on the matrix containing the wA*wB distances computed 30 | */ 31 | __global__ void cuComputeDistanceGlobal( float* A, int wA, 32 | float* B, int wB, int dim, float* AB){ 33 | 34 | // Declaration of the shared memory arrays As and Bs used to store the sub-matrix of A and B 35 | __shared__ float shared_A[BLOCK_DIM][BLOCK_DIM]; 36 | __shared__ float shared_B[BLOCK_DIM][BLOCK_DIM]; 37 | 38 | // Sub-matrix of A (begin, step, end) and Sub-matrix of B (begin, step) 39 | __shared__ int begin_A; 40 | __shared__ int begin_B; 41 | __shared__ int step_A; 42 | __shared__ int step_B; 43 | __shared__ int end_A; 44 | 45 | // Thread index 46 | int tx = threadIdx.x; 47 | int ty = threadIdx.y; 48 | 49 | // Other variables 50 | float tmp; 51 | float ssd = 0; 52 | 53 | // Loop parameters 54 | begin_A = BLOCK_DIM * blockIdx.y; 55 | begin_B = BLOCK_DIM * blockIdx.x; 56 | step_A = BLOCK_DIM * wA; 57 | step_B = BLOCK_DIM * wB; 58 | end_A = begin_A + (dim-1) * wA; 59 | 60 | // Conditions 61 | int cond0 = (begin_A + tx < wA); // used to write in shared memory 62 | int cond1 = (begin_B + tx < wB); // used to write in shared memory & to computations and to write in output matrix 63 | int cond2 = (begin_A + ty < wA); // used to computations and to write in output matrix 64 | 65 | // Loop over all the sub-matrices of A and B required to compute the block sub-matrix 66 | for (int a = begin_A, b = begin_B; a <= end_A; a += step_A, b += step_B) { 67 | // Load the matrices from device memory to shared memory; each thread loads one element of each matrix 68 | if (a/wA + ty < dim){ 69 | shared_A[ty][tx] = (cond0)? A[a + wA * ty + tx] : 0; 70 | shared_B[ty][tx] = (cond1)? B[b + wB * ty + tx] : 0; 71 | } 72 | else{ 73 | shared_A[ty][tx] = 0; 74 | shared_B[ty][tx] = 0; 75 | } 76 | 77 | // Synchronize to make sure the matrices are loaded 78 | __syncthreads(); 79 | 80 | // Compute the difference between the two matrixes; each thread computes one element of the block sub-matrix 81 | if (cond2 && cond1){ 82 | for (int k = 0; k < BLOCK_DIM; ++k){ 83 | tmp = shared_A[k][ty] - shared_B[k][tx]; 84 | ssd += tmp*tmp; 85 | } 86 | } 87 | 88 | // Synchronize to make sure that the preceding computation is done before loading two new sub-matrices of A and B in the next iteration 89 | __syncthreads(); 90 | } 91 | 92 | // Write the block sub-matrix to device memory; each thread writes one element 93 | if (cond2 && cond1) 94 | AB[(begin_A + ty) * wB + begin_B + tx] = ssd; 95 | } 96 | 97 | 98 | /** 99 | * Gathers k-th smallest distances for each column of the distance matrix in the top. 
100 | * 101 | * @param dist distance matrix 102 | * @param ind index matrix 103 | * @param width width of the distance matrix and of the index matrix 104 | * @param height height of the distance matrix and of the index matrix 105 | * @param k number of neighbors to consider 106 | */ 107 | __global__ void cuInsertionSort(float *dist, long *ind, int width, int height, int k){ 108 | 109 | // Variables 110 | int l, i, j; 111 | float *p_dist; 112 | long *p_ind; 113 | float curr_dist, max_dist; 114 | long curr_row, max_row; 115 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 116 | 117 | if (xIndexcurr_dist){ 132 | i=a; 133 | break; 134 | } 135 | } 136 | for (j=l; j>i; j--){ 137 | p_dist[j*width] = p_dist[(j-1)*width]; 138 | p_ind[j*width] = p_ind[(j-1)*width]; 139 | } 140 | p_dist[i*width] = curr_dist; 141 | p_ind[i*width] = l+1; 142 | } else { 143 | p_ind[l*width] = l+1; 144 | } 145 | max_dist = p_dist[curr_row]; 146 | } 147 | 148 | // Part 2 : insert element in the k-th first lines 149 | max_row = (k-1)*width; 150 | for (l=k; lcurr_dist){ 156 | i=a; 157 | break; 158 | } 159 | } 160 | for (j=k-1; j>i; j--){ 161 | p_dist[j*width] = p_dist[(j-1)*width]; 162 | p_ind[j*width] = p_ind[(j-1)*width]; 163 | } 164 | p_dist[i*width] = curr_dist; 165 | p_ind[i*width] = l+1; 166 | max_dist = p_dist[max_row]; 167 | } 168 | } 169 | } 170 | } 171 | 172 | 173 | /** 174 | * Computes the square root of the first line (width-th first element) 175 | * of the distance matrix. 176 | * 177 | * @param dist distance matrix 178 | * @param width width of the distance matrix 179 | * @param k number of neighbors to consider 180 | */ 181 | __global__ void cuParallelSqrt(float *dist, int width, int k){ 182 | unsigned int xIndex = blockIdx.x * blockDim.x + threadIdx.x; 183 | unsigned int yIndex = blockIdx.y * blockDim.y + threadIdx.y; 184 | if (xIndex>>(ref_dev, ref_nb, 231 | query_dev, query_nb, dim, dist_dev); 232 | 233 | // Kernel 2: Sort each column 234 | cuInsertionSort<<>>(dist_dev, ind_dev, 235 | query_nb, ref_nb, k); 236 | 237 | // Kernel 3: Compute square root of k first elements 238 | // cuParallelSqrt<<>>(dist_dev, query_nb, k); 239 | 240 | #if DEBUG 241 | unsigned int size_of_float = sizeof(float); 242 | unsigned long size_of_long = sizeof(long); 243 | 244 | float* dist_host = new float[query_nb * k]; 245 | long* idx_host = new long[query_nb * k]; 246 | 247 | // Memory copy of output from device to host 248 | cudaMemcpy(&dist_host[0], dist_dev, 249 | query_nb * k *size_of_float, cudaMemcpyDeviceToHost); 250 | 251 | cudaMemcpy(&idx_host[0], ind_dev, 252 | query_nb * k * size_of_long, cudaMemcpyDeviceToHost); 253 | 254 | int i = 0; 255 | for(i = 0; i < 100; i++){ 256 | printf("IDX[%d]: %d\n", i, (int)idx_host[i]); 257 | } 258 | #endif 259 | } 260 | -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_cuda_kernel.h: -------------------------------------------------------------------------------- 1 | #ifndef _MATHUTIL_CUDA_KERNEL 2 | #define _MATHUTIL_CUDA_KERNEL 3 | 4 | #define IDX2D(i, j, dj) (dj * i + j) 5 | #define IDX3D(i, j, k, dj, dk) (IDX2D(IDX2D(i, j, dj), k, dk)) 6 | 7 | #define BLOCK 512 8 | #define MAX_STREAMS 512 9 | 10 | #ifdef __cplusplus 11 | extern "C" { 12 | #endif 13 | 14 | void knn_device(float* ref_dev, int ref_width, 15 | float* query_dev, int query_width, 16 | int height, int k, float* dist_dev, long* ind_dev, cudaStream_t stream); 17 | 18 | #ifdef __cplusplus 19 | } 20 | #endif 21 | 22 | #endif 23 | 
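The CUDA kernel above and the C binding that follows are reached from Python through scripts/lib/knn/__init__.py, which this repository ships only as a compiled .pyc. The sketch below is therefore an assumed usage example, reconstructed from how scripts/lib/loss.py and scripts/tools/eval_linemod.py call the wrapper; the wrapper's internals are not shown here. Note that the indices written by cuInsertionSort are 1-based, which is why callers subtract 1 before torch.index_select.

import torch
from lib.knn.__init__ import KNearestNeighbor   # wrapper around _knn_pytorch.so (only the .pyc is included in this repo)

knn = KNearestNeighbor(1)                        # k = 1, as used by loss.py / loss_refiner.py
ref = torch.rand(1, 3, 500).cuda()               # reference cloud:  (batch, 3, num_ref)
query = torch.rand(1, 3, 500).cuda()             # query cloud:      (batch, 3, num_query)
inds = knn(ref, query)                           # for each query point, 1-based index of its nearest reference point
nearest = torch.index_select(ref.squeeze(0), 1, inds.view(-1) - 1)   # (3, num_query), 0-based lookup
del knn                                          # loss.py deletes the wrapper after each use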
-------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_pytorch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "knn_cuda_kernel.h" 3 | 4 | extern THCState *state; 5 | 6 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor, 7 | THCudaLongTensor *idx_tensor) { 8 | 9 | THCAssertSameGPU(THCudaTensor_checkGPU(state, 3, idx_tensor, ref_tensor, query_tensor)); 10 | long batch, ref_nb, query_nb, dim, k; 11 | THArgCheck(THCudaTensor_nDimension(state, ref_tensor) == 3 , 0, "ref_tensor: 3D Tensor expected"); 12 | THArgCheck(THCudaTensor_nDimension(state, query_tensor) == 3 , 1, "query_tensor: 3D Tensor expected"); 13 | THArgCheck(THCudaLongTensor_nDimension(state, idx_tensor) == 3 , 3, "idx_tensor: 3D Tensor expected"); 14 | THArgCheck(THCudaTensor_size(state, ref_tensor, 0) == THCudaTensor_size(state, query_tensor,0), 0, "input sizes must match"); 15 | THArgCheck(THCudaTensor_size(state, ref_tensor, 1) == THCudaTensor_size(state, query_tensor,1), 0, "input sizes must match"); 16 | THArgCheck(THCudaTensor_size(state, idx_tensor, 2) == THCudaTensor_size(state, query_tensor,2), 0, "input sizes must match"); 17 | 18 | //ref_tensor = THCudaTensor_newContiguous(state, ref_tensor); 19 | //query_tensor = THCudaTensor_newContiguous(state, query_tensor); 20 | 21 | batch = THCudaLongTensor_size(state, ref_tensor, 0); 22 | dim = THCudaTensor_size(state, ref_tensor, 1); 23 | k = THCudaLongTensor_size(state, idx_tensor, 1); 24 | ref_nb = THCudaTensor_size(state, ref_tensor, 2); 25 | query_nb = THCudaTensor_size(state, query_tensor, 2); 26 | 27 | float *ref_dev = THCudaTensor_data(state, ref_tensor); 28 | float *query_dev = THCudaTensor_data(state, query_tensor); 29 | long *idx_dev = THCudaLongTensor_data(state, idx_tensor); 30 | // scratch buffer for distances 31 | float *dist_dev = (float*)THCudaMalloc(state, ref_nb * query_nb * sizeof(float)); 32 | 33 | for (int b = 0; b < batch; b++) { 34 | knn_device(ref_dev + b * dim * ref_nb, ref_nb, query_dev + b * dim * query_nb, query_nb, dim, k, 35 | dist_dev, idx_dev + b * k * query_nb, THCState_getCurrentStream(state)); 36 | } 37 | // free buffer 38 | THCudaFree(state, dist_dev); 39 | //printf("aaaaa\n"); 40 | // check for errors 41 | cudaError_t err = cudaGetLastError(); 42 | if (err != cudaSuccess) { 43 | printf("error in knn: %s\n", cudaGetErrorString(err)); 44 | THError("aborting"); 45 | } 46 | 47 | return 1; 48 | } 49 | -------------------------------------------------------------------------------- /scripts/lib/knn/src/knn_pytorch.h: -------------------------------------------------------------------------------- 1 | int knn(THCudaTensor *ref_tensor, THCudaTensor *query_tensor, 2 | THCudaLongTensor *idx_tensor); 3 | -------------------------------------------------------------------------------- /scripts/lib/loss.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.loss import _Loss 2 | from torch.autograd import Variable 3 | import torch 4 | import time 5 | import numpy as np 6 | import torch.nn as nn 7 | import random 8 | import torch.backends.cudnn as cudnn 9 | from lib.knn.__init__ import KNearestNeighbor 10 | 11 | 12 | def loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, num_point_mesh, sym_list): 13 | knn = KNearestNeighbor(1) 14 | bs, num_p, _ = pred_c.size() 15 | 16 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1)) 17 | 18 
| base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\ 19 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 20 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 21 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \ 22 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \ 23 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 24 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 25 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 26 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3) 27 | 28 | ori_base = base 29 | base = base.contiguous().transpose(2, 1).contiguous() 30 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 31 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 32 | ori_target = target 33 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3) 34 | ori_t = pred_t 35 | points = points.contiguous().view(bs * num_p, 1, 3) 36 | pred_c = pred_c.contiguous().view(bs * num_p) 37 | 38 | pred = torch.add(torch.bmm(model_points, base), points + pred_t) 39 | 40 | if not refine: 41 | if idx[0].item() in sym_list: 42 | target = target[0].transpose(1, 0).contiguous().view(3, -1) 43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1) 44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 45 | target = torch.index_select(target, 1, inds.view(-1) - 1) 46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 48 | 49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1) 50 | loss = torch.mean((dis * pred_c - w * torch.log(pred_c)), dim=0) 51 | 52 | 53 | pred_c = pred_c.view(bs, num_p) 54 | how_max, which_max = torch.max(pred_c, 1) 55 | dis = dis.view(bs, num_p) 56 | 57 | 58 | t = ori_t[which_max[0]] + points[which_max[0]] 59 | points = points.view(1, bs * num_p, 3) 60 | 61 | ori_base = ori_base[which_max[0]].view(1, 3, 3).contiguous() 62 | ori_t = t.repeat(bs * num_p, 1).contiguous().view(1, bs * num_p, 3) 63 | new_points = torch.bmm((points - ori_t), ori_base).contiguous() 64 | 65 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous() 66 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3) 67 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous() 68 | 69 | # print('------------> ', dis[0][which_max[0]].item(), pred_c[0][which_max[0]].item(), idx[0].item()) 70 | del knn 71 | return loss, dis[0][which_max[0]], new_points.detach(), new_target.detach() 72 | 73 | 74 | class Loss(_Loss): 75 | 76 | def __init__(self, num_points_mesh, sym_list): 77 | super(Loss, self).__init__(True) 78 | self.num_pt_mesh = num_points_mesh 79 | self.sym_list = sym_list 80 | 81 | def forward(self, pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine): 82 | 83 | return loss_calculation(pred_r, pred_t, pred_c, target, model_points, idx, points, w, refine, self.num_pt_mesh, self.sym_list) 84 | -------------------------------------------------------------------------------- 
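For reference, the nine terms concatenated into base in loss_calculation above are the standard rotation matrix of the normalized per-point quaternion pred_r = (q_0, q_1, q_2, q_3), and the value returned as loss is the confidence-weighted ADD/ADD-S objective used by DenseFusion. Restated in LaTeX (no new math, just the code above written out):

R(q) =
\begin{pmatrix}
1 - 2(q_2^2 + q_3^2) & 2(q_1 q_2 - q_0 q_3) & 2(q_0 q_2 + q_1 q_3) \\
2(q_1 q_2 + q_0 q_3) & 1 - 2(q_1^2 + q_3^2) & 2(q_2 q_3 - q_0 q_1) \\
2(q_1 q_3 - q_0 q_2) & 2(q_0 q_1 + q_2 q_3) & 1 - 2(q_1^2 + q_2^2)
\end{pmatrix}

d_i = \frac{1}{M} \sum_{j=1}^{M} \big\| (R_i x_j + t_i) - y_j \big\|_2 , \qquad
\mathcal{L} = \frac{1}{N} \sum_{i=1}^{N} \big( d_i \, c_i - w \log c_i \big)

Here x_j are the M model points (num_point_mesh), y_j the ground-truth-posed target points, t_i the sampled cloud point plus the predicted per-point offset (points + pred_t), c_i the predicted confidence pred_c, w the balancing weight, and N = bs * num_p. For objects in sym_list, each y_j is replaced by the closest target point found with the kNN call above, which gives the symmetric (ADD-S) variant.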
/scripts/lib/loss.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/loss.pyc -------------------------------------------------------------------------------- /scripts/lib/loss_refiner.py: -------------------------------------------------------------------------------- 1 | from torch.nn.modules.loss import _Loss 2 | from torch.autograd import Variable 3 | import torch 4 | import time 5 | import numpy as np 6 | import torch.nn as nn 7 | import random 8 | import torch.backends.cudnn as cudnn 9 | from lib.knn.__init__ import KNearestNeighbor 10 | 11 | 12 | def loss_calculation(pred_r, pred_t, target, model_points, idx, points, num_point_mesh, sym_list): 13 | knn = KNearestNeighbor(1) 14 | pred_r = pred_r.view(1, 1, -1) 15 | pred_t = pred_t.view(1, 1, -1) 16 | bs, num_p, _ = pred_r.size() 17 | num_input_points = len(points[0]) 18 | 19 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(bs, num_p, 1)) 20 | 21 | base = torch.cat(((1.0 - 2.0*(pred_r[:, :, 2]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1),\ 22 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] - 2.0*pred_r[:, :, 0]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 23 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 24 | (2.0*pred_r[:, :, 1]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 3]*pred_r[:, :, 0]).view(bs, num_p, 1), \ 25 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 3]**2)).view(bs, num_p, 1), \ 26 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 27 | (-2.0*pred_r[:, :, 0]*pred_r[:, :, 2] + 2.0*pred_r[:, :, 1]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 28 | (2.0*pred_r[:, :, 0]*pred_r[:, :, 1] + 2.0*pred_r[:, :, 2]*pred_r[:, :, 3]).view(bs, num_p, 1), \ 29 | (1.0 - 2.0*(pred_r[:, :, 1]**2 + pred_r[:, :, 2]**2)).view(bs, num_p, 1)), dim=2).contiguous().view(bs * num_p, 3, 3) 30 | 31 | ori_base = base 32 | base = base.contiguous().transpose(2, 1).contiguous() 33 | model_points = model_points.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 34 | target = target.view(bs, 1, num_point_mesh, 3).repeat(1, num_p, 1, 1).view(bs * num_p, num_point_mesh, 3) 35 | ori_target = target 36 | pred_t = pred_t.contiguous().view(bs * num_p, 1, 3) 37 | ori_t = pred_t 38 | 39 | pred = torch.add(torch.bmm(model_points, base), pred_t) 40 | 41 | if idx[0].item() in sym_list: 42 | target = target[0].transpose(1, 0).contiguous().view(3, -1) 43 | pred = pred.permute(2, 0, 1).contiguous().view(3, -1) 44 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 45 | target = torch.index_select(target, 1, inds.view(-1) - 1) 46 | target = target.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 47 | pred = pred.view(3, bs * num_p, num_point_mesh).permute(1, 2, 0).contiguous() 48 | 49 | dis = torch.mean(torch.norm((pred - target), dim=2), dim=1) 50 | 51 | t = ori_t[0] 52 | points = points.view(1, num_input_points, 3) 53 | 54 | ori_base = ori_base[0].view(1, 3, 3).contiguous() 55 | ori_t = t.repeat(bs * num_input_points, 1).contiguous().view(1, bs * num_input_points, 3) 56 | new_points = torch.bmm((points - ori_t), ori_base).contiguous() 57 | 58 | new_target = ori_target[0].view(1, num_point_mesh, 3).contiguous() 59 | ori_t = t.repeat(num_point_mesh, 1).contiguous().view(1, num_point_mesh, 3) 60 | new_target = torch.bmm((new_target - ori_t), ori_base).contiguous() 61 | 62 | # 
print('------------> ', dis.item(), idx[0].item()) 63 | del knn 64 | return dis, new_points.detach(), new_target.detach() 65 | 66 | 67 | class Loss_refine(_Loss): 68 | 69 | def __init__(self, num_points_mesh, sym_list): 70 | super(Loss_refine, self).__init__(True) 71 | self.num_pt_mesh = num_points_mesh 72 | self.sym_list = sym_list 73 | 74 | 75 | def forward(self, pred_r, pred_t, target, model_points, idx, points): 76 | return loss_calculation(pred_r, pred_t, target, model_points, idx, points, self.num_pt_mesh, self.sym_list) 77 | -------------------------------------------------------------------------------- /scripts/lib/network.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import os 3 | import random 4 | import torch 5 | import torch.nn as nn 6 | import torch.nn.parallel 7 | import torch.backends.cudnn as cudnn 8 | import torch.optim as optim 9 | import torch.utils.data 10 | import torchvision.transforms as transforms 11 | import torchvision.utils as vutils 12 | from torch.autograd import Variable 13 | from PIL import Image 14 | import numpy as np 15 | import pdb 16 | import torch.nn.functional as F 17 | from lib.pspnet import PSPNet 18 | 19 | psp_models = { 20 | 'resnet18': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet18'), 21 | 'resnet34': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=512, deep_features_size=256, backend='resnet34'), 22 | 'resnet50': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet50'), 23 | 'resnet101': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet101'), 24 | 'resnet152': lambda: PSPNet(sizes=(1, 2, 3, 6), psp_size=2048, deep_features_size=1024, backend='resnet152') 25 | } 26 | 27 | class ModifiedResnet(nn.Module): 28 | 29 | def __init__(self, usegpu=True): 30 | super(ModifiedResnet, self).__init__() 31 | 32 | self.model = psp_models['resnet18'.lower()]() 33 | self.model = nn.DataParallel(self.model) 34 | 35 | def forward(self, x): 36 | x = self.model(x) 37 | return x 38 | 39 | class PoseNetFeat(nn.Module): 40 | def __init__(self, num_points): 41 | super(PoseNetFeat, self).__init__() 42 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 43 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 44 | 45 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 46 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 47 | 48 | self.conv5 = torch.nn.Conv1d(256, 512, 1) 49 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 50 | 51 | self.ap1 = torch.nn.AvgPool1d(num_points) 52 | self.num_points = num_points 53 | def forward(self, x, emb): 54 | x = F.relu(self.conv1(x)) 55 | emb = F.relu(self.e_conv1(emb)) 56 | pointfeat_1 = torch.cat((x, emb), dim=1) 57 | 58 | x = F.relu(self.conv2(x)) 59 | emb = F.relu(self.e_conv2(emb)) 60 | pointfeat_2 = torch.cat((x, emb), dim=1) 61 | 62 | x = F.relu(self.conv5(pointfeat_2)) 63 | x = F.relu(self.conv6(x)) 64 | 65 | ap_x = self.ap1(x) 66 | 67 | ap_x = ap_x.view(-1, 1024, 1).repeat(1, 1, self.num_points) 68 | return torch.cat([pointfeat_1, pointfeat_2, ap_x], 1) #128 + 256 + 1024 69 | 70 | class PoseNet(nn.Module): 71 | def __init__(self, num_points, num_obj): 72 | super(PoseNet, self).__init__() 73 | self.num_points = num_points 74 | self.cnn = ModifiedResnet() 75 | self.feat = PoseNetFeat(num_points) 76 | 77 | self.conv1_r = torch.nn.Conv1d(1408, 640, 1) 78 | self.conv1_t = torch.nn.Conv1d(1408, 640, 1) 79 | self.conv1_c = torch.nn.Conv1d(1408, 640, 1) 80 | 81 | self.conv2_r = 
torch.nn.Conv1d(640, 256, 1) 82 | self.conv2_t = torch.nn.Conv1d(640, 256, 1) 83 | self.conv2_c = torch.nn.Conv1d(640, 256, 1) 84 | 85 | self.conv3_r = torch.nn.Conv1d(256, 128, 1) 86 | self.conv3_t = torch.nn.Conv1d(256, 128, 1) 87 | self.conv3_c = torch.nn.Conv1d(256, 128, 1) 88 | 89 | self.conv4_r = torch.nn.Conv1d(128, num_obj*4, 1) #quaternion 90 | self.conv4_t = torch.nn.Conv1d(128, num_obj*3, 1) #translation 91 | self.conv4_c = torch.nn.Conv1d(128, num_obj*1, 1) #confidence 92 | 93 | self.num_obj = num_obj 94 | 95 | def forward(self, img, x, choose, obj): 96 | out_img = self.cnn(img) 97 | 98 | bs, di, _, _ = out_img.size() 99 | 100 | emb = out_img.view(bs, di, -1) 101 | choose = choose.repeat(1, di, 1) 102 | emb = torch.gather(emb, 2, choose).contiguous() 103 | 104 | x = x.transpose(2, 1).contiguous() 105 | ap_x = self.feat(x, emb) 106 | 107 | rx = F.relu(self.conv1_r(ap_x)) 108 | tx = F.relu(self.conv1_t(ap_x)) 109 | cx = F.relu(self.conv1_c(ap_x)) 110 | 111 | rx = F.relu(self.conv2_r(rx)) 112 | tx = F.relu(self.conv2_t(tx)) 113 | cx = F.relu(self.conv2_c(cx)) 114 | 115 | rx = F.relu(self.conv3_r(rx)) 116 | tx = F.relu(self.conv3_t(tx)) 117 | cx = F.relu(self.conv3_c(cx)) 118 | 119 | rx = self.conv4_r(rx).view(bs, self.num_obj, 4, self.num_points) 120 | tx = self.conv4_t(tx).view(bs, self.num_obj, 3, self.num_points) 121 | cx = torch.sigmoid(self.conv4_c(cx)).view(bs, self.num_obj, 1, self.num_points) 122 | 123 | b = 0 124 | out_rx = torch.index_select(rx[b], 0, obj[b]) 125 | out_tx = torch.index_select(tx[b], 0, obj[b]) 126 | out_cx = torch.index_select(cx[b], 0, obj[b]) 127 | 128 | out_rx = out_rx.contiguous().transpose(2, 1).contiguous() 129 | out_cx = out_cx.contiguous().transpose(2, 1).contiguous() 130 | out_tx = out_tx.contiguous().transpose(2, 1).contiguous() 131 | 132 | return out_rx, out_tx, out_cx, emb.detach() 133 | 134 | 135 | 136 | class PoseRefineNetFeat(nn.Module): 137 | def __init__(self, num_points): 138 | super(PoseRefineNetFeat, self).__init__() 139 | self.conv1 = torch.nn.Conv1d(3, 64, 1) 140 | self.conv2 = torch.nn.Conv1d(64, 128, 1) 141 | 142 | self.e_conv1 = torch.nn.Conv1d(32, 64, 1) 143 | self.e_conv2 = torch.nn.Conv1d(64, 128, 1) 144 | 145 | self.conv5 = torch.nn.Conv1d(384, 512, 1) 146 | self.conv6 = torch.nn.Conv1d(512, 1024, 1) 147 | 148 | self.ap1 = torch.nn.AvgPool1d(num_points) 149 | self.num_points = num_points 150 | 151 | def forward(self, x, emb): 152 | x = F.relu(self.conv1(x)) 153 | emb = F.relu(self.e_conv1(emb)) 154 | pointfeat_1 = torch.cat([x, emb], dim=1) 155 | 156 | x = F.relu(self.conv2(x)) 157 | emb = F.relu(self.e_conv2(emb)) 158 | pointfeat_2 = torch.cat([x, emb], dim=1) 159 | 160 | pointfeat_3 = torch.cat([pointfeat_1, pointfeat_2], dim=1) 161 | 162 | x = F.relu(self.conv5(pointfeat_3)) 163 | x = F.relu(self.conv6(x)) 164 | 165 | ap_x = self.ap1(x) 166 | 167 | ap_x = ap_x.view(-1, 1024) 168 | return ap_x 169 | 170 | class PoseRefineNet(nn.Module): 171 | def __init__(self, num_points, num_obj): 172 | super(PoseRefineNet, self).__init__() 173 | self.num_points = num_points 174 | self.feat = PoseRefineNetFeat(num_points) 175 | 176 | self.conv1_r = torch.nn.Linear(1024, 512) 177 | self.conv1_t = torch.nn.Linear(1024, 512) 178 | 179 | self.conv2_r = torch.nn.Linear(512, 128) 180 | self.conv2_t = torch.nn.Linear(512, 128) 181 | 182 | self.conv3_r = torch.nn.Linear(128, num_obj*4) #quaternion 183 | self.conv3_t = torch.nn.Linear(128, num_obj*3) #translation 184 | 185 | self.num_obj = num_obj 186 | 187 | def forward(self, x, emb, obj): 188 | 
bs = x.size()[0] 189 | 190 | x = x.transpose(2, 1).contiguous() 191 | ap_x = self.feat(x, emb) 192 | 193 | rx = F.relu(self.conv1_r(ap_x)) 194 | tx = F.relu(self.conv1_t(ap_x)) 195 | 196 | rx = F.relu(self.conv2_r(rx)) 197 | tx = F.relu(self.conv2_t(tx)) 198 | 199 | rx = self.conv3_r(rx).view(bs, self.num_obj, 4) 200 | tx = self.conv3_t(tx).view(bs, self.num_obj, 3) 201 | 202 | b = 0 203 | out_rx = torch.index_select(rx[b], 0, obj[b]) 204 | out_tx = torch.index_select(tx[b], 0, obj[b]) 205 | 206 | return out_rx, out_tx 207 | -------------------------------------------------------------------------------- /scripts/lib/pspnet.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/pspnet.pyc -------------------------------------------------------------------------------- /scripts/lib/transformations.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/lib/transformations.pyc -------------------------------------------------------------------------------- /scripts/loss.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | import torch.nn.functional as F 4 | 5 | def flatten(tensor): 6 | """Flattens a given tensor such that the channel axis is first. 7 | The shapes are transformed as follows: 8 | (N, C, D, H, W) -> (C, N * D * H * W) 9 | """ 10 | C = tensor.size(1) 11 | # new axis order 12 | axis_order = (1, 0) + tuple(range(2, tensor.dim())) 13 | # Transpose: (N, C, D, H, W) -> (C, N, D, H, W) 14 | transposed = tensor.permute(axis_order) 15 | # Flatten: (C, N, D, H, W) -> (C, N * D * H * W) 16 | return transposed.contiguous().view(C, -1) 17 | 18 | 19 | class DiceLoss(nn.Module): 20 | def __init__(self): 21 | super().__init__() 22 | self.epsilon = 1e-5 23 | 24 | def forward(self, output, target): 25 | assert output.size() == target.size(), "'input' and 'target' must have the same shape" 26 | output = F.softmax(output, dim=1) 27 | output = flatten(output) 28 | target = flatten(target) 29 | # intersect = (output * target).sum(-1).sum() + self.epsilon 30 | # denominator = ((output + target).sum(-1)).sum() + self.epsilon 31 | 32 | intersect = (output * target).sum(-1) 33 | denominator = (output + target).sum(-1) 34 | dice = intersect / denominator 35 | dice = torch.mean(dice) 36 | return 1 - dice 37 | # return 1 - 2. 
* intersect / denominator 38 | -------------------------------------------------------------------------------- /scripts/matrix.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/matrix.npy -------------------------------------------------------------------------------- /scripts/model/build_BiSeNet.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from model.build_contextpath import build_contextpath 4 | import warnings 5 | warnings.filterwarnings(action='ignore') 6 | 7 | class ConvBlock(torch.nn.Module): 8 | def __init__(self, in_channels, out_channels, kernel_size=3, stride=2,padding=1): 9 | super(ConvBlock,self).__init__() 10 | self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=kernel_size, stride=stride, padding=padding, bias=False) 11 | self.bn = nn.BatchNorm2d(out_channels) 12 | self.relu = nn.ReLU() 13 | 14 | def forward(self, input): 15 | x = self.conv1(input) 16 | return self.relu(self.bn(x)) 17 | 18 | class Spatial_path(torch.nn.Module): 19 | def __init__(self): 20 | super(Spatial_path,self).__init__() 21 | self.convblock1 = ConvBlock(in_channels=3, out_channels=64) 22 | self.convblock2 = ConvBlock(in_channels=64, out_channels=128) 23 | self.convblock3 = ConvBlock(in_channels=128, out_channels=256) 24 | 25 | def forward(self, input): 26 | x = self.convblock1(input) 27 | x = self.convblock2(x) 28 | x = self.convblock3(x) 29 | return x 30 | 31 | class AttentionRefinementModule(torch.nn.Module): 32 | def __init__(self, in_channels, out_channels): 33 | super(AttentionRefinementModule,self).__init__() 34 | self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1) 35 | self.bn = nn.BatchNorm2d(out_channels) 36 | self.sigmoid = nn.Sigmoid() 37 | self.in_channels = in_channels 38 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 39 | 40 | def forward(self, input): 41 | # global average pooling 42 | x = self.avgpool(input) 43 | assert self.in_channels == x.size(1), 'in_channels and out_channels should all be {}'.format(x.size(1)) 44 | x = self.conv(x) 45 | # x = self.sigmoid(self.bn(x)) 46 | x = self.sigmoid(x) 47 | # channels of input and x should be same 48 | x = torch.mul(input, x) 49 | return x 50 | 51 | 52 | class FeatureFusionModule(torch.nn.Module): 53 | def __init__(self, num_classes, in_channels): 54 | super(FeatureFusionModule,self).__init__() 55 | # self.in_channels = input_1.channels + input_2.channels 56 | # resnet101 3328 = 256(from context path) + 1024(from spatial path) + 2048(from spatial path) 57 | # resnet18 1024 = 256(from context path) + 256(from spatial path) + 512(from spatial path) 58 | self.in_channels = in_channels 59 | 60 | self.convblock = ConvBlock(in_channels=self.in_channels, out_channels=num_classes, stride=1) 61 | self.conv1 = nn.Conv2d(num_classes, num_classes, kernel_size=1) 62 | self.relu = nn.ReLU() 63 | self.conv2 = nn.Conv2d(num_classes, num_classes, kernel_size=1) 64 | self.sigmoid = nn.Sigmoid() 65 | self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1)) 66 | 67 | 68 | def forward(self, input_1, input_2): 69 | x = torch.cat((input_1, input_2), dim=1) 70 | assert self.in_channels == x.size(1), 'in_channels of ConvBlock should be {}'.format(x.size(1)) 71 | feature = self.convblock(x) 72 | x = self.avgpool(feature) 73 | 74 | x = self.relu(self.conv1(x)) 75 | x = self.sigmoid(self.conv2(x)) 76 | x = 
torch.mul(feature, x) 77 | x = torch.add(x, feature) 78 | return x 79 | 80 | class BiSeNet(torch.nn.Module): 81 | def __init__(self, num_classes, context_path): 82 | super(BiSeNet,self).__init__() 83 | # build spatial path 84 | self.saptial_path = Spatial_path() 85 | 86 | # build context path 87 | self.context_path = build_contextpath(name=context_path) 88 | 89 | # build attention refinement module for resnet 101 90 | if context_path == 'resnet101': 91 | self.attention_refinement_module1 = AttentionRefinementModule(1024, 1024) 92 | self.attention_refinement_module2 = AttentionRefinementModule(2048, 2048) 93 | # supervision block 94 | self.supervision1 = nn.Conv2d(in_channels=1024, out_channels=num_classes, kernel_size=1) 95 | self.supervision2 = nn.Conv2d(in_channels=2048, out_channels=num_classes, kernel_size=1) 96 | # build feature fusion module 97 | self.feature_fusion_module = FeatureFusionModule(num_classes, 3328) 98 | 99 | elif context_path == 'resnet18': 100 | # build attention refinement module for resnet 18 101 | self.attention_refinement_module1 = AttentionRefinementModule(256, 256) 102 | self.attention_refinement_module2 = AttentionRefinementModule(512, 512) 103 | # supervision block 104 | self.supervision1 = nn.Conv2d(in_channels=256, out_channels=num_classes, kernel_size=1) 105 | self.supervision2 = nn.Conv2d(in_channels=512, out_channels=num_classes, kernel_size=1) 106 | # build feature fusion module 107 | self.feature_fusion_module = FeatureFusionModule(num_classes, 1024) 108 | else: 109 | print('Error: unspport context_path network \n') 110 | 111 | # build final convolution 112 | self.conv = nn.Conv2d(in_channels=num_classes, out_channels=num_classes, kernel_size=1) 113 | 114 | self.init_weight() 115 | 116 | self.mul_lr = [] 117 | self.mul_lr.append(self.saptial_path) 118 | self.mul_lr.append(self.attention_refinement_module1) 119 | self.mul_lr.append(self.attention_refinement_module2) 120 | self.mul_lr.append(self.supervision1) 121 | self.mul_lr.append(self.supervision2) 122 | self.mul_lr.append(self.feature_fusion_module) 123 | self.mul_lr.append(self.conv) 124 | 125 | def init_weight(self): 126 | for name, m in self.named_modules(): 127 | if 'context_path' not in name: 128 | if isinstance(m, nn.Conv2d): 129 | nn.init.kaiming_normal_(m.weight, mode='fan_in', nonlinearity='relu') 130 | elif isinstance(m, nn.BatchNorm2d): 131 | m.eps = 1e-5 132 | m.momentum = 0.1 133 | nn.init.constant_(m.weight, 1) 134 | nn.init.constant_(m.bias, 0) 135 | 136 | def forward(self, input): 137 | # output of spatial path 138 | sx = self.saptial_path(input) 139 | 140 | # output of context path 141 | cx1, cx2, tail = self.context_path(input) 142 | cx1 = self.attention_refinement_module1(cx1) 143 | cx2 = self.attention_refinement_module2(cx2) 144 | cx2 = torch.mul(cx2, tail) 145 | # upsampling 146 | cx1 = torch.nn.functional.interpolate(cx1, size=sx.size()[-2:], mode='bilinear') 147 | cx2 = torch.nn.functional.interpolate(cx2, size=sx.size()[-2:], mode='bilinear') 148 | cx = torch.cat((cx1, cx2), dim=1) 149 | 150 | if self.training == True: 151 | cx1_sup = self.supervision1(cx1) 152 | cx2_sup = self.supervision2(cx2) 153 | cx1_sup = torch.nn.functional.interpolate(cx1_sup, size=input.size()[-2:], mode='bilinear') 154 | cx2_sup = torch.nn.functional.interpolate(cx2_sup, size=input.size()[-2:], mode='bilinear') 155 | 156 | # output of feature fusion module 157 | result = self.feature_fusion_module(sx, cx) 158 | 159 | # upsampling 160 | result = torch.nn.functional.interpolate(result, 
scale_factor=8, mode='bilinear') 161 | result = self.conv(result) 162 | 163 | if self.training == True: 164 | return result, cx1_sup, cx2_sup 165 | 166 | return result 167 | 168 | 169 | if __name__ == '__main__': 170 | import os 171 | os.environ['CUDA_VISIBLE_DEVICES'] = '0' 172 | model = BiSeNet(21, 'resnet18') 173 | # model = nn.DataParallel(model) 174 | 175 | model = model.cuda() 176 | x = torch.rand(2, 3, 256, 256) 177 | record = model.parameters() 178 | # for key, params in model.named_parameters(): 179 | # if 'bn' in key: 180 | # params.requires_grad = False 181 | from utils import group_weight 182 | # params_list = [] 183 | # for module in model.mul_lr: 184 | # params_list = group_weight(params_list, module, nn.BatchNorm2d, 10) 185 | # params_list = group_weight(params_list, model.context_path, torch.nn.BatchNorm2d, 1) 186 | 187 | print(model.parameters()) 188 | -------------------------------------------------------------------------------- /scripts/model/build_contextpath.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torchvision import models 3 | 4 | 5 | class resnet18(torch.nn.Module): 6 | def __init__(self, pretrained=True): 7 | super(resnet18,self).__init__() 8 | self.features = models.resnet18(pretrained=True) 9 | self.conv1 = self.features.conv1 10 | self.bn1 = self.features.bn1 11 | self.relu = self.features.relu 12 | self.maxpool1 = self.features.maxpool 13 | self.layer1 = self.features.layer1 14 | self.layer2 = self.features.layer2 15 | self.layer3 = self.features.layer3 16 | self.layer4 = self.features.layer4 17 | 18 | def forward(self, input): 19 | x = self.conv1(input) 20 | x = self.relu(self.bn1(x)) 21 | x = self.maxpool1(x) 22 | feature1 = self.layer1(x) # 1 / 4 23 | feature2 = self.layer2(feature1) # 1 / 8 24 | feature3 = self.layer3(feature2) # 1 / 16 25 | feature4 = self.layer4(feature3) # 1 / 32 26 | # global average pooling to build tail 27 | tail = torch.mean(feature4, 3, keepdim=True) 28 | tail = torch.mean(tail, 2, keepdim=True) 29 | return feature3, feature4, tail 30 | 31 | 32 | class resnet101(torch.nn.Module): 33 | def __init__(self, pretrained=True): 34 | super(resnet101,self).__init__() 35 | self.features = models.resnet101(pretrained=True) 36 | self.conv1 = self.features.conv1 37 | self.bn1 = self.features.bn1 38 | self.relu = self.features.relu 39 | self.maxpool1 = self.features.maxpool 40 | self.layer1 = self.features.layer1 41 | self.layer2 = self.features.layer2 42 | self.layer3 = self.features.layer3 43 | self.layer4 = self.features.layer4 44 | 45 | def forward(self, input): 46 | x = self.conv1(input) 47 | x = self.relu(self.bn1(x)) 48 | x = self.maxpool1(x) 49 | feature1 = self.layer1(x) # 1 / 4 50 | feature2 = self.layer2(feature1) # 1 / 8 51 | feature3 = self.layer3(feature2) # 1 / 16 52 | feature4 = self.layer4(feature3) # 1 / 32 53 | # global average pooling to build tail 54 | tail = torch.mean(feature4, 3, keepdim=True) 55 | tail = torch.mean(tail, 2, keepdim=True) 56 | return feature3, feature4, tail 57 | 58 | 59 | def build_contextpath(name): 60 | model = { 61 | 'resnet18': resnet18(pretrained=True), 62 | 'resnet101': resnet101(pretrained=True) 63 | } 64 | return model[name] 65 | 66 | 67 | if __name__ == '__main__': 68 | # 69 | model_18 = build_contextpath('resnet18') 70 | model_101 = build_contextpath('resnet101') 71 | x = torch.rand(1, 3, 256, 256) 72 | 73 | y_18 = model_18(x) 74 | y_101 = model_101(x) 75 | 
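The two files above (model/build_BiSeNet.py and model/build_contextpath.py) form the semantic-segmentation front end that the ros_eval_*.sh scripts configure through --checkpoint_path, --num_classes and --context_path. The sketch below shows one way to build and load it with the values from experiments/scripts/ros_eval_ycb.sh; it mirrors the __main__ block of build_BiSeNet.py, but the torch.load / load_state_dict step and the assumption that best_dice_loss.pth holds a plain state_dict are mine, not code copied from tools/ros_eval_ycb.py.

import torch
from model.build_BiSeNet import BiSeNet

# values taken from experiments/scripts/ros_eval_ycb.sh
seg_model = BiSeNet(num_classes=21, context_path='resnet101')
state = torch.load('trained_checkpoints/ycb/best_dice_loss.pth')   # assumed: a plain state_dict
seg_model.load_state_dict(state)
seg_model = seg_model.cuda().eval()            # in eval mode forward() returns only `result`

with torch.no_grad():
    rgb = torch.rand(1, 3, 480, 640).cuda()    # stand-in for one normalized 640x480 camera frame
    logits = seg_model(rgb)                    # (1, 21, 480, 640) after the scale_factor=8 upsampling
    label = logits.argmax(dim=1).squeeze(0)    # per-pixel class ids, 480x640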
-------------------------------------------------------------------------------- /scripts/predict.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/predict.npy -------------------------------------------------------------------------------- /scripts/tools/__pycache__/_init_paths.cpython-35.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/__pycache__/_init_paths.cpython-35.pyc -------------------------------------------------------------------------------- /scripts/tools/_init_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | sys.path.insert(0, os.getcwd()) -------------------------------------------------------------------------------- /scripts/tools/_init_paths.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/tools/_init_paths.pyc -------------------------------------------------------------------------------- /scripts/tools/eval_linemod.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import random 5 | import numpy as np 6 | import yaml 7 | import copy 8 | import torch 9 | import torch.nn as nn 10 | import torch.nn.parallel 11 | import torch.backends.cudnn as cudnn 12 | import torch.optim as optim 13 | import torch.utils.data 14 | import torchvision.datasets as dset 15 | import torchvision.transforms as transforms 16 | import torchvision.utils as vutils 17 | from torch.autograd import Variable 18 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod 19 | from lib.network import PoseNet, PoseRefineNet 20 | from lib.loss import Loss 21 | from lib.loss_refiner import Loss_refine 22 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 23 | from lib.knn.__init__ import KNearestNeighbor 24 | 25 | parser = argparse.ArgumentParser() 26 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 27 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 28 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 29 | opt = parser.parse_args() 30 | 31 | num_objects = 13 32 | objlist = [1, 2, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15] 33 | num_points = 500 34 | iteration = 2 35 | bs = 1 36 | dataset_config_dir = 'datasets/linemod/dataset_config' 37 | output_result_dir = 'experiments/eval_result/linemod' 38 | knn = KNearestNeighbor(1) 39 | 40 | estimator = PoseNet(num_points = num_points, num_obj = num_objects) 41 | estimator.cuda() 42 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_objects) 43 | refiner.cuda() 44 | estimator.load_state_dict(torch.load(opt.model)) 45 | refiner.load_state_dict(torch.load(opt.refine_model)) 46 | estimator.eval() 47 | refiner.eval() 48 | 49 | testdataset = PoseDataset_linemod('eval', num_points, False, opt.dataset_root, 0.0, True) 50 | testdataloader = torch.utils.data.DataLoader(testdataset, batch_size=1, shuffle=False, num_workers=10) 51 | 52 | sym_list = testdataset.get_sym_list() 53 | 
num_points_mesh = testdataset.get_num_points_mesh() 54 | criterion = Loss(num_points_mesh, sym_list) 55 | criterion_refine = Loss_refine(num_points_mesh, sym_list) 56 | 57 | diameter = [] 58 | meta_file = open('{0}/models_info.yml'.format(dataset_config_dir), 'r') 59 | meta = yaml.load(meta_file) 60 | for obj in objlist: 61 | diameter.append(meta[obj]['diameter'] / 1000.0 * 0.1) 62 | print(diameter) 63 | 64 | success_count = [0 for i in range(num_objects)] 65 | num_count = [0 for i in range(num_objects)] 66 | fw = open('{0}/eval_result_logs.txt'.format(output_result_dir), 'w') 67 | 68 | for i, data in enumerate(testdataloader, 0): 69 | points, choose, img, target, model_points, idx = data 70 | if len(points.size()) == 2: 71 | print('No.{0} NOT Pass! Lost detection!'.format(i)) 72 | fw.write('No.{0} NOT Pass! Lost detection!\n'.format(i)) 73 | continue 74 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 75 | Variable(choose).cuda(), \ 76 | Variable(img).cuda(), \ 77 | Variable(target).cuda(), \ 78 | Variable(model_points).cuda(), \ 79 | Variable(idx).cuda() 80 | 81 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) 82 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 83 | pred_c = pred_c.view(bs, num_points) 84 | how_max, which_max = torch.max(pred_c, 1) 85 | pred_t = pred_t.view(bs * num_points, 1, 3) 86 | 87 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 88 | my_t = (points.view(bs * num_points, 1, 3) + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 89 | my_pred = np.append(my_r, my_t) 90 | 91 | for ite in range(0, iteration): 92 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 93 | my_mat = quaternion_matrix(my_r) 94 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 95 | my_mat[0:3, 3] = my_t 96 | 97 | new_points = torch.bmm((points - T), R).contiguous() 98 | pred_r, pred_t = refiner(new_points, emb, idx) 99 | pred_r = pred_r.view(1, 1, -1) 100 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 101 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 102 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 103 | my_mat_2 = quaternion_matrix(my_r_2) 104 | my_mat_2[0:3, 3] = my_t_2 105 | 106 | my_mat_final = np.dot(my_mat, my_mat_2) 107 | my_r_final = copy.deepcopy(my_mat_final) 108 | my_r_final[0:3, 3] = 0 109 | my_r_final = quaternion_from_matrix(my_r_final, True) 110 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 111 | 112 | my_pred = np.append(my_r_final, my_t_final) 113 | my_r = my_r_final 114 | my_t = my_t_final 115 | 116 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 117 | 118 | model_points = model_points[0].cpu().detach().numpy() 119 | my_r = quaternion_matrix(my_r)[:3, :3] 120 | pred = np.dot(model_points, my_r.T) + my_t 121 | target = target[0].cpu().detach().numpy() 122 | 123 | if idx[0].item() in sym_list: 124 | pred = torch.from_numpy(pred.astype(np.float32)).cuda().transpose(1, 0).contiguous() 125 | target = torch.from_numpy(target.astype(np.float32)).cuda().transpose(1, 0).contiguous() 126 | inds = knn(target.unsqueeze(0), pred.unsqueeze(0)) 127 | target = torch.index_select(target, 1, inds.view(-1) - 1) 128 | dis = torch.mean(torch.norm((pred.transpose(1, 0) - target.transpose(1, 0)), dim=1), dim=0).item() 129 | else: 130 | dis = np.mean(np.linalg.norm(pred - target, 
axis=1)) 131 | 132 | if dis < diameter[idx[0].item()]: 133 | success_count[idx[0].item()] += 1 134 | print('No.{0} Pass! Distance: {1}'.format(i, dis)) 135 | fw.write('No.{0} Pass! Distance: {1}\n'.format(i, dis)) 136 | else: 137 | print('No.{0} NOT Pass! Distance: {1}'.format(i, dis)) 138 | fw.write('No.{0} NOT Pass! Distance: {1}\n'.format(i, dis)) 139 | num_count[idx[0].item()] += 1 140 | 141 | for i in range(num_objects): 142 | print('Object {0} success rate: {1}'.format(objlist[i], float(success_count[i]) / num_count[i])) 143 | fw.write('Object {0} success rate: {1}\n'.format(objlist[i], float(success_count[i]) / num_count[i])) 144 | print('ALL success rate: {0}'.format(float(sum(success_count)) / sum(num_count))) 145 | fw.write('ALL success rate: {0}\n'.format(float(sum(success_count)) / sum(num_count))) 146 | fw.close() 147 | -------------------------------------------------------------------------------- /scripts/tools/eval_ycb.py: -------------------------------------------------------------------------------- 1 | import _init_paths 2 | import argparse 3 | import os 4 | import copy 5 | import random 6 | import numpy as np 7 | from PIL import Image 8 | import scipy.io as scio 9 | import scipy.misc 10 | import numpy.ma as ma 11 | import math 12 | import torch 13 | import torch.nn as nn 14 | import torch.nn.parallel 15 | import torch.backends.cudnn as cudnn 16 | import torch.optim as optim 17 | import torch.utils.data 18 | import torchvision.datasets as dset 19 | import torchvision.transforms as transforms 20 | import torchvision.utils as vutils 21 | import torch.nn.functional as F 22 | from torch.autograd import Variable 23 | from datasets.ycb.dataset import PoseDataset 24 | from lib.network import PoseNet, PoseRefineNet 25 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 26 | 27 | parser = argparse.ArgumentParser() 28 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 29 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 30 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 31 | opt = parser.parse_args() 32 | 33 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 34 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 35 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 36 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 37 | cam_cx = 312.9869 38 | cam_cy = 241.3109 39 | cam_fx = 1066.778 40 | cam_fy = 1067.487 41 | cam_scale = 10000.0 42 | num_obj = 21 43 | img_width = 480 44 | img_length = 640 45 | num_points = 1000 46 | num_points_mesh = 500 47 | iteration = 2 48 | bs = 1 49 | dataset_config_dir = 'datasets/ycb/dataset_config' 50 | ycb_toolbox_dir = 'YCB_Video_toolbox' 51 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 52 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 53 | 54 | def get_bbox(posecnn_rois): 55 | rmin = int(posecnn_rois[idx][3]) + 1 56 | rmax = int(posecnn_rois[idx][5]) - 1 57 | cmin = int(posecnn_rois[idx][2]) + 1 58 | cmax = int(posecnn_rois[idx][4]) - 1 59 | r_b = rmax - rmin 60 | for tt in range(len(border_list)): 61 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 62 | r_b = border_list[tt + 1] 63 | break 64 | c_b = cmax - cmin 65 | for tt in range(len(border_list)): 66 | if c_b > border_list[tt] and c_b < 
border_list[tt + 1]: 67 | c_b = border_list[tt + 1] 68 | break 69 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 70 | rmin = center[0] - int(r_b / 2) 71 | rmax = center[0] + int(r_b / 2) 72 | cmin = center[1] - int(c_b / 2) 73 | cmax = center[1] + int(c_b / 2) 74 | if rmin < 0: 75 | delt = -rmin 76 | rmin = 0 77 | rmax += delt 78 | if cmin < 0: 79 | delt = -cmin 80 | cmin = 0 81 | cmax += delt 82 | if rmax > img_width: 83 | delt = rmax - img_width 84 | rmax = img_width 85 | rmin -= delt 86 | if cmax > img_length: 87 | delt = cmax - img_length 88 | cmax = img_length 89 | cmin -= delt 90 | return rmin, rmax, cmin, cmax 91 | 92 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 93 | estimator.cuda() 94 | estimator.load_state_dict(torch.load(opt.model)) 95 | estimator.eval() 96 | 97 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 98 | refiner.cuda() 99 | refiner.load_state_dict(torch.load(opt.refine_model)) 100 | refiner.eval() 101 | 102 | testlist = [] 103 | input_file = open('{0}/test_data_list.txt'.format(dataset_config_dir)) 104 | while 1: 105 | input_line = input_file.readline() 106 | if not input_line: 107 | break 108 | if input_line[-1:] == '\n': 109 | input_line = input_line[:-1] 110 | testlist.append(input_line) 111 | input_file.close() 112 | print(len(testlist)) 113 | 114 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 115 | class_id = 1 116 | cld = {} 117 | while 1: 118 | class_input = class_file.readline() 119 | if not class_input: 120 | break 121 | class_input = class_input[:-1] 122 | 123 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 124 | cld[class_id] = [] 125 | while 1: 126 | input_line = input_file.readline() 127 | if not input_line: 128 | break 129 | input_line = input_line[:-1] 130 | input_line = input_line.split(' ') 131 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 132 | input_file.close() 133 | cld[class_id] = np.array(cld[class_id]) 134 | class_id += 1 135 | 136 | for now in range(0, 2949): 137 | img = Image.open('{0}/{1}-color.png'.format(opt.dataset_root, testlist[now])) 138 | depth = np.array(Image.open('{0}/{1}-depth.png'.format(opt.dataset_root, testlist[now]))) 139 | posecnn_meta = scio.loadmat('{0}/results_PoseCNN_RSS2018/{1}.mat'.format(ycb_toolbox_dir, '%06d' % now)) 140 | label = np.array(posecnn_meta['labels']) 141 | posecnn_rois = np.array(posecnn_meta['rois']) 142 | 143 | lst = posecnn_rois[:, 1:2].flatten() 144 | my_result_wo_refine = [] 145 | my_result = [] 146 | 147 | for idx in range(len(lst)): 148 | itemid = lst[idx] 149 | try: 150 | rmin, rmax, cmin, cmax = get_bbox(posecnn_rois) 151 | 152 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 153 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 154 | mask = mask_label * mask_depth 155 | 156 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 157 | if len(choose) > num_points: 158 | c_mask = np.zeros(len(choose), dtype=int) 159 | c_mask[:num_points] = 1 160 | np.random.shuffle(c_mask) 161 | choose = choose[c_mask.nonzero()] 162 | else: 163 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 164 | 165 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 166 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 167 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 168 | choose = 
np.array([choose]) 169 | 170 | pt2 = depth_masked / cam_scale 171 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 172 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 173 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 174 | 175 | img_masked = np.array(img)[:, :, :3] 176 | img_masked = np.transpose(img_masked, (2, 0, 1)) 177 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 178 | 179 | cloud = torch.from_numpy(cloud.astype(np.float32)) 180 | choose = torch.LongTensor(choose.astype(np.int32)) 181 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 182 | index = torch.LongTensor([itemid - 1]) 183 | 184 | cloud = Variable(cloud).cuda() 185 | choose = Variable(choose).cuda() 186 | img_masked = Variable(img_masked).cuda() 187 | index = Variable(index).cuda() 188 | 189 | cloud = cloud.view(1, num_points, 3) 190 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 191 | 192 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 193 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 194 | 195 | pred_c = pred_c.view(bs, num_points) 196 | how_max, which_max = torch.max(pred_c, 1) 197 | pred_t = pred_t.view(bs * num_points, 1, 3) 198 | points = cloud.view(bs * num_points, 1, 3) 199 | 200 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 201 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 202 | my_pred = np.append(my_r, my_t) 203 | my_result_wo_refine.append(my_pred.tolist()) 204 | 205 | for ite in range(0, iteration): 206 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 207 | my_mat = quaternion_matrix(my_r) 208 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 209 | my_mat[0:3, 3] = my_t 210 | 211 | new_cloud = torch.bmm((cloud - T), R).contiguous() 212 | pred_r, pred_t = refiner(new_cloud, emb, index) 213 | pred_r = pred_r.view(1, 1, -1) 214 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 215 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 216 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 217 | my_mat_2 = quaternion_matrix(my_r_2) 218 | 219 | my_mat_2[0:3, 3] = my_t_2 220 | 221 | my_mat_final = np.dot(my_mat, my_mat_2) 222 | my_r_final = copy.deepcopy(my_mat_final) 223 | my_r_final[0:3, 3] = 0 224 | my_r_final = quaternion_from_matrix(my_r_final, True) 225 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 226 | 227 | my_pred = np.append(my_r_final, my_t_final) 228 | my_r = my_r_final 229 | my_t = my_t_final 230 | 231 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 232 | 233 | my_result.append(my_pred.tolist()) 234 | except ZeroDivisionError: 235 | print("PoseCNN Detector Lost {0} at No.{1} keyframe".format(itemid, now)) 236 | my_result_wo_refine.append([0.0 for i in range(7)]) 237 | my_result.append([0.0 for i in range(7)]) 238 | 239 | scio.savemat('{0}/{1}.mat'.format(result_wo_refine_dir, '%04d' % now), {'poses':my_result_wo_refine}) 240 | scio.savemat('{0}/{1}.mat'.format(result_refine_dir, '%04d' % now), {'poses':my_result}) 241 | print("Finish No.{0} keyframe".format(now)) 242 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | from matplotlib import pyplot as plt 42 | import time 43 | 44 | 45 | 46 | ########################################################################################## 47 | 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 50 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 51 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 52 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model') 53 | parser.add_argument('--num_classes', type=int, default=22, help='num of object classes (with void)') 54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 55 | 56 | 57 | opt = parser.parse_args() 58 | import numpy.ma as ma 59 | 60 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 61 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 62 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 63 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 64 | cam_cx = 312.9869 65 | cam_cy = 241.3109 66 | cam_fx = 1066.778 67 | cam_fy = 1067.487 68 | #cam_scale = 10000.0 69 | cam_scale = 1000.0 70 | num_obj = 21 71 | img_width = 480 72 | img_length = 640 73 | num_points = 1000 74 | num_points_mesh = 500 75 | iteration = 2 76 | bs = 1 77 | dataset_config_dir = 'datasets/ycb/dataset_config' 78 | ycb_toolbox_dir = 'YCB_Video_toolbox' 79 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 80 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 81 | cam_mat = np.load('matrix.npy') 82 | dist = np.load('distortion.npy') 83 | #dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0]) 84 | 85 | ######################################################################################### 86 | 87 | def isRotationMatrix(R) : 88 | Rt = np.transpose(R) 89 | shouldBeIdentity = 
np.dot(Rt, R) 90 | I = np.identity(3, dtype = R.dtype) 91 | n = np.linalg.norm(I - shouldBeIdentity) 92 | return n < 1e-6 93 | 94 | 95 | def rotationMatrixToEulerAngles(R) : 96 | 97 | assert(isRotationMatrix(R)) 98 | 99 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 100 | 101 | singular = sy < 1e-6 102 | 103 | if not singular : 104 | x = math.atan2(R[2,1] , R[2,2]) 105 | y = math.atan2(-R[2,0], sy) 106 | z = math.atan2(R[1,0], R[0,0]) 107 | else : 108 | x = math.atan2(-R[1,2], R[1,1]) 109 | y = math.atan2(-R[2,0], sy) 110 | z = 0 111 | 112 | return np.array([x, y, z]) 113 | 114 | ################################################################################################ 115 | 116 | # get bbox coordinate 117 | def get_bbox(label): 118 | rows = np.any(label, axis=1) 119 | cols = np.any(label, axis=0) 120 | rmin, rmax = np.where(rows)[0][[0, -1]] 121 | cmin, cmax = np.where(cols)[0][[0, -1]] 122 | rmax += 1 123 | cmax += 1 124 | r_b = rmax - rmin 125 | for tt in range(len(border_list)): 126 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 127 | r_b = border_list[tt + 1] 128 | break 129 | c_b = cmax - cmin 130 | for tt in range(len(border_list)): 131 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 132 | c_b = border_list[tt + 1] 133 | break 134 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 135 | rmin = center[0] - int(r_b / 2) 136 | rmax = center[0] + int(r_b / 2) 137 | cmin = center[1] - int(c_b / 2) 138 | cmax = center[1] + int(c_b / 2) 139 | if rmin < 0: 140 | delt = -rmin 141 | rmin = 0 142 | rmax += delt 143 | if cmin < 0: 144 | delt = -cmin 145 | cmin = 0 146 | cmax += delt 147 | if rmax > img_width: 148 | delt = rmax - img_width 149 | rmax = img_width 150 | rmin -= delt 151 | if cmax > img_length: 152 | delt = cmax - img_length 153 | cmax = img_length 154 | cmin -= delt 155 | return rmin, rmax, cmin, cmax 156 | 157 | ############################ with detection algorithm ############################# 158 | # def get_bbox(rois,idx): 159 | # # rmin = int(posecnn_rois[idx][2]) + 1 160 | # # rmax = int(posecnn_rois[idx][4]) - 1 161 | # # cmin = int(posecnn_rois[idx][1]) + 1 162 | # # cmax = int(posecnn_rois[idx][3]) - 1 163 | # rmin = int(rois[idx].xmin) + 1 164 | # rmax = int(rois[idx].xmax) - 1 165 | # cmin = int(rois[idx].ymin) + 1 166 | # cmax = int(rois[idx].ymax) - 1 167 | # r_b = rmax - rmin 168 | # for tt in range(len(border_list)): 169 | # if r_b > border_list[tt] and r_b < border_list[tt + 1]: 170 | # r_b = border_list[tt + 1] 171 | # break 172 | # c_b = cmax - cmin 173 | # for tt in range(len(border_list)): 174 | # if c_b > border_list[tt] and c_b < border_list[tt + 1]: 175 | # c_b = border_list[tt + 1] 176 | # break 177 | # center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 178 | # rmin = center[0] - int(r_b / 2) 179 | # rmax = center[0] + int(r_b / 2) 180 | # cmin = center[1] - int(c_b / 2) 181 | # cmax = center[1] + int(c_b / 2) 182 | # if rmin < 0: 183 | # delt = -rmin 184 | # rmin = 0 185 | # rmax += delt 186 | # if cmin < 0: 187 | # delt = -cmin 188 | # cmin = 0 189 | # cmax += delt 190 | # if rmax > img_width: 191 | # delt = rmax - img_width 192 | # rmax = img_width 193 | # rmin -= delt 194 | # if cmax > img_length: 195 | # delt = cmax - img_length 196 | # cmax = img_length 197 | # cmin -= delt 198 | # return rmin, rmax, cmin, cmax 199 | 200 | 201 | #################################################################################################### 202 | ################################### load BiSeNet parameters 
######################################## 203 | #################################################################################################### 204 | print('load BiseNet') 205 | start_time = time.time() 206 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 207 | bise_model = bise_model.cuda() 208 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 209 | global bise_model 210 | print('Done!') 211 | print("Load time : {}".format(time.time() - start_time)) 212 | 213 | ##################################################################################################### 214 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 215 | ##################################################################################################### 216 | print('load densefusion network') 217 | start_time = time.time() 218 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 219 | estimator.cuda() 220 | estimator.load_state_dict(torch.load(opt.model)) 221 | estimator.eval() 222 | ############################################################################ 223 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 224 | refiner.cuda() 225 | refiner.load_state_dict(torch.load(opt.refine_model)) 226 | refiner.eval() 227 | print('Done') 228 | print("Load time : {}".format(time.time() - start_time)) 229 | ##################################################################################################### 230 | # class list upload 231 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 232 | class_id = 1 233 | cld = {} 234 | while 1: 235 | class_input = class_file.readline() 236 | if not class_input: 237 | break 238 | class_input = class_input[:-1] 239 | 240 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 241 | cld[class_id] = [] 242 | while 1: 243 | input_line = input_file.readline() 244 | if not input_line: 245 | break 246 | input_line = input_line[:-1] 247 | input_line = input_line.split(' ') 248 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 249 | input_file.close() 250 | cld[class_id] = np.array(cld[class_id]) 251 | class_id += 1 252 | ######################################################################################################## 253 | def seg_predict(image): 254 | global bise_model 255 | try: 256 | with torch.no_grad(): 257 | bise_model.eval() 258 | h,w,_ = image.shape 259 | to_tensor = transforms.Compose([ 260 | transforms.ToTensor(), 261 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 262 | ]) 263 | 264 | image = to_tensor(image) 265 | image = image.unsqueeze_(0) 266 | image = image.cuda() 267 | predict = bise_model(image).squeeze() 268 | predict = reverse_one_hot(predict) 269 | predict = np.array(predict) 270 | print(np.unique(predict)) 271 | predict = np.resize(predict,[h,w]) 272 | pub_label = np.uint8(predict) 273 | cv2.imwrite('./segmentation_image.png', pub_label) 274 | 275 | return predict, pub_label 276 | except CvBridgeError as e: 277 | print(e) 278 | 279 | 280 | 281 | 282 | 283 | def pose_predict(img, depth,rois): 284 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 285 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 286 | class_list = ['002_master_chef_can', 287 | '003_cracker_box', 288 | '004_sugar_box', 289 | '005_tomato_soup_can', 290 | '006_mustard_bottle', 291 | '007_tuna_fish_can', 292 | '008_pudding_box', 293 | '009_gelatin_box', 294 | '010_potted_meat_can', 295 
| '011_banana',
296 | '019_pitcher_base',
297 | '025_mug',
298 | '021_bleach_cleanser',
299 | '024_bowl',
300 | '035_power_drill',
301 | '036_wood_block',
302 | '037_scissors',
303 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick']
304 | try:
305 | object_number = len(rois)
306 |
307 | #lst = posecnn_rois[:,0:1].flatten()
308 | #lst = np.unique(label)
309 | my_result_wo_refine = []
310 | my_result = []
311 | for idx in range(object_number):
312 | #itemid = lst[idx]
313 | itemid = class_list.index(rois[idx].Class) +1
314 | #itemid = class_list.index(rois[idx].Class) +3
315 | print(object_number,itemid, rois[idx])
316 |
317 | try:
318 | label, pub_label = seg_predict(img)
319 | pub_label =pub_label * 50
320 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1'))
321 | ####################### with Detection algorithm #################################
322 | # rmin, rmax, cmin,cmax = get_bbox(rois,idx)
323 | #####################################################################################
324 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0))
325 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid))
326 | mask = mask_label * mask_depth
327 | rmin, rmax, cmin, cmax = get_bbox(mask_label)
328 |
329 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0]
330 | if len(choose) > num_points:
331 | c_mask = np.zeros(len(choose), dtype=int)
332 | c_mask[:num_points] = 1
333 | np.random.shuffle(c_mask)
334 | choose = choose[c_mask.nonzero()]
335 | else:
336 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap')
337 |
338 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
339 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
340 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32)
341 | choose = np.array([choose])
342 |
343 | pt2 = depth_masked / cam_scale
344 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx
345 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy
346 | cloud = np.concatenate((pt0, pt1, pt2), axis=1)
347 |
348 | img_masked = np.array(img)[:, :, :3]
349 | img_masked = np.transpose(img_masked, (2, 0, 1))
350 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
351 |
352 | cloud = torch.from_numpy(cloud.astype(np.float32))
353 | choose = torch.LongTensor(choose.astype(np.int32))
354 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32)))
355 | index = torch.LongTensor([itemid - 1])
356 |
357 | cloud = Variable(cloud).cuda()
358 | choose = Variable(choose).cuda()
359 | img_masked = Variable(img_masked).cuda()
360 | index = Variable(index).cuda()
361 | cloud = cloud.view(1, num_points, 3)
362 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2])
363 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index)
364 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1)
365 | pred_c = pred_c.view(bs, num_points)
366 | how_max, which_max = torch.max(pred_c, 1)
367 | pred_t = pred_t.view(bs * num_points, 1, 3)
368 | points = cloud.view(bs * num_points, 1, 3)
369 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy()
370 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy()
371 | my_pred = np.append(my_r, my_t)
372 | # making pose matrix (the rotation is derived from the predicted quaternion here, since 'dof' is only built further down)
373 | rot_to_angle = rotationMatrixToEulerAngles(quaternion_matrix(my_r)[:3, :3])
374 | rot_to_angle = rot_to_angle.reshape(1,3)
375 | my_t = my_t.reshape(1,3)
376 | rot_t = 
np.concatenate([rot_to_angle,my_t], axis= 0) 377 | 378 | # cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 379 | # [0, 0, 1]])) 380 | #tl = np.array([100,100,100]) 381 | #cam_mat = cv2.UMat(np.matrix([[960.14238289, 0, 252.43270692], [0, 960.14238289, 317.39366696], 382 | # [0, 0, 1]])) 383 | 384 | 385 | 386 | for ite in range(0, iteration): 387 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 388 | my_mat = quaternion_matrix(my_r) 389 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 390 | my_mat[0:3, 3] = my_t 391 | 392 | new_cloud = torch.bmm((cloud - T), R).contiguous() 393 | pred_r, pred_t = refiner(new_cloud, emb, index) 394 | pred_r = pred_r.view(1, 1, -1) 395 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 396 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 397 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 398 | my_mat_2 = quaternion_matrix(my_r_2) 399 | 400 | 401 | my_mat_2[0:3, 3] = my_t_2 402 | my_mat_final = np.dot(my_mat, my_mat_2) 403 | my_r_final = copy.deepcopy(my_mat_final) 404 | my_r_final[0:3, 3] = 0 405 | my_r_final = quaternion_from_matrix(my_r_final, True) 406 | 407 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 408 | 409 | my_pred = np.append(my_r_final, my_t_final) 410 | my_r = my_r_final 411 | my_t = my_t_final 412 | open_cv_image = img.copy() 413 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) 414 | dof = quaternion_matrix(my_r) 415 | dof[0:3,3] = my_t 416 | 417 | 418 | object_poses = { 419 | 'tx':my_t[0][0], 420 | 'ty':my_t[0][1], 421 | 'tz':my_t[0][2], 422 | 'qx':my_r[0], 423 | 'qy':my_r[1], 424 | 'qz':my_r[2], 425 | 'qw':my_r[3]} 426 | my_result.append(object_poses) 427 | open_cv_image = img.copy() 428 | open_cv_image = cv2.cvtColor(open_cv_image, cv2.COLOR_RGB2BGR) 429 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 13 = mug 430 | open_cv_image = draw(open_cv_image,imgpts, itemid) 431 | 432 | 433 | except ZeroDivisionError: 434 | open_cv_image = None 435 | print('Fail') 436 | except CvBridgeError as e: 437 | print(e) 438 | return my_result, open_cv_image 439 | 440 | def draw(img, imgpts, label): 441 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 442 | for point in imgpts: 443 | 444 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 445 | return img 446 | 447 | 448 | 449 | def image_callback(rgb): 450 | bridge = CvBridge() 451 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8') 452 | 453 | global cv_image 454 | global bridge 455 | 456 | 457 | def depth_callback(depth): 458 | cv_depth = bridge.imgmsg_to_cv2(depth,'16UC1') 459 | cv2.imwrite('./depth.png', cv_depth) 460 | 461 | 462 | global cv_depth 463 | 464 | def rois_callback(rois): 465 | detect_res = rois.bounding_boxes 466 | global detect_res 467 | 468 | def pose_server(): 469 | rospy.init_node('pose_estimation_server') 470 | s = rospy.Service('/cvipl/pose_server', AddTwoInts, implimentation_seg) 471 | # only Segmentation 472 | 473 | # only Pose Estimation 474 | rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback) 475 | depth_sub = 
rospy.Subscriber('/camera/aligned_depth_to_color/image_raw',Image, depth_callback) 476 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback) 477 | 478 | 479 | def implimentation_seg(req): 480 | global cv_image 481 | global cv_depth 482 | global detect_res 483 | 484 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 485 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 486 | if req.a == 2 : 487 | print(cv_image.shape,cv_depth.shape,detect_res) 488 | seg_result = seg_predict(cv_image) 489 | 490 | elif req.a == 3 : 491 | print(req.a) 492 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res) 493 | pose_array = PoseArray() 494 | pose_msg = Pose() 495 | print(pose_estimation) 496 | 497 | for i in range(len(pose_estimation)): 498 | pose_msg.position.x = pose_estimation[i]['tx'] 499 | pose_msg.position.y = pose_estimation[i]['ty'] 500 | pose_msg.position.z = pose_estimation[i]['tz'] 501 | 502 | pose_msg.orientation.x = pose_estimation[i]['qx'] 503 | pose_msg.orientation.y = pose_estimation[i]['qy'] 504 | pose_msg.orientation.z = pose_estimation[i]['qz'] 505 | pose_msg.orientation.w = pose_estimation[i]['qw'] 506 | 507 | pose_array.poses.append(pose_msg) 508 | pose_pub.publish(pose_array) 509 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 510 | 511 | 512 | 513 | def main(): 514 | pose_server() 515 | rospy.spin() 516 | 517 | if __name__ == '__main__': 518 | main() 519 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb_message.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | from matplotlib import pyplot as plt 42 | import time 43 | 44 | 45 | 46 | ########################################################################################## 47 | 48 | parser = argparse.ArgumentParser() 49 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir') 50 | parser.add_argument('--model', type=str, default = 
'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model')
51 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model')
52 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model')
53 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)')
54 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.')
55 | parser.add_argument('--image_subscriber', type=str, default='/camera/color/image_raw')
56 | parser.add_argument('--depth_subscriber', type=str, default='/camera/depth/image_rect_raw')
57 |
58 |
59 |
60 | opt = parser.parse_args()
61 |
62 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
63 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680]
64 | xmap = np.array([[j for i in range(640)] for j in range(480)])
65 | ymap = np.array([[i for i in range(640)] for j in range(480)])
66 | cam_cx = 312.9869
67 | cam_cy = 241.3109
68 | cam_fx = 1066.778
69 | cam_fy = 1067.487
70 | cam_scale = 10000.0
71 | num_obj = 21
72 | img_width = 480
73 | img_length = 640
74 | num_points = 1000
75 | num_points_mesh = 500
76 | iteration = 2
77 | bs = 1
78 | dataset_config_dir = 'datasets/ycb/dataset_config'
79 | ycb_toolbox_dir = 'YCB_Video_toolbox'
80 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result'
81 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result'
82 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0])
83 |
84 |
85 | def image_callback(rgb):
86 | bridge = CvBridge()
87 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8')
88 | global cv_image
89 |
90 |
91 | def depth_callback(depth):
92 | bridge = CvBridge()
93 | cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1')
94 | global cv_depth
95 |
96 | def rois_callback(rois):
97 |
98 | detect_res = rois.bounding_boxes
99 | global detect_res
100 | implimentation_seg()
101 |
102 |
103 | rgb_sub = rospy.Subscriber(opt.image_subscriber,Image, image_callback)
104 | depth_sub = rospy.Subscriber(opt.depth_subscriber,Image, depth_callback)
105 | rois_sub = rospy.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes, rois_callback)
106 | #########################################################################################
107 |
108 | def isRotationMatrix(R) :
109 | Rt = np.transpose(R)
110 | shouldBeIdentity = np.dot(Rt, R)
111 | I = np.identity(3, dtype = R.dtype)
112 | n = np.linalg.norm(I - shouldBeIdentity)
113 | return n < 1e-6
114 |
115 |
116 | def rotationMatrixToEulerAngles(R) :
117 |
118 | assert(isRotationMatrix(R))
119 |
120 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0])
121 |
122 | singular = sy < 1e-6
123 |
124 | if not singular :
125 | x = math.atan2(R[2,1] , R[2,2])
126 | y = math.atan2(-R[2,0], sy)
127 | z = math.atan2(R[1,0], R[0,0])
128 | else :
129 | x = math.atan2(-R[1,2], R[1,1])
130 | y = math.atan2(-R[2,0], sy)
131 | z = 0
132 |
133 | return np.array([x, y, z])
134 |
135 | ################################################################################################
136 | """
137 | ##################################################################################################
138 | # get bbox coordinate
139 | def get_bbox(label):
140 | rows = np.any(label, axis=1)
141 | cols = np.any(label, axis=0) 142 | rmin, rmax = 143 | np.where(rows)[0][[0, -1]] 144 | cmin, cmax = np.where(cols)[0][[0, -1]] 145 | rmax += 1 146 | cmax += 1 147 | r_b = rmax - rmin 148 | for tt in range(len(border_list)): 149 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 150 | r_b = border_list[tt + 1] 151 | break 152 | c_b = cmax - cmin 153 | for tt in range(len(border_list)): 154 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 155 | c_b = border_list[tt + 1] 156 | break 157 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 158 | rmin = center[0] - int(r_b / 2) 159 | rmax = center[0] + int(r_b / 2) 160 | cmin = center[1] - int(c_b / 2) 161 | cmax = center[1] + int(c_b / 2) 162 | if rmin < 0: 163 | delt = -rmin 164 | rmin = 0 165 | rmax += delt 166 | if cmin < 0: 167 | delt = -cmin 168 | cmin = 0 169 | cmax += delt 170 | if rmax > img_width: 171 | delt = rmax - img_width 172 | rmax = img_width 173 | rmin -= delt 174 | if cmax > img_length: 175 | delt = cmax - img_length 176 | cmax = img_length 177 | cmin -= delt 178 | return rmin, rmax, cmin, cmax 179 | """ 180 | def get_bbox(rois,idx): 181 | # rmin = int(posecnn_rois[idx][2]) + 1 182 | # rmax = int(posecnn_rois[idx][4]) - 1 183 | # cmin = int(posecnn_rois[idx][1]) + 1 184 | # cmax = int(posecnn_rois[idx][3]) - 1 185 | rmin = int(rois[idx].xmin) + 1 186 | rmax = int(rois[idx].xmax) - 1 187 | cmin = int(rois[idx].ymin) + 1 188 | cmax = int(rois[idx].ymax) - 1 189 | r_b = rmax - rmin 190 | for tt in range(len(border_list)): 191 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 192 | r_b = border_list[tt + 1] 193 | break 194 | c_b = cmax - cmin 195 | for tt in range(len(border_list)): 196 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 197 | c_b = border_list[tt + 1] 198 | break 199 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 200 | rmin = center[0] - int(r_b / 2) 201 | rmax = center[0] + int(r_b / 2) 202 | cmin = center[1] - int(c_b / 2) 203 | cmax = center[1] + int(c_b / 2) 204 | if rmin < 0: 205 | delt = -rmin 206 | rmin = 0 207 | rmax += delt 208 | if cmin < 0: 209 | delt = -cmin 210 | cmin = 0 211 | cmax += delt 212 | if rmax > img_width: 213 | delt = rmax - img_width 214 | rmax = img_width 215 | rmin -= delt 216 | if cmax > img_length: 217 | delt = cmax - img_length 218 | cmax = img_length 219 | cmin -= delt 220 | return rmin, rmax, cmin, cmax 221 | #################################################################################################### 222 | ################################### load BiSeNet parameters ######################################## 223 | #################################################################################################### 224 | print('load BiseNet') 225 | start_time = time.time() 226 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 227 | bise_model = bise_model.cuda() 228 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 229 | global bise_model 230 | print('Done!') 231 | print("Load time : {}".format(time.time() - start_time)) 232 | 233 | ##################################################################################################### 234 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 235 | ##################################################################################################### 236 | print('load densefusion network') 237 | start_time = time.time() 238 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 239 | estimator.cuda() 
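# Note on the two networks loaded in this block: 'estimator' is DenseFusion's PoseNet; given the
# normalized RGB crop, the masked depth point cloud, the chosen pixel indices and the object's
# class index, it predicts a per-point rotation quaternion, translation offset and confidence,
# and the most confident point is kept as the pose hypothesis. 'refiner' (PoseRefineNet, loaded
# just below) implements the iterative refinement stage; in this particular script pose_predict()
# never invokes it, so the published pose is the unrefined PoseNet estimate.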
240 | estimator.load_state_dict(torch.load(opt.model)) 241 | estimator.eval() 242 | ############################################################################ 243 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 244 | refiner.cuda() 245 | refiner.load_state_dict(torch.load(opt.refine_model)) 246 | refiner.eval() 247 | print('Done') 248 | print("Load time : {}".format(time.time() - start_time)) 249 | ##################################################################################################### 250 | # class list upload 251 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 252 | class_id = 1 253 | cld = {} 254 | while 1: 255 | class_input = class_file.readline() 256 | if not class_input: 257 | break 258 | class_input = class_input[:-1] 259 | 260 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 261 | cld[class_id] = [] 262 | while 1: 263 | input_line = input_file.readline() 264 | if not input_line: 265 | break 266 | input_line = input_line[:-1] 267 | input_line = input_line.split(' ') 268 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 269 | input_file.close() 270 | cld[class_id] = np.array(cld[class_id]) 271 | class_id += 1 272 | ######################################################################################################## 273 | def seg_predict(image): 274 | global bise_model 275 | try: 276 | with torch.no_grad(): 277 | bise_model.eval() 278 | h,w,_ = image.shape 279 | to_tensor = transforms.Compose([ 280 | transforms.ToTensor(), 281 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 282 | ]) 283 | 284 | image = to_tensor(image) 285 | image = image.unsqueeze_(0) 286 | image = image.cuda() 287 | predict = bise_model(image).squeeze() 288 | predict = reverse_one_hot(predict) 289 | predict = np.array(predict) 290 | predict = np.resize(predict,[h,w]) 291 | print(np.unique(predict)) 292 | zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR) 293 | cv2.imwrite('./segmentation_image.png', zzzz) 294 | 295 | return predict 296 | except CvBridgeError as e: 297 | print(e) 298 | 299 | 300 | 301 | 302 | 303 | def pose_predict(img, depth,rois): 304 | class_list = ['002_master_chef_can', 305 | '003_cracker_box', 306 | '004_sugar_box', 307 | '005_tomato_soup_can', 308 | '006_mustard_bottle', 309 | '007_tuna_fish_can', 310 | '008_pudding_box', 311 | '009_gelatin_box', 312 | '010_potted_meat_can', 313 | '011_banana', 314 | '019_pitcher_base', 315 | '025_mug', 316 | '021_bleach_cleanser', 317 | '024_bowl', 318 | '035_power_drill', 319 | '036_wood_block', 320 | '037_scissors', 321 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick'] 322 | try: 323 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 324 | object_number = len(rois) 325 | bridge = CvBridge() 326 | 327 | #lst = posecnn_rois[:,0:1].flatten() 328 | #lst = np.unique(label) 329 | my_result_wo_refine = [] 330 | my_result = [] 331 | for idx in range(object_number): 332 | #itemid = lst[idx] 333 | #itemid = class_list.index(rois[idx].Class) +1 334 | itemid = class_list.index(rois[idx].Class) +3 335 | 336 | try: 337 | label = seg_predict(img) 338 | cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label) 339 | rmin, rmax, cmin,cmax = get_bbox(rois,idx) 340 | # bounding box cutting 341 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 342 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 343 | 
#mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 344 | #mask = mask_label * mask_depth 345 | # only image 346 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 347 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 348 | mask = mask_label * mask_depth 349 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 350 | 351 | 352 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 353 | print(choose) 354 | if len(choose) > num_points: 355 | c_mask = np.zeros(len(choose), dtype=int) 356 | c_mask[:num_points] = 1 357 | np.random.shuffle(c_mask) 358 | choose = choose[c_mask.nonzero()] 359 | else: 360 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 361 | 362 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 363 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 364 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 365 | choose = np.array([choose]) 366 | 367 | pt2 = depth_masked / cam_scale 368 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 369 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 370 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 371 | img_masked = np.array(img)[:, :, :3] 372 | img_masked = np.transpose(img_masked, (2, 0, 1)) 373 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 374 | 375 | cloud = torch.from_numpy(cloud.astype(np.float32)) 376 | choose = torch.LongTensor(choose.astype(np.int32)) 377 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 378 | index = torch.LongTensor([itemid - 1]) 379 | 380 | cloud = Variable(cloud).cuda() 381 | choose = Variable(choose).cuda() 382 | img_masked = Variable(img_masked).cuda() 383 | index = Variable(index).cuda() 384 | cloud = cloud.view(1, num_points, 3) 385 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 386 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 387 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 388 | pred_c = pred_c.view(bs, num_points) 389 | how_max, which_max = torch.max(pred_c, 1) 390 | pred_t = pred_t.view(bs * num_points, 1, 3) 391 | points = cloud.view(bs * num_points, 1, 3) 392 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 393 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 394 | my_pred = np.append(my_r, my_t) 395 | # making pose matrix 396 | dof = quaternion_matrix(my_r) 397 | dof[0:3,3] = my_t 398 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 399 | rot_to_angle = rot_to_angle.reshape(1,3) 400 | my_t = my_t.reshape(1,3) 401 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 402 | object_poses = { 403 | 'tx':my_t[0][0], 404 | 'ty':my_t[0][1], 405 | 'tz':my_t[0][2], 406 | 'qx':my_r[0], 407 | 'qy':my_r[1], 408 | 'qz':my_r[2], 409 | 'qw':my_r[3]} 410 | my_result.append(object_poses) 411 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 412 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 413 | [0, 0, 1]])) 414 | imgpts, jac = cv2.projectPoints(cld[14], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup 415 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 416 | my_result_wo_refine.append(my_pred.tolist()) 417 | except ZeroDivisionError: 418 | # my_result_wo_refine.append([0.0 for i in range(7)]) 419 | # my_result.append([0.0 for i in range(7)]) 420 | open_cv_image = None 421 | print('Fail') 422 | except CvBridgeError as e: 423 | print(e) 
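# pose_predict() returns 'my_result', a list with one dict per detected bounding box holding the
# camera-frame translation ('tx', 'ty', 'tz', i.e. depth / cam_scale) and the quaternion
# components ('qx', 'qy', 'qz', 'qw') taken directly from the PoseNet prediction, together with
# 'open_cv_image', the BGR frame with the projected 3D model points drawn on it, or None when
# the pose estimate for a box failed.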
424 | 425 | return my_result, open_cv_image 426 | 427 | def draw(img, imgpts, label): 428 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 429 | for point in imgpts: 430 | 431 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 432 | return img 433 | 434 | 435 | 436 | 437 | 438 | 439 | def implimentation_seg(): 440 | global cv_image 441 | global cv_depth 442 | global detect_res 443 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 444 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 445 | bridge = CvBridge() 446 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 447 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth, detect_res) 448 | pose_array = PoseArray() 449 | pose_msg = Pose() 450 | print(pose_estimation) 451 | 452 | for i in range(len(pose_estimation)): 453 | pose_msg.position.x = pose_estimation[i]['tx'] 454 | pose_msg.position.y = pose_estimation[i]['ty'] 455 | pose_msg.position.z = pose_estimation[i]['tz'] 456 | pose_msg.orientation.x = pose_estimation[i]['qx'] 457 | pose_msg.orientation.y = pose_estimation[i]['qy'] 458 | pose_msg.orientation.z = pose_estimation[i]['qz'] 459 | pose_msg.orientation.w = pose_estimation[i]['qw'] 460 | 461 | pose_array.poses.append(pose_msg) 462 | pose_pub.publish(pose_array) 463 | if fit_image is not None: 464 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 465 | 466 | 467 | 468 | def main(): 469 | 470 | rospy.init_node('pose_estimation_server') 471 | rospy.spin() 472 | 473 | if __name__ == '__main__': 474 | main() 475 | -------------------------------------------------------------------------------- /scripts/tools/ros_eval_ycb_publisher.py: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env python 2 | 3 | 4 | 5 | ############# ros packages ##################### 6 | import rospy 7 | from sensor_msgs.msg import Image, CameraInfo 8 | from cv_bridge import CvBridge, CvBridgeError 9 | #from be.srv import AddTwoInts, AddTwoIntsResponse 10 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 11 | from geometry_msgs.msg import Pose, PoseArray 12 | import tf 13 | import message_filters 14 | import cv2 15 | 16 | ############ python pakcages ################### 17 | import _init_paths 18 | import argparse 19 | import sys 20 | import os 21 | import os.path as osp 22 | root_dir = osp.dirname(osp.dirname(__file__)) 23 | 24 | sys.path.append(root_dir) 25 | 26 | import copy 27 | import random 28 | import numpy as np 29 | import scipy.io as scio 30 | import scipy.misc 31 | import numpy.ma as ma 32 | import math 33 | import torch 34 | import torch.nn as nn 35 | import torch.nn.parallel 36 | import torch.backends.cudnn as cudnn 37 | import torch.optim as optim 38 | import torch.utils.data 39 | import torchvision.datasets as dset 40 | import torchvision.transforms as transforms 41 | import torchvision.utils as vutils 42 | import torch.nn.functional as F 43 | from torch.autograd import Variable 44 | from datasets.ycb.dataset import PoseDataset 45 | from lib.network import PoseNet, PoseRefineNet 46 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 47 | from model.build_BiSeNet import BiSeNet 48 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 49 | from matplotlib import pyplot as plt 50 | import time 51 | ################################################### 52 | 53 | 54 | ########################################################################################## 55 | 56 | parser = argparse.ArgumentParser() 57 | parser.add_argument('--dataset_root', type=str, default = 'datasets/ycb/YCB_Video_Dataset/', help='dataset root dir') 58 | parser.add_argument('--model', type=str, default = 'trained_checkpoints/ycb/pose_model_26_0.012863246640872631.pth', help='resume PoseNet model') 59 | parser.add_argument('--refine_model', type=str, default = 'trained_checkpoints/ycb/pose_refine_model_69_0.009449292959118935.pth', help='resume PoseRefineNet model') 60 | parser.add_argument('--checkpoint_path', type=str, default='trained_checkpoints/ycb/best_dice_loss.pth', help='The path to the pretrained weights of model') 61 | parser.add_argument('--num_classes', type=int, default=21, help='num of object classes (with void)') 62 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 63 | parser.add_argument('--image_subscriber', type=str,default='/camera/color/image_raw') 64 | parser.add_argument('--depth_subscriber', type=str,default='/camera/depth/image_rect_raw') 65 | 66 | 67 | 68 | opt = parser.parse_args() 69 | 70 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 71 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 72 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 73 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 74 | #cam_cx = 312.9869 75 | #cam_cy = 241.3109 76 | #cam_fx = 1066.778 77 | #cam_fy = 1067.487 78 | cam_cx = 331.52874755859375 79 | cam_cy = 249.5271453857422 80 | cam_fx = 610.6751708984375 81 | cam_fy = 610.5318603515625 82 | cam_scale = 1000.0 83 | num_obj = 21 84 | img_width = 480 85 | img_length = 640 86 | 
num_points = 1000 87 | num_points_mesh = 500 88 | iteration = 2 89 | bs = 1 90 | dataset_config_dir = 'datasets/ycb/dataset_config' 91 | ycb_toolbox_dir = 'YCB_Video_toolbox' 92 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 93 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 94 | dist= np.array([0.0, 0.0, 0.0, 0.0, 0.0]) 95 | 96 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 97 | pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 98 | pose_fit_image = rospy.Publisher('/pose_fit_image_pub', Image, queue_size = 10) 99 | 100 | bridge = CvBridge() 101 | 102 | def image_callback(rgb): 103 | cv_image = bridge.imgmsg_to_cv2(rgb,'bgr8') 104 | global cv_image 105 | global img_flg 106 | img_flg = True 107 | print('img_flg: ', img_flg) 108 | 109 | def depth_callback(depth): 110 | cv_depth = bridge.imgmsg_to_cv2(depth,'32SC1') 111 | global cv_depth 112 | 113 | if img_flg is True: 114 | implimentation_seg() 115 | 116 | def rois_callback(rois): 117 | detect_res = rois.bounding_boxes 118 | global detect_res 119 | print("get bbox") 120 | 121 | 122 | 123 | ######################################################################################### 124 | 125 | def isRotationMatrix(R) : 126 | Rt = np.transpose(R) 127 | shouldBeIdentity = np.dot(Rt, R) 128 | I = np.identity(3, dtype = R.dtype) 129 | n = np.linalg.norm(I - shouldBeIdentity) 130 | return n < 1e-6 131 | 132 | 133 | def rotationMatrixToEulerAngles(R) : 134 | 135 | assert(isRotationMatrix(R)) 136 | 137 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 138 | 139 | singular = sy < 1e-6 140 | 141 | if not singular : 142 | x = math.atan2(R[2,1] , R[2,2]) 143 | y = math.atan2(-R[2,0], sy) 144 | z = math.atan2(R[1,0], R[0,0]) 145 | else : 146 | x = math.atan2(-R[1,2], R[1,1]) 147 | y = math.atan2(-R[2,0], sy) 148 | z = 0 149 | 150 | return np.array([x, y, z]) 151 | 152 | ################################################################################################ 153 | 154 | ################################################################################################## 155 | # get bbox coordinate 156 | def get_bbox(label): 157 | rows = np.any(label, axis=1) 158 | cols = np.any(label, axis=0) 159 | rmin, rmax = np.where(rows)[0][[0, -1]] 160 | cmin, cmax = np.where(cols)[0][[0, -1]] 161 | rmax += 1 162 | cmax += 1 163 | r_b = rmax - rmin 164 | for tt in range(len(border_list)): 165 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 166 | r_b = border_list[tt + 1] 167 | break 168 | c_b = cmax - cmin 169 | for tt in range(len(border_list)): 170 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 171 | c_b = border_list[tt + 1] 172 | break 173 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 174 | rmin = center[0] - int(r_b / 2) 175 | rmax = center[0] + int(r_b / 2) 176 | cmin = center[1] - int(c_b / 2) 177 | cmax = center[1] + int(c_b / 2) 178 | if rmin < 0: 179 | delt = -rmin 180 | rmin = 0 181 | rmax += delt 182 | if cmin < 0: 183 | delt = -cmin 184 | cmin = 0 185 | cmax += delt 186 | if rmax > img_width: 187 | delt = rmax - img_width 188 | rmax = img_width 189 | rmin -= delt 190 | if cmax > img_length: 191 | delt = cmax - img_length 192 | cmax = img_length 193 | cmin -= delt 194 | return rmin, rmax, cmin, cmax 195 | 196 | '''def get_bbox(rois,idx): 197 | # rmin = int(posecnn_rois[idx][2]) + 1 198 | # rmax = int(posecnn_rois[idx][4]) - 1 199 | # cmin = int(posecnn_rois[idx][1]) + 1 200 | # cmax = int(posecnn_rois[idx][3]) - 1 
201 | rmin = int(rois[idx].xmin) + 1 202 | rmax = int(rois[idx].xmax) - 1 203 | cmin = int(rois[idx].ymin) + 1 204 | cmax = int(rois[idx].ymax) - 1 205 | r_b = rmax - rmin 206 | for tt in range(len(border_list)): 207 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 208 | r_b = border_list[tt + 1] 209 | break 210 | c_b = cmax - cmin 211 | for tt in range(len(border_list)): 212 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 213 | c_b = border_list[tt + 1] 214 | break 215 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 216 | rmin = center[0] - int(r_b / 2) 217 | rmax = center[0] + int(r_b / 2) 218 | cmin = center[1] - int(c_b / 2) 219 | cmax = center[1] + int(c_b / 2) 220 | if rmin < 0: 221 | delt = -rmin 222 | rmin = 0 223 | rmax += delt 224 | if cmin < 0: 225 | delt = -cmin 226 | cmin = 0 227 | cmax += delt 228 | if rmax > img_width: 229 | delt = rmax - img_width 230 | rmax = img_width 231 | rmin -= delt 232 | if cmax > img_length: 233 | delt = cmax - img_length 234 | cmax = img_length 235 | cmin -= delt 236 | return rmin, rmax, cmin, cmax''' 237 | #################################################################################################### 238 | ################################### load BiSeNet parameters ######################################## 239 | #################################################################################################### 240 | print('load BiseNet') 241 | start_time = time.time() 242 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 243 | bise_model = bise_model.cuda() 244 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 245 | global bise_model 246 | print('Done!') 247 | print("Load time : {}".format(time.time() - start_time)) 248 | 249 | ##################################################################################################### 250 | ######################## load Densefusion Netwopy4thork, 3d model ############################# 251 | ##################################################################################################### 252 | print('load densefusion network') 253 | start_time = time.time() 254 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 255 | estimator.cuda() 256 | estimator.load_state_dict(torch.load(opt.model)) 257 | estimator.eval() 258 | ############################################################################ 259 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 260 | refiner.cuda() 261 | refiner.load_state_dict(torch.load(opt.refine_model)) 262 | refiner.eval() 263 | print('Done!') 264 | print("Load time : {}".format(time.time() - start_time)) 265 | ##################################################################################################### 266 | # class list upload 267 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 268 | class_id = 1 269 | cld = {} 270 | while 1: 271 | class_input = class_file.readline() 272 | if not class_input: 273 | break 274 | class_input = class_input[:-1] 275 | 276 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 277 | cld[class_id] = [] 278 | while 1: 279 | input_line = input_file.readline() 280 | if not input_line: 281 | break 282 | input_line = input_line[:-1] 283 | input_line = input_line.split(' ') 284 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 285 | input_file.close() 286 | cld[class_id] = np.array(cld[class_id]) 287 | class_id += 1 288 | 
######################################################################################################## 289 | def seg_predict(image): 290 | global bise_model 291 | try: 292 | with torch.no_grad(): 293 | bise_model.eval() 294 | h,w,_ = image.shape 295 | to_tensor = transforms.Compose([ 296 | transforms.ToTensor(), 297 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 298 | ]) 299 | 300 | image = to_tensor(image) 301 | image = image.unsqueeze_(0) 302 | image = image.cuda() 303 | predict = bise_model(image).squeeze() 304 | predict = reverse_one_hot(predict) 305 | predict = np.array(predict) 306 | predict = np.resize(predict,[h,w]) 307 | pub_label = np.uint8(predict) 308 | #zzzz = cv2.cvtColor(np.uint8(predict), cv2.COLOR_GRAY2BGR) 309 | #cv2.imwrite('./segmentation_image.png', zzzz) 310 | 311 | return predict, pub_label 312 | except CvBridgeError as e: 313 | print(e) 314 | 315 | def pose_predict(img, depth): 316 | class_list = ['002_master_chef_can', 317 | '003_cracker_box', 318 | '004_sugar_box', 319 | '005_tomato_soup_can', 320 | '006_mustard_bottle', 321 | '007_tuna_fish_can', 322 | '008_pudding_box', 323 | '009_gelatin_box', 324 | '010_potted_meat_can', 325 | '011_banana', 326 | '019_pitcher_base', 327 | '025_mug', 328 | '021_bleach_cleanser', 329 | '024_bowl', 330 | '035_power_drill', 331 | '036_wood_block', 332 | '037_scissors', 333 | '040_large_marker','051_large_clamp','052_extra_large_clamp','061_foam_brick'] 334 | try: 335 | bridge = CvBridge() 336 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 337 | label, pub_label = seg_predict(img) 338 | label = label-1 # to match labeling results to class list 339 | pub_label =pub_label * 50 340 | label_pub.publish(bridge.cv2_to_imgmsg(pub_label,'8UC1')) 341 | 342 | object_number = len(np.unique(label)) 343 | print('unique lable: ', np.unique(label)) 344 | 345 | my_result_wo_refine = [] 346 | my_result = [] 347 | open_cv_image = np.zeros_like(img) 348 | 349 | for idx in range(object_number): 350 | if idx == 0: continue 351 | 352 | itemid = np.unique(label)[idx] 353 | print('itemid: ', itemid) 354 | 355 | try: 356 | #cv2.imwrite('/root/catkin_ws/src/dnsefusion/scripts/experiments/scripts/segmentation_image.png', label) 357 | rmin, rmax, cmin,cmax = get_bbox(label) 358 | # bounding box cutting 359 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 360 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 361 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 362 | #mask = mask_label * mask_depth 363 | # only image 364 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 365 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 366 | mask = mask_label * mask_depth 367 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 368 | 369 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 370 | 371 | if len(choose) > num_points: 372 | 373 | c_mask = np.zeros(len(choose), dtype=int) 374 | c_mask[:num_points] = 1 375 | np.random.shuffle(c_mask) 376 | choose = choose[c_mask.nonzero()] 377 | else: 378 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 379 | 380 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 381 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 382 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 383 | choose = np.array([choose]) 384 | 385 | pt2 = depth_masked / cam_scale 386 | pt0 = (ymap_masked - cam_cx) 
* pt2 / cam_fx 387 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 388 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 389 | img_masked = np.array(img)[:, :, :3] 390 | img_masked = np.transpose(img_masked, (2, 0, 1)) 391 | img_masked = img_masked[:, rmin:rmax, cmin:cmax] 392 | 393 | cloud = torch.from_numpy(cloud.astype(np.float32)) 394 | choose = torch.LongTensor(choose.astype(np.int32)) 395 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 396 | index = torch.LongTensor([itemid]) 397 | 398 | cloud = Variable(cloud).cuda() 399 | choose = Variable(choose).cuda() 400 | img_masked = Variable(img_masked).cuda() 401 | index = Variable(index).cuda() 402 | cloud = cloud.view(1, num_points, 3) 403 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 404 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 405 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 406 | pred_c = pred_c.view(bs, num_points) 407 | how_max, which_max = torch.max(pred_c, 1) 408 | pred_t = pred_t.view(bs * num_points, 1, 3) 409 | points = cloud.view(bs * num_points, 1, 3) 410 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 411 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 412 | my_pred = np.append(my_r, my_t) 413 | # making pose matrix 414 | dof = quaternion_matrix(my_r) 415 | dof[0:3,3] = my_t 416 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 417 | rot_to_angle = rot_to_angle.reshape(1,3) 418 | my_t = my_t.reshape(1,3) 419 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 420 | object_poses = { 421 | 'tx':my_t[0][0], 422 | 'ty':my_t[0][1], 423 | 'tz':my_t[0][2], 424 | 'qx':my_r[0], 425 | 'qy':my_r[1], 426 | 'qz':my_r[2], 427 | 'qw':my_r[3]} 428 | my_result.append(object_poses) 429 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 430 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 431 | [0, 0, 1]])) 432 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) # 14 mugcup 433 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 434 | my_result_wo_refine.append(my_pred.tolist()) 435 | except ZeroDivisionError: 436 | # my_result_wo_refine.append([0.0 for i in range(7)]) 437 | # my_result.append([0.0 for i in range(7)]) 438 | open_cv_image = None 439 | print('Fail') 440 | except CvBridgeError as e: 441 | print(e) 442 | 443 | return my_result, open_cv_image 444 | 445 | def draw(img, imgpts, label): 446 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 447 | for point in imgpts: 448 | 449 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 450 | return img 451 | 452 | 453 | def implimentation_seg(): 454 | global cv_image 455 | global cv_depth 456 | 457 | bridge = CvBridge() 458 | 459 | pose_estimation,fit_image = pose_predict(cv_image, cv_depth) 460 | print('pose_estimation: ', pose_estimation) 461 | pose_array = PoseArray() 462 | pose_msg = Pose() 463 | 464 | for i in range(len(pose_estimation)): 465 | pose_msg.position.x = pose_estimation[i]['tx'] 466 | pose_msg.position.y = pose_estimation[i]['ty'] 467 | pose_msg.position.z = pose_estimation[i]['tz'] 468 | pose_msg.orientation.x = pose_estimation[i]['qx'] 469 | pose_msg.orientation.y = pose_estimation[i]['qy'] 470 | 
pose_msg.orientation.z = pose_estimation[i]['qz'] 471 | pose_msg.orientation.w = pose_estimation[i]['qw'] 472 | 473 | pose_array.poses.append(pose_msg) 474 | 475 | pose_pub.publish(pose_array) 476 | if fit_image is not None: 477 | pose_fit_image.publish(bridge.cv2_to_imgmsg(fit_image, 'bgr8')) 478 | 479 | 480 | def main(): 481 | 482 | rospy.init_node('pose_estimation_server') 483 | rgb_sub = rospy.Subscriber(opt.image_subscriber,Image, image_callback) 484 | depth_sub = rospy.Subscriber(opt.depth_subscriber,Image, depth_callback) 485 | rois_sub = rospy.Subscriber('/bbox',BoundingBoxes, rois_callback) 486 | rospy.spin() 487 | 488 | if __name__ == '__main__': 489 | main() 490 | -------------------------------------------------------------------------------- /scripts/tools/temp.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | 3 | ############# ros packages ##################### 4 | import cv2 5 | import rospy 6 | from sensor_msgs.msg import Image, CameraInfo 7 | from cv_bridge import CvBridge, CvBridgeError 8 | from be.srv import AddTwoInts, AddTwoIntsResponse 9 | from darknet_ros_msgs.msg import BoundingBoxes, BoundingBox 10 | from geometry_msgs.msg import Pose, PoseArray 11 | import tf 12 | import message_filters 13 | 14 | ############ python pakcages ################### 15 | import _init_paths 16 | import argparse 17 | import os 18 | import copy 19 | import random 20 | import numpy as np 21 | import scipy.io as scio 22 | import scipy.misc 23 | import numpy.ma as ma 24 | import math 25 | import torch 26 | import torch.nn as nn 27 | import torch.nn.parallel 28 | import torch.backends.cudnn as cudnn 29 | import torch.optim as optim 30 | import torch.utils.data 31 | import torchvision.datasets as dset 32 | import torchvision.transforms as transforms 33 | import torchvision.utils as vutils 34 | import torch.nn.functional as F 35 | from torch.autograd import Variable 36 | from datasets.ycb.dataset import PoseDataset 37 | from lib.network import PoseNet, PoseRefineNet 38 | from lib.transformations import euler_matrix, quaternion_matrix, quaternion_from_matrix 39 | from model.build_BiSeNet import BiSeNet 40 | from utils import reverse_one_hot, compute_global_accuracy, fast_hist, per_class_iu, cal_miou 41 | import time 42 | 43 | ########################################################################################## 44 | 45 | parser = argparse.ArgumentParser() 46 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir') 47 | parser.add_argument('--model', type=str, default = '', help='resume PoseNet model') 48 | parser.add_argument('--refine_model', type=str, default = '', help='resume PoseRefineNet model') 49 | parser.add_argument('--checkpoint_path', type=str, default='', required=True, help='The path to the pretrained weights of model') 50 | parser.add_argument('--num_classes', type=int, default=32, help='num of object classes (with void)') 51 | parser.add_argument('--context_path', type=str, default="resnet101", help='The context path model you are using.') 52 | 53 | 54 | opt = parser.parse_args() 55 | 56 | norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]) 57 | border_list = [-1, 40, 80, 120, 160, 200, 240, 280, 320, 360, 400, 440, 480, 520, 560, 600, 640, 680] 58 | xmap = np.array([[j for i in range(640)] for j in range(480)]) 59 | ymap = np.array([[i for i in range(640)] for j in range(480)]) 60 | cam_cx = 312.9869 61 | cam_cy = 241.3109 62 | cam_fx = 1066.778 63 
| cam_fy = 1067.487 64 | cam_scale = 10000.0 65 | num_obj = 21 66 | img_width = 480 67 | img_length = 640 68 | num_points = 1000 69 | num_points_mesh = 500 70 | iteration = 2 71 | bs = 1 72 | dataset_config_dir = 'datasets/ycb/dataset_config' 73 | ycb_toolbox_dir = 'YCB_Video_toolbox' 74 | result_wo_refine_dir = 'experiments/eval_result/ycb/Densefusion_wo_refine_result' 75 | result_refine_dir = 'experiments/eval_result/ycb/Densefusion_iterative_result' 76 | 77 | ######################################################################################### 78 | 79 | def isRotationMatrix(R) : 80 | Rt = np.transpose(R) 81 | shouldBeIdentity = np.dot(Rt, R) 82 | I = np.identity(3, dtype = R.dtype) 83 | n = np.linalg.norm(I - shouldBeIdentity) 84 | return n < 1e-6 85 | 86 | 87 | def rotationMatrixToEulerAngles(R) : 88 | 89 | assert(isRotationMatrix(R)) 90 | 91 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 92 | 93 | singular = sy < 1e-6 94 | 95 | if not singular : 96 | x = math.atan2(R[2,1] , R[2,2]) 97 | y = math.atan2(-R[2,0], sy) 98 | z = math.atan2(R[1,0], R[0,0]) 99 | else : 100 | x = math.atan2(-R[1,2], R[1,1]) 101 | y = math.atan2(-R[2,0], sy) 102 | z = 0 103 | 104 | return np.array([x, y, z]) 105 | 106 | ################################################################################################ 107 | """ 108 | ################################################################################################## 109 | # get bbox coordinate 110 | def get_bbox(label): 111 | rows = np.any(label, axis=1) 112 | cols = np.any(label, axis=0) 113 | rmin, rmax = 114 | np.where(rows)[0][[0, -1]] 115 | cmin, cmax = np.where(cols)[0][[0, -1]] 116 | rmax += 1 117 | cmax += 1 118 | r_b = rmax - rmin 119 | for tt in range(len(border_list)): 120 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 121 | r_b = border_list[tt + 1] 122 | break 123 | c_b = cmax - cmin 124 | for tt in range(len(border_list)): 125 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 126 | c_b = border_list[tt + 1] 127 | break 128 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 129 | rmin = center[0] - int(r_b / 2) 130 | rmax = center[0] + int(r_b / 2) 131 | cmin = center[1] - int(c_b / 2) 132 | cmax = center[1] + int(c_b / 2) 133 | if rmin < 0: 134 | delt = -rmin 135 | rmin = 0 136 | rmax += delt 137 | if cmin < 0: 138 | delt = -cmin 139 | cmin = 0 140 | cmax += delt 141 | if rmax > img_width: 142 | delt = rmax - img_width 143 | rmax = img_width 144 | rmin -= delt 145 | if cmax > img_length: 146 | delt = cmax - img_length 147 | cmax = img_length 148 | cmin -= delt 149 | return rmin, rmax, cmin, cmax 150 | """ 151 | def get_bbox(rois,idx): 152 | # rmin = int(posecnn_rois[idx][2]) + 1 153 | # rmax = int(posecnn_rois[idx][4]) - 1 154 | # cmin = int(posecnn_rois[idx][1]) + 1 155 | # cmax = int(posecnn_rois[idx][3]) - 1 156 | rmin = int(rois[idx].xmin) + 1 157 | rmax = int(rois[idx].xmax) - 1 158 | cmin = int(rois[idx].ymin) + 1 159 | cmax = int(rois[idx].ymax) - 1 160 | r_b = rmax - rmin 161 | for tt in range(len(border_list)): 162 | if r_b > border_list[tt] and r_b < border_list[tt + 1]: 163 | r_b = border_list[tt + 1] 164 | break 165 | c_b = cmax - cmin 166 | for tt in range(len(border_list)): 167 | if c_b > border_list[tt] and c_b < border_list[tt + 1]: 168 | c_b = border_list[tt + 1] 169 | break 170 | center = [int((rmin + rmax) / 2), int((cmin + cmax) / 2)] 171 | rmin = center[0] - int(r_b / 2) 172 | rmax = center[0] + int(r_b / 2) 173 | cmin = center[1] - int(c_b / 2) 174 | cmax = center[1] + int(c_b 
/ 2) 175 | if rmin < 0: 176 | delt = -rmin 177 | rmin = 0 178 | rmax += delt 179 | if cmin < 0: 180 | delt = -cmin 181 | cmin = 0 182 | cmax += delt 183 | if rmax > img_width: 184 | delt = rmax - img_width 185 | rmax = img_width 186 | rmin -= delt 187 | if cmax > img_length: 188 | delt = cmax - img_length 189 | cmax = img_length 190 | cmin -= delt 191 | return rmin, rmax, cmin, cmax 192 | #################################################################################################### 193 | ################################### load BiSeNet parameters ######################################## 194 | #################################################################################################### 195 | print('load BiSeNet') 196 | start_time = time.time() 197 | bise_model = BiSeNet(opt.num_classes, opt.context_path) 198 | bise_model = bise_model.cuda() 199 | bise_model.load_state_dict(torch.load(opt.checkpoint_path)) 200 | global bise_model 201 | print('Done!') 202 | print("Load time : {}".format(time.time() - start_time)) 203 | 204 | ##################################################################################################### 205 | ######################## load DenseFusion network and 3D object models ############################# 206 | ##################################################################################################### 207 | print('load densefusion network') 208 | start_time = time.time() 209 | estimator = PoseNet(num_points = num_points, num_obj = num_obj) 210 | estimator.cuda() 211 | estimator.load_state_dict(torch.load(opt.model)) 212 | estimator.eval() 213 | ############################################################################ 214 | refiner = PoseRefineNet(num_points = num_points, num_obj = num_obj) 215 | refiner.cuda() 216 | refiner.load_state_dict(torch.load(opt.refine_model)) 217 | refiner.eval() 218 | print('Done') 219 | print("Load time : {}".format(time.time() - start_time)) 220 | ##################################################################################################### 221 | # class list upload 222 | class_file = open('{0}/classes.txt'.format(dataset_config_dir)) 223 | class_id = 1 224 | cld = {} 225 | while 1: 226 | class_input = class_file.readline() 227 | if not class_input: 228 | break 229 | class_input = class_input[:-1] 230 | 231 | input_file = open('{0}/models/{1}/points.xyz'.format(opt.dataset_root, class_input)) 232 | cld[class_id] = [] 233 | while 1: 234 | input_line = input_file.readline() 235 | if not input_line: 236 | break 237 | input_line = input_line[:-1] 238 | input_line = input_line.split(' ') 239 | cld[class_id].append([float(input_line[0]), float(input_line[1]), float(input_line[2])]) 240 | input_file.close() 241 | cld[class_id] = np.array(cld[class_id]) 242 | class_id += 1 243 | ######################################################################################################## | # run BiSeNet on an RGB image and return the per-pixel class-id map 244 | def seg_predict(image): 245 | global bise_model 246 | try: 247 | with torch.no_grad(): 248 | bise_model.eval() 249 | h,w,_ = image.shape 250 | to_tensor = transforms.Compose([ 251 | transforms.ToTensor(), 252 | transforms.Normalize((0.485, 0.456, 0.406), (0.229, 0.224, 0.225)), 253 | ]) 254 | 255 | image = to_tensor(image) 256 | image = image.unsqueeze_(0) 257 | image = image.cuda() 258 | predict = bise_model(image).squeeze() 259 | predict = reverse_one_hot(predict) 260 | predict = np.array(predict) 261 | predict = np.resize(predict,[h,w]) 262 | print(np.unique(predict)) | return predict 263 | except CvBridgeError as e: 264 | print(e) 265 | 266 | class
object_pose_estimation: 267 | def __init__(self): 268 | self.bridge = CvBridge() 269 | # rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, image_callback) 270 | # depth_sub = rospy.Subscriber('',Image, depth_callback) 271 | # rois_sub = rospy.Subscriber('',BoundingBoxes, rois_callback) 272 | self.rgb_sub = message_filters.Subscriber('/camera/color/image_raw',Image) 273 | self.depth_sub = message_filters.Subscriber('/camera/depth/image_rect_raw',Image) 274 | self.rois_sub = message_filters.Subscriber('/darknet_ros/bounding_boxes',BoundingBoxes) 275 | self.pose_pub = rospy.Publisher('/pose_pub', PoseArray,queue_size = 10) 276 | self.ts = message_filters.TimeSynchronizer([self.rgb_sub, self.depth_sub,self.rois_sub], queue_size = 10) 277 | self.ts.registerCallback(self.estimation_callback) 278 | 279 | 280 | def estimation_callback(self, rgb,depth,rois): 281 | try: 282 | img = self.bridge.imgmsg_to_cv2(rgb,'bgr8') 283 | depth = self.bridge.imgmsg_to_cv2(depth,'32SC1') 284 | rois = rois.bounding_boxes 285 | print(img, depth, rois) 286 | class_list = ['002_master_chef_can', 287 | '003_cracker_box', 288 | '004_sugar_box', 289 | '005_tomato_soup_can', 290 | '006_mustard_bottle', 291 | '007_tuna_fish_can', 292 | '008_pudding_box', 293 | '009_gelatin_box', 294 | '010_potted_meat_can', 295 | '011_banana',#'019_pitcher_base', 296 | '025_mug', 297 | '021_bleach_cleanser', 298 | '024_bowl', 299 | '035_power_drill', 300 | '036_wood_block', 301 | '037_scissors', 302 | '040_large_marker', 303 | '051_large_clamp', 304 | '052_extra_large_clamp', 305 | '061_foam_brick'] 306 | object_number = len(rois) 307 | #lst = posecnn_rois[:,0:1].flatten() 308 | #lst = np.unique(label) 309 | my_result_wo_refine = [] 310 | my_result = [] 311 | for idx in range(object_number): 312 | #itemid = lst[idx] 313 | itemid = class_list.index(rois[idx].Class) + 1 314 | print(itemid, rois[idx]) 315 | 316 | try: 317 | label = seg_predict(img) 318 | rmin, rmax, cmin, cmax = get_bbox(rois, idx) 319 | # bounding box cutting 320 | #label = seg_predict(img[rmin:rmax,cmin:cmax,:]) 321 | #mask_depth = ma.getmaskarray(ma.masked_not_equal(depth[rmin:rmax, cmin:cmax], 0)) 322 | #mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 323 | #mask = mask_label * mask_depth 324 | # only image 325 | mask_depth = ma.getmaskarray(ma.masked_not_equal(depth, 0)) 326 | mask_label = ma.getmaskarray(ma.masked_equal(label, itemid)) 327 | mask = mask_label * mask_depth 328 | #rmin, rmax, cmin, cmax = get_bbox(mask_label) 329 | 330 | 331 | choose = mask[rmin:rmax, cmin:cmax].flatten().nonzero()[0] 332 | if len(choose) > num_points: 333 | c_mask = np.zeros(len(choose), dtype=int) 334 | c_mask[:num_points] = 1 335 | np.random.shuffle(c_mask) 336 | choose = choose[c_mask.nonzero()] 337 | else: 338 | choose = np.pad(choose, (0, num_points - len(choose)), 'wrap') 339 | 340 | depth_masked = depth[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 341 | xmap_masked = xmap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 342 | ymap_masked = ymap[rmin:rmax, cmin:cmax].flatten()[choose][:, np.newaxis].astype(np.float32) 343 | choose = np.array([choose]) 344 | 345 | pt2 = depth_masked / cam_scale 346 | pt0 = (ymap_masked - cam_cx) * pt2 / cam_fx 347 | pt1 = (xmap_masked - cam_cy) * pt2 / cam_fy 348 | cloud = np.concatenate((pt0, pt1, pt2), axis=1) 349 | 350 | img_masked = np.array(img)[:, :, :3] 351 | img_masked = np.transpose(img_masked, (2, 0, 1)) 352 | img_masked = img_masked[:, rmin:rmax, cmin:cmax]
353 | 354 | cloud = torch.from_numpy(cloud.astype(np.float32)) 355 | choose = torch.LongTensor(choose.astype(np.int32)) 356 | img_masked = norm(torch.from_numpy(img_masked.astype(np.float32))) 357 | index = torch.LongTensor([itemid - 1]) 358 | 359 | cloud = Variable(cloud).cuda() 360 | choose = Variable(choose).cuda() 361 | img_masked = Variable(img_masked).cuda() 362 | index = Variable(index).cuda() 363 | cloud = cloud.view(1, num_points, 3) 364 | img_masked = img_masked.view(1, 3, img_masked.size()[1], img_masked.size()[2]) 365 | pred_r, pred_t, pred_c, emb = estimator(img_masked, cloud, choose, index) 366 | pred_r = pred_r / torch.norm(pred_r, dim=2).view(1, num_points, 1) 367 | pred_c = pred_c.view(bs, num_points) 368 | how_max, which_max = torch.max(pred_c, 1) 369 | pred_t = pred_t.view(bs * num_points, 1, 3) 370 | points = cloud.view(bs * num_points, 1, 3) 371 | my_r = pred_r[0][which_max[0]].view(-1).cpu().data.numpy() 372 | my_t = (points + pred_t)[which_max[0]].view(-1).cpu().data.numpy() 373 | my_pred = np.append(my_r, my_t) 374 | # making pose matrix 375 | dof = quaternion_matrix(my_r) 376 | dof[0:3,3] = my_t 377 | rot_to_angle = rotationMatrixToEulerAngles(dof[:3,:3]) 378 | rot_to_angle = rot_to_angle.reshape(1,3) 379 | my_t = my_t.reshape(1,3) 380 | rot_t = np.concatenate([rot_to_angle,my_t], axis= 0) 381 | object_poses = { 382 | 'tx':my_t[0][0], 383 | 'ty':my_t[0][1], 384 | 'tz':my_t[0][2], | # my_r follows the [w, x, y, z] convention of lib.transformations 385 | 'qw':my_r[0], 386 | 'qx':my_r[1], 387 | 'qy':my_r[2], 388 | 'qz':my_r[3]} 389 | my_result.append(object_poses) 390 | open_cv_image = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2BGR) 391 | cam_mat = cv2.UMat(np.matrix([[cam_fx, 0, cam_cx], [0, cam_fy, cam_cy], 392 | [0, 0, 1]])) | dist = np.zeros((5, 1))   # distortion coefficients are assumed to be zero here 393 | imgpts, jac = cv2.projectPoints(cld[itemid], dof[0:3,0:3],dof[0:3,3],cam_mat,dist) 394 | open_cv_image = draw(open_cv_image,imgpts.get(), itemid) 395 | my_result_wo_refine.append(my_pred.tolist()) 396 | pose_array = PoseArray() 397 | pose_msg = Pose() | pose_msg.position.x = object_poses['tx'] | pose_msg.position.y = object_poses['ty'] | pose_msg.position.z = object_poses['tz'] | pose_msg.orientation.x = object_poses['qx'] | pose_msg.orientation.y = object_poses['qy'] | pose_msg.orientation.z = object_poses['qz'] | pose_msg.orientation.w = object_poses['qw'] | pose_array.poses.append(pose_msg) 398 | self.pose_pub.publish(pose_array) 399 | # pose_fit_image.publish(self.bridge.cv2_to_imgmsg(open_cv_image, 'bgr8'))   # disabled: no image publisher is defined in this class 400 | 401 | """ 402 | for ite in range(0, iteration): 403 | T = Variable(torch.from_numpy(my_t.astype(np.float32))).cuda().view(1, 3).repeat(num_points, 1).contiguous().view(1, num_points, 3) 404 | my_mat = quaternion_matrix(my_r) 405 | R = Variable(torch.from_numpy(my_mat[:3, :3].astype(np.float32))).cuda().view(1, 3, 3) 406 | my_mat[0:3, 3] = my_t 407 | 408 | new_cloud = torch.bmm((cloud - T), R).contiguous() 409 | pred_r, pred_t = refiner(new_cloud, emb, index) 410 | pred_r = pred_r.view(1, 1, -1) 411 | pred_r = pred_r / (torch.norm(pred_r, dim=2).view(1, 1, 1)) 412 | my_r_2 = pred_r.view(-1).cpu().data.numpy() 413 | my_t_2 = pred_t.view(-1).cpu().data.numpy() 414 | my_mat_2 = quaternion_matrix(my_r_2) 415 | 416 | 417 | my_mat_2[0:3, 3] = my_t_2 418 | my_mat_final = np.dot(my_mat, my_mat_2) 419 | my_r_final = copy.deepcopy(my_mat_final) 420 | my_r_final[0:3, 3] = 0 421 | my_r_final = quaternion_from_matrix(my_r_final, True) 422 | 423 | my_t_final = np.array([my_mat_final[0][3], my_mat_final[1][3], my_mat_final[2][3]]) 424 | 425 | my_pred = np.append(my_r_final, my_t_final) 426 | my_r = my_r_final 427 | my_t = my_t_final 428 | """ 429 | # Here 'my_pred' is the final pose estimation result after refinement ('my_r': quaternion, 'my_t': translation) 430 | #my_result.append(my_pred.tolist()) 431 | except ZeroDivisionError: 432 | # my_result_wo_refine.append([0.0 for i in range(7)]) 433 | # my_result.append([0.0 for i in range(7)]) 434 | print('Fail') 435 |
except CvBridgeError as e: 436 | print(e) 437 | 438 | 439 | def draw(img, imgpts, label): 440 | color = [[254,0,0],[254,244,0],[171,242,0],[0,216,254],[1,0,254],[95,0,254],[254,0,221],[0,0,0],[153,56,0],[138,36,124],[107,153,0],[5,0,153],[76,76,76],[32,153,67],[41,20,240],[230,111,240],[211,222,6],[40,233,70],[130,24,70],[244,200,210],[70,80,90],[30,40,30]] 441 | for point in imgpts: 442 | 443 | img=cv2.circle(img,(int(point[0][0]),int(point[0][1])), 1, color[int(label)], -1) 444 | return img 445 | 446 | def main(): 447 | rospy.init_node('pose_estimator',anonymous= True) 448 | Pose = object_pose_estimation() 449 | rospy.spin() 450 | 451 | if __name__ == '__main__': 452 | main() 453 | -------------------------------------------------------------------------------- /scripts/tools/test.py: -------------------------------------------------------------------------------- 1 | #! /usr/bin/env python 2 | import cv2 3 | import rospy 4 | from sensor_msgs.msg import Image, CameraInfo 5 | from cv_bridge import CvBridge, CvBridgeError 6 | 7 | class ImageIo: 8 | def __init__(self): 9 | self.rgb_sub = rospy.Subscriber('/camera/color/image_raw',Image, self.rgb_callback) 10 | def rgb_callback(self,rgb): 11 | bridge = CvBridge() 12 | label_pub = rospy.Publisher('/label',Image, queue_size = 10) 13 | img = bridge.imgmsg_to_cv2(rgb,"bgr8") 14 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 15 | image = cv2.cvtColor(img.copy(), cv2.COLOR_RGB2GRAY) 16 | label_pub.publish(bridge.cv2_to_imgmsg(image,encoding="8UC1")) 17 | 18 | 19 | 20 | 21 | 22 | def main(): 23 | IO = ImageIo() 24 | 25 | if __name__ == '__main__': 26 | rospy.init_node('zzz',anonymous = True) 27 | main() 28 | rospy.spin() -------------------------------------------------------------------------------- /scripts/tools/train.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # DenseFusion 6D Object Pose Estimation by Iterative Dense Fusion 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by Chen 5 | # -------------------------------------------------------- 6 | 7 | import _init_paths 8 | import argparse 9 | import os 10 | import random 11 | import time 12 | import numpy as np 13 | import torch 14 | import torch.nn as nn 15 | import torch.nn.parallel 16 | import torch.backends.cudnn as cudnn 17 | import torch.optim as optim 18 | import torch.utils.data 19 | import torchvision.datasets as dset 20 | import torchvision.transforms as transforms 21 | import torchvision.utils as vutils 22 | from torch.autograd import Variable 23 | from datasets.ycb.dataset import PoseDataset as PoseDataset_ycb 24 | from datasets.linemod.dataset import PoseDataset as PoseDataset_linemod 25 | from lib.network import PoseNet, PoseRefineNet 26 | from lib.loss import Loss 27 | from lib.loss_refiner import Loss_refine 28 | from lib.utils import setup_logger 29 | 30 | parser = argparse.ArgumentParser() 31 | parser.add_argument('--dataset', type=str, default = 'ycb', help='ycb or linemod') 32 | parser.add_argument('--dataset_root', type=str, default = '', help='dataset root dir (''YCB_Video_Dataset'' or ''Linemod_preprocessed'')') 33 | parser.add_argument('--batch_size', type=int, default = 8, help='batch size') 34 | parser.add_argument('--workers', type=int, default = 10, help='number of data loading workers') 35 | parser.add_argument('--lr', default=0.0001, help='learning rate') 36 | parser.add_argument('--lr_rate', default=0.3, help='learning rate decay 
rate') 37 | parser.add_argument('--w', default=0.015, help='learning rate') 38 | parser.add_argument('--w_rate', default=0.3, help='learning rate decay rate') 39 | parser.add_argument('--decay_margin', default=0.016, help='margin to decay lr & w') 40 | parser.add_argument('--refine_margin', default=0.013, help='margin to start the training of iterative refinement') 41 | parser.add_argument('--noise_trans', default=0.03, help='range of the random noise of translation added to the training data') 42 | parser.add_argument('--iteration', type=int, default = 2, help='number of refinement iterations') 43 | parser.add_argument('--nepoch', type=int, default=500, help='max number of epochs to train') 44 | parser.add_argument('--resume_posenet', type=str, default = '', help='resume PoseNet model') 45 | parser.add_argument('--resume_refinenet', type=str, default = '', help='resume PoseRefineNet model') 46 | parser.add_argument('--start_epoch', type=int, default = 1, help='which epoch to start') 47 | opt = parser.parse_args() 48 | 49 | 50 | def main(): 51 | opt.manualSeed = random.randint(1, 10000) 52 | random.seed(opt.manualSeed) 53 | torch.manual_seed(opt.manualSeed) 54 | 55 | if opt.dataset == 'ycb': 56 | opt.num_objects = 21 #number of object classes in the dataset 57 | opt.num_points = 1000 #number of points on the input pointcloud 58 | opt.outf = 'trained_models/ycb' #folder to save trained models 59 | opt.log_dir = 'experiments/logs/ycb' #folder to save logs 60 | opt.repeat_epoch = 1 #number of repeat times for one epoch training 61 | elif opt.dataset == 'linemod': 62 | opt.num_objects = 13 63 | opt.num_points = 500 64 | opt.outf = 'trained_models/linemod' 65 | opt.log_dir = 'experiments/logs/linemod' 66 | opt.repeat_epoch = 20 67 | else: 68 | print('Unknown dataset') 69 | return 70 | 71 | estimator = PoseNet(num_points = opt.num_points, num_obj = opt.num_objects) 72 | estimator.cuda() 73 | refiner = PoseRefineNet(num_points = opt.num_points, num_obj = opt.num_objects) 74 | refiner.cuda() 75 | 76 | if opt.resume_posenet != '': 77 | estimator.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_posenet))) 78 | 79 | if opt.resume_refinenet != '': 80 | refiner.load_state_dict(torch.load('{0}/{1}'.format(opt.outf, opt.resume_refinenet))) 81 | opt.refine_start = True 82 | opt.decay_start = True 83 | opt.lr *= opt.lr_rate 84 | opt.w *= opt.w_rate 85 | opt.batch_size = int(opt.batch_size / opt.iteration) 86 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) 87 | else: 88 | opt.refine_start = False 89 | opt.decay_start = False 90 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) 91 | 92 | if opt.dataset == 'ycb': 93 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 94 | elif opt.dataset == 'linemod': 95 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 96 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers) 97 | if opt.dataset == 'ycb': 98 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 99 | elif opt.dataset == 'linemod': 100 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 101 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) 102 | 103 | opt.sym_list = dataset.get_sym_list() 104 | opt.num_points_mesh = 
dataset.get_num_points_mesh() 105 | 106 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list)) 107 | 108 | criterion = Loss(opt.num_points_mesh, opt.sym_list) 109 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) 110 | 111 | best_test = np.Inf 112 | 113 | if opt.start_epoch == 1: 114 | for log in os.listdir(opt.log_dir): 115 | os.remove(os.path.join(opt.log_dir, log)) 116 | st_time = time.time() 117 | 118 | for epoch in range(opt.start_epoch, opt.nepoch): 119 | logger = setup_logger('epoch%d' % epoch, os.path.join(opt.log_dir, 'epoch_%d_log.txt' % epoch)) 120 | logger.info('Train time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Training started')) 121 | train_count = 0 122 | train_dis_avg = 0.0 123 | if opt.refine_start: 124 | estimator.eval() 125 | refiner.train() 126 | else: 127 | estimator.train() 128 | optimizer.zero_grad() 129 | 130 | for rep in range(opt.repeat_epoch): 131 | for i, data in enumerate(dataloader, 0): 132 | points, choose, img, target, model_points, idx = data 133 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 134 | Variable(choose).cuda(), \ 135 | Variable(img).cuda(), \ 136 | Variable(target).cuda(), \ 137 | Variable(model_points).cuda(), \ 138 | Variable(idx).cuda() 139 | pred_r, pred_t, pred_c, emb = estimator(img, points, choose, idx) 140 | loss, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) 141 | 142 | if opt.refine_start: 143 | for ite in range(0, opt.iteration): 144 | pred_r, pred_t = refiner(new_points, emb, idx) 145 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) 146 | dis.backward() 147 | else: 148 | loss.backward() 149 | 150 | train_dis_avg += dis.item() 151 | train_count += 1 152 | 153 | if train_count % opt.batch_size == 0: 154 | logger.info('Train time {0} Epoch {1} Batch {2} Frame {3} Avg_dis:{4}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, int(train_count / opt.batch_size), train_count, train_dis_avg / opt.batch_size)) 155 | optimizer.step() 156 | optimizer.zero_grad() 157 | train_dis_avg = 0 158 | 159 | if train_count != 0 and train_count % 1000 == 0: 160 | if opt.refine_start: 161 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_current.pth'.format(opt.outf)) 162 | else: 163 | torch.save(estimator.state_dict(), '{0}/pose_model_current.pth'.format(opt.outf)) 164 | 165 | print('>>>>>>>>----------epoch {0} train finish---------<<<<<<<<'.format(epoch)) 166 | 167 | 168 | logger = setup_logger('epoch%d_test' % epoch, os.path.join(opt.log_dir, 'epoch_%d_test_log.txt' % epoch)) 169 | logger.info('Test time {0}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)) + ', ' + 'Testing started')) 170 | test_dis = 0.0 171 | test_count = 0 172 | estimator.eval() 173 | refiner.eval() 174 | 175 | for j, data in enumerate(testdataloader, 0): 176 | points, choose, img, target, model_points, idx = data 177 | points, choose, img, target, model_points, idx = Variable(points).cuda(), \ 178 | Variable(choose).cuda(), \ 179 | Variable(img).cuda(), \ 180 | Variable(target).cuda(), \ 181 | Variable(model_points).cuda(), \ 182 | Variable(idx).cuda() 183 | pred_r, pred_t, pred_c, emb 
= estimator(img, points, choose, idx) 184 | _, dis, new_points, new_target = criterion(pred_r, pred_t, pred_c, target, model_points, idx, points, opt.w, opt.refine_start) 185 | 186 | if opt.refine_start: 187 | for ite in range(0, opt.iteration): 188 | pred_r, pred_t = refiner(new_points, emb, idx) 189 | dis, new_points, new_target = criterion_refine(pred_r, pred_t, new_target, model_points, idx, new_points) 190 | 191 | test_dis += dis.item() 192 | logger.info('Test time {0} Test Frame No.{1} dis:{2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), test_count, dis)) 193 | 194 | test_count += 1 195 | 196 | test_dis = test_dis / test_count 197 | logger.info('Test time {0} Epoch {1} TEST FINISH Avg dis: {2}'.format(time.strftime("%Hh %Mm %Ss", time.gmtime(time.time() - st_time)), epoch, test_dis)) 198 | if test_dis <= best_test: 199 | best_test = test_dis 200 | if opt.refine_start: 201 | torch.save(refiner.state_dict(), '{0}/pose_refine_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis)) 202 | else: 203 | torch.save(estimator.state_dict(), '{0}/pose_model_{1}_{2}.pth'.format(opt.outf, epoch, test_dis)) 204 | print(epoch, '>>>>>>>>----------BEST TEST MODEL SAVED---------<<<<<<<<') 205 | 206 | if best_test < opt.decay_margin and not opt.decay_start: 207 | opt.decay_start = True 208 | opt.lr *= opt.lr_rate 209 | opt.w *= opt.w_rate 210 | optimizer = optim.Adam(estimator.parameters(), lr=opt.lr) 211 | 212 | if best_test < opt.refine_margin and not opt.refine_start: 213 | opt.refine_start = True 214 | opt.batch_size = int(opt.batch_size / opt.iteration) 215 | optimizer = optim.Adam(refiner.parameters(), lr=opt.lr) 216 | 217 | if opt.dataset == 'ycb': 218 | dataset = PoseDataset_ycb('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 219 | elif opt.dataset == 'linemod': 220 | dataset = PoseDataset_linemod('train', opt.num_points, True, opt.dataset_root, opt.noise_trans, opt.refine_start) 221 | dataloader = torch.utils.data.DataLoader(dataset, batch_size=1, shuffle=True, num_workers=opt.workers) 222 | if opt.dataset == 'ycb': 223 | test_dataset = PoseDataset_ycb('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 224 | elif opt.dataset == 'linemod': 225 | test_dataset = PoseDataset_linemod('test', opt.num_points, False, opt.dataset_root, 0.0, opt.refine_start) 226 | testdataloader = torch.utils.data.DataLoader(test_dataset, batch_size=1, shuffle=False, num_workers=opt.workers) 227 | 228 | opt.sym_list = dataset.get_sym_list() 229 | opt.num_points_mesh = dataset.get_num_points_mesh() 230 | 231 | print('>>>>>>>>----------Dataset loaded!---------<<<<<<<<\nlength of the training set: {0}\nlength of the testing set: {1}\nnumber of sample points on mesh: {2}\nsymmetry object list: {3}'.format(len(dataset), len(test_dataset), opt.num_points_mesh, opt.sym_list)) 232 | 233 | criterion = Loss(opt.num_points_mesh, opt.sym_list) 234 | criterion_refine = Loss_refine(opt.num_points_mesh, opt.sym_list) 235 | 236 | if __name__ == '__main__': 237 | main() 238 | -------------------------------------------------------------------------------- /scripts/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/MyoungHaSong/DenseFusion_ROS/b5f2fccb3bb6696364bab5b3ea9a55190a6af765/scripts/utils.pyc -------------------------------------------------------------------------------- /srv/CameraRequests.srv: 
-------------------------------------------------------------------------------- 1 | int64 a 2 | --- 3 | geometry_msgs/PoseArray pose_array 4 | 5 | --------------------------------------------------------------------------------
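
The CameraRequests service above takes a single int64 request field (a) and answers with a geometry_msgs/PoseArray. As a rough illustration only, here is a minimal rospy client sketch, assuming the srv module is generated into the densefusion package and that some node advertises the service under the hypothetical name /camera_requests (no such server is shown in this snapshot):

#! /usr/bin/env python
# Hypothetical client for the CameraRequests service defined above.
# '/camera_requests' is an assumed service name, not one used elsewhere in this repository.
import rospy
from densefusion.srv import CameraRequests

def request_poses(a=1):
    rospy.wait_for_service('/camera_requests')
    try:
        camera_requests = rospy.ServiceProxy('/camera_requests', CameraRequests)
        resp = camera_requests(a)      # request field: int64 a
        return resp.pose_array         # response field: geometry_msgs/PoseArray
    except rospy.ServiceException as e:
        rospy.logerr('CameraRequests call failed: %s', e)
        return None

if __name__ == '__main__':
    rospy.init_node('camera_requests_client')
    poses = request_poses()
    if poses is not None:
        print('received {} poses'.format(len(poses.poses)))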