├── README.md ├── environment.yml ├── model_config.yaml ├── push-DQN_config.yaml ├── simulation ├── Franka-Panda.ttt ├── Task_no_1.ttt ├── Task_no_2_VPG_heap.ttt ├── Task_no_2_VPG_heap_no_colour.ttt ├── data_generation_scene.ttt ├── remoteApi.so ├── vrep.py └── vrepConst.py └── src ├── database_generator ├── database_config_inria.yaml └── database_generator_inria.py ├── mask_rg ├── coco_eval.py ├── coco_utils.py ├── dataset.py ├── engine.py ├── main.py ├── mask_rg.py ├── model.py ├── rewards.py ├── transforms.py └── utils.py └── push-DQN ├── logger.py ├── main.py ├── main_test.py ├── models.py ├── robot.py ├── trainer.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Push-to-See 2 | ## Learning Non-Prehensile Manipulation to Enhance Instance Segmentation via Deep Q-Learning 3 | This is the main repository of our Push-to-See model. For technical details, the paper is accessible via [this link.](https://ieeexplore.ieee.org/document/9811645) 4 | 5 | ### Model weights 6 | Trained model weights can be downloaded via the following links: 7 | - Mask-RG (trained on Inria objects for 40 epochs): [link](https://www.dropbox.com/s/mqf7iwmxeti76wx/maskrg_inria_v1_40ep.pth?dl=0) 8 | - Push-DQN (trained for 41k episodes): [link](https://www.dropbox.com/s/96qqmt809gceguj/push_dqn_41k.pth?dl=0) 9 | 10 | ### Demos 11 | 12 | A short description of the model and video demonstrations can be seen in [this video.](https://www.youtube.com/watch?v=CtMaCpACAjU) 13 | 14 | ### Installation and usage 15 | (Instructions for installation, training and running will be included in this readme file. For now, please contact the corresponding author at baris.serhan@manchester.ac.uk if you require any help.) 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pushandgrasp 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex 7 | - attrs=19.3.0 8 | - backcall=0.2.0 9 | - bleach=3.1.5 10 | - ca-certificates 11 | - certifi=2020.6.20 12 | - decorator=4.4.2 13 | - defusedxml=0.6.0 14 | - entrypoints=0.3 15 | - importlib-metadata=1.7.0 16 | - importlib_metadata=1.7.0 17 | - ipykernel=5.3.0 18 | - ipython=7.16.1 19 | - ipython_genutils=0.2.0 20 | - jedi=0.17.1 21 | - jinja2=2.11.2 22 | - jsonschema=3.2.0 23 | - jupyter_client=6.1.3 24 | - jupyter_core=4.6.3 25 | - ld_impl_linux-64=2.33.1 26 | - libedit=3.1.20191231 27 | - libffi=3.3 28 | - libgcc-ng=9.1.0 29 | - libsodium=1.0.18 30 | - libstdcxx-ng=9.1.0 31 | - markupsafe=1.1.1 32 | - mistune=0.8.4 33 | - nbconvert=5.6.1 34 | - nbformat=5.0.7 35 | - ncurses=6.2 36 | - notebook=6.0.3 37 | - openssl=1.1.1g 38 | - packaging=20.4 39 | - pandoc=2.9.2.1 40 | - pandocfilters=1.4.2 41 | - parso=0.7.0 42 | - pexpect=4.8.0 43 | - pickleshare=0.7.5 44 | - pip=20.1.1 45 | - prometheus_client=0.8.0 46 | - prompt-toolkit=3.0.5 47 | - ptyprocess=0.6.0 48 | - pygments=2.6.1 49 | - pyparsing=2.4.7 50 | - pyrsistent=0.16.0 51 | - python=3.7 52 | - python-dateutil=2.8.1 53 | - pyzmq=19.0.1 54 | - readline=8.0 55 | - send2trash=1.5.0 56 | - setuptools=47.3.1 57 | - six=1.15.0 58 | - sqlite=3.32.3 59 | - terminado=0.8.3 60 | - testpath=0.4.4 61 | - tk=8.6.10 62 | - tornado=6.0.4 63 | - traitlets=4.3.3 64 | - wcwidth=0.2.5 65 | - webencodings=0.5.1 66 | - wheel=0.34.2 67 | - xz=5.2.5 68 | - zeromq=4.3.2 69 | - zipp=3.1.0 70 | - zlib=1.2.11 71 | - pip: 72 | - addict==2.4.0 73 | - 
catkin-pkg==0.4.23 74 | - cloudpickle==1.3.0 75 | - cycler==0.10.0 76 | - cython==0.29.20 77 | - distro==1.5.0 78 | - docutils==0.16 79 | - future==0.18.2 80 | - gym==0.17.2 81 | - ipywidgets==7.5.1 82 | - joblib==1.0.0 83 | - kiwisolver==1.2.0 84 | - matplotlib==3.2.2 85 | - numpy==1.19.0 86 | - open3d==0.11.2 87 | - opencv-python==4.2.0.34 88 | - pandas==1.1.5 89 | - pillow==7.2.0 90 | - plyfile==0.7.2 91 | - pybullet==3.0.7 92 | - pycocotools==2.0.1 93 | - pyglet==1.5.0 94 | - pytz==2020.4 95 | - pyyaml==5.3.1 96 | - rospkg==1.2.9 97 | - scikit-learn==0.23.2 98 | - scipy==1.5.0 99 | - sklearn==0.0 100 | - threadpoolctl==2.1.0 101 | - torch==1.2.0 102 | - torchvision==0.4.0 103 | - tqdm==4.54.1 104 | - widgetsnbextension==3.5.1 105 | 106 | 107 | -------------------------------------------------------------------------------- /model_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | path: /home/barisserhan/Workspace/push_to_see_saved_models/mask_rg 3 | # set 'file:' to 'new' for training from scratch, 'pretrained' for transfer learning, 4 | # or the name of the save model file which is placed on the above path folder. 5 | file: maskrg_inria_v1_40ep.pth #new 6 | 7 | settings: 8 | epochs: 20 9 | learning_rate: 0.0001 10 | batch_size: 5 11 | backbone: resnet50 12 | backbone_pretrained: True 13 | cuda_available: True 14 | 15 | 16 | dataset: 17 | path: /home/barisserhan/Database # The folders and files in the subsections below should be placed in this folder. 18 | images: depth_ims/NUMPY 19 | masks: segmentation_masks/PNG 20 | train_indices: train_indices.npy 21 | test_indices: test_indices.npy 22 | is_depth: True # if this flag is set 'True', trainer will look for depth numpy arrays in 'images' folder 23 | data_augmentation: False 24 | 25 | saving: 26 | path: saved_models/inria_train_2 27 | model_name: my_model_inria_v2_0ep_ 28 | config_name: 29 | -------------------------------------------------------------------------------- /push-DQN_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | path: /home/barisserhan/Workspace/push_to_see_saved_models/push-DQN/ 3 | file: push_dqn_41k.pth # set 'new' for not loading a snapshot of a pretrained model 4 | 5 | detection_thresholds: 6 | confidence_threshold: 0.85 # to define whether a prediction from mask_rg will be considered as a possible object/a detection 7 | mask_threshold: 0.85 # to define whether a detection will be considered correct based on its quality (i.e. how well its segmentation mask matches the ground truth) 8 | session_success_threshold: 0.90 # 9 | 10 | logging: 11 | path: Default # Set 'Default' for ./log 12 | continue_logging: False # to continue logging from previous session #TODO 13 | save_visualizations: True # to save visualisations of FCN predictions 14 | snapshot_saving_rate: 100 # to define how often the model weights will be saved 15 | 16 | environment: 17 | workspace_limits: [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]] # to define workspace limits in robot coordinates (Cols: min max, Rows: x y z) 18 | # (default val [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]) 19 | heightmap_resolution: 0.002 # meters per pixel of heightmap (default val 0.002) 20 | 21 | min_num_objects: 26 # Minimum number of objects that will be dropped in the simulation 22 | max_num_objects: 32 # Maximum number of objects that will be dropped in the simulation (LIMIT 72!) 
#TODO current limit is less than 72 --> change the colour space of printed mask diff 23 | # The total number of objects per scene will be randomised between min and max! 24 | session_limit: 30 # The number of iterations in each session. If a task cannot be solved within this limit, it's considered as a fail, otherwise success 25 | 26 | setup: 27 | is_testing: True # If True --> Exploration Probability will be 0.0 !!! 28 | exploration_probability: 0.5 # Initial exploration probability 29 | epsilon_greedy_policy: 30 | exploration_rate_decay: True # to set epsilon greedy exploration (from initial probability to min_exp_rate_decay) 31 | min_exp_rate_decay: 0.1 # to define minimum value to which the exploration probability can reach 32 | is_random_model: False # If true, exploration probability is set to 1.0 with no rate decay (CAUTION! if true, overrides above values and generates a total random model!) 33 | 34 | training: 35 | future_reward_discount: 0.5 36 | 37 | -------------------------------------------------------------------------------- /simulation/Franka-Panda.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Franka-Panda.ttt -------------------------------------------------------------------------------- /simulation/Task_no_1.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_1.ttt -------------------------------------------------------------------------------- /simulation/Task_no_2_VPG_heap.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_2_VPG_heap.ttt -------------------------------------------------------------------------------- /simulation/Task_no_2_VPG_heap_no_colour.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_2_VPG_heap_no_colour.ttt -------------------------------------------------------------------------------- /simulation/data_generation_scene.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/data_generation_scene.ttt -------------------------------------------------------------------------------- /simulation/remoteApi.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/remoteApi.so -------------------------------------------------------------------------------- /src/database_generator/database_config_inria.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | path: ~/Database_vrep_inria/ # Change this, if you want to save your dataset into another location 3 | settings: 4 | database_size: 1250 # Number of scenes to generate 5 | max_num_obj: 32 # Maximum number of objects dropped per scene 6 | min_num_obj: 24 # Minimum number of objects dropped per scene 7 | # total_num_obj: 6 # Subset of objects that will be put into the random selection pool 8 | # (e.g., if set to 
50, then first 50 object meshes in object_path will be used - from 000.urdf to 049.urdf) 9 | drop_height: 0.1 10 | remove_container: False 11 | 12 | data: 13 | save_numpy: True # Flag to save depth images and segmentation masks as numpy arrays 14 | save_png: True # Flag to save depth images and segmentation masks as png files 15 | save_color_img: True # Flag to save RGB images 16 | cam_height: 480 17 | cam_width: 640 18 | -------------------------------------------------------------------------------- /src/database_generator/database_generator_inria.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import select 3 | import struct 4 | import time 5 | import os 6 | import numpy as np 7 | # import utils 8 | from simulation import vrep 9 | import matplotlib.pyplot as plt 10 | import yaml 11 | import cv2 12 | from PIL import Image 13 | 14 | class DatabaseGenerator(object): 15 | def __init__(self, config): 16 | self.config = config 17 | 18 | self.DATABASE_SIZE = config['database']['settings']['database_size'] 19 | self.NUM_OBJ_MAX = config['database']['settings']['max_num_obj'] 20 | self.NUM_OBJ_MIN = config['database']['settings']['min_num_obj'] 21 | # self.SELECTION_POOL = config['database']['settings']['total_num_obj'] 22 | 23 | self.DROP_HEIGHT = config['database']['settings']['drop_height'] 24 | self.SAVE_NUMPY = config['data']['save_numpy'] 25 | self.SAVE_PNG = config['data']['save_png'] 26 | self.SAVE_COLOR = config['data']['save_color_img'] 27 | 28 | self.drop_limits = np.asarray([[-0.6, -0.4], [-0.15, 0.15], [-0.2, -0.1]]) 29 | 30 | # Make sure to have the server side running in V-REP: 31 | # in a child script of a V-REP scene, add following command 32 | # to be executed just once, at simulation start: 33 | # 34 | # simExtRemoteApiStart(19999) 35 | # 36 | # then start simulation, and run this program. 37 | # 38 | # IMPORTANT: for each successful call to simxStart, there 39 | # should be a corresponding call to simxFinish at the end! 40 | 41 | # MODIFY remoteApiConnections.txt 42 | 43 | # Connect to simulator 44 | vrep.simxFinish(-1) # Just in case, close all opened connections 45 | self.sim_client = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5) # Connect to V-REP on port 19997 46 | if self.sim_client == -1: 47 | print('Failed to connect to simulation (V-REP remote API server). 
Exiting.') 48 | exit() 49 | else: 50 | print('Connected to simulation.') 51 | self.restart_sim() 52 | 53 | # Setup virtual camera in simulation 54 | # self._setup_sim_camera() 55 | 56 | # Get handle to camera 57 | # sim_ret, self.cam_handle = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_persp', 58 | # vrep.simx_opmode_blocking) 59 | 60 | # Get handles for masking 61 | sim_ret_cam_gt, self.cam_handle_gt = vrep.simxGetObjectHandle(self.sim_client, 'gt_sensor', 62 | vrep.simx_opmode_blocking) 63 | 64 | # Get handles for rgb 65 | sim_ret_cam_ortho, self.cam_handle_ortho = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_ortho', 66 | vrep.simx_opmode_blocking) 67 | 68 | 69 | # def _setup_sim_camera(self): 70 | 71 | # sim_ret_cam_mask, self.cam_handle_mask = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_persp0', 72 | # vrep.simx_opmode_blocking) 73 | 74 | # Get camera pose and intrinsics in simulation 75 | # sim_ret, cam_position = vrep.simxGetObjectPosition(self.sim_client, self.cam_handle, -1, 76 | # vrep.simx_opmode_blocking) 77 | # sim_ret, cam_orientation = vrep.simxGetObjectOrientation(self.sim_client, self.cam_handle, -1, 78 | # vrep.simx_opmode_blocking) 79 | # cam_trans = np.eye(4, 4) 80 | # cam_trans[0:3, 3] = np.asarray(cam_position) 81 | # cam_orientation = [-cam_orientation[0], -cam_orientation[1], -cam_orientation[2]] 82 | # cam_rotm = np.eye(4, 4) 83 | # cam_rotm[0:3, 0:3] = np.linalg.inv(utils.euler2rotm(cam_orientation)) 84 | # self.cam_pose = np.dot(cam_trans, cam_rotm) # Compute rigid transformation representating camera pose 85 | # self.cam_intrinsics = np.asarray([[618.62, 0, 320], [0, 618.62, 240], [0, 0, 1]]) 86 | # self.cam_depth_scale = 1 87 | 88 | # Get background image 89 | # self.bg_color_img, self.bg_depth_img = self.get_camera_data() 90 | # self.bg_depth_img = self.bg_depth_img * self.cam_depth_scale 91 | 92 | def generate_database(self): 93 | err_trials = [] 94 | self._folder_config() 95 | init_time = time.time() 96 | for i in range(0, self.DATABASE_SIZE): 97 | session_start = time.time() 98 | # np.random.seed() 99 | curr_num_obj = np.random.random_integers(self.NUM_OBJ_MIN, self.NUM_OBJ_MAX) 100 | print('Scene no %06d - Number of objects in the current scene --->' % i, curr_num_obj) 101 | ret = self._add_objects(curr_num_obj) 102 | 103 | if not ret[0] == -1: 104 | self.save_scene(i) 105 | else: 106 | print("ERROR: Current scene couldn't save!") 107 | err_trials.append(i) 108 | 109 | np.save(self.scene_info_dir + "scene_info_%06d.npy" % i, np.asarray(ret[1])) 110 | self.restart_sim() 111 | session_end = time.time() - session_start 112 | print('Elapsed time for this current scene --> {: .02f} seconds'.format(session_end), 113 | 'Total elapsed time by now --> {: .02f} seconds'.format(time.time() - init_time)) 114 | # TODO proper logging 115 | np.savetxt(os.path.join(self.scene_info_dir, 'error_log.txt'), err_trials, fmt='%06d', delimiter=' ') 116 | 117 | vrep.simxStopSimulation(self.sim_client, vrep.simx_opmode_blocking) 118 | time.sleep(0.25) 119 | vrep.simxFinish(self.sim_client) 120 | 121 | def save_scene(self, iteration): 122 | curr_rgb, curr_depth, seg_mask = self.get_camera_data() 123 | 124 | # save rgb and depth images 125 | color_image = cv2.cvtColor(curr_rgb, cv2.COLOR_RGB2BGR) 126 | cv2.imwrite(os.path.join(self.color_dir, 'color_image_%06d.png' % iteration), color_image) 127 | 128 | depth_image = np.round(curr_depth * 10000).astype(np.uint16) # Save depth in 1e-4 meters 129 | np.save(os.path.join(self.depth_dir_numpy, 
'depth_%06d.npy' % iteration), depth_image) 130 | cv2.imwrite(os.path.join(self.depth_dir_png, 'depth_image_%06d.png' % iteration), depth_image) 131 | 132 | # save segmentation masks 133 | # np.save(os.path.join(self.segmask_dir_numpy, 'segmask_%06d.npy' % iteration), seg_mask) 134 | # plt.imsave(os.path.join(self.segmask_dir_png, 'segmask_image_%06d.png' % iteration), seg_mask) 135 | cv2.imwrite(os.path.join(self.segmask_dir_png, 'segmask_image_%06d.png' % iteration), seg_mask) 136 | 137 | 138 | def restart_sim(self): 139 | 140 | vrep.simxStopSimulation(self.sim_client, vrep.simx_opmode_blocking) 141 | time.sleep(0.25) 142 | vrep.simxStartSimulation(self.sim_client, vrep.simx_opmode_blocking) 143 | time.sleep(0.25) 144 | 145 | def _get_segmentation_mask(self, mask_cam_handle): 146 | 147 | ret, res, data = vrep.simxGetVisionSensorImage(self.sim_client, mask_cam_handle, 0, 148 | vrep.simx_opmode_oneshot_wait) 149 | 150 | seg_mask_temp = np.reshape(data, (res[1], res[0], 3)) 151 | seg_mask_temp = seg_mask_temp[:, :, :1] 152 | seg_mask_temp = np.reshape(seg_mask_temp, (res[1], res[0])) 153 | seg_mask = seg_mask_temp.copy() 154 | 155 | # set the background pixels to 0 156 | elements, counts = np.unique(seg_mask, return_counts=True) 157 | background_index = np.argmax(counts) 158 | new_mask = np.where(seg_mask == elements[background_index], 0, seg_mask) 159 | 160 | objects = np.delete(elements, background_index) 161 | objects = np.sort(objects) 162 | 163 | for i in range(0, objects.size): 164 | # if not objects[i] == 0: 165 | new_mask[np.where(seg_mask == objects[i])] = i + 1 166 | 167 | new_mask = np.flip(new_mask, axis=0) 168 | 169 | return new_mask 170 | 171 | def get_camera_data(self): 172 | 173 | print("Collecting images and ground truth masks...") 174 | # Get color image from simulation 175 | sim_ret, resolution, raw_image = vrep.simxGetVisionSensorImage(self.sim_client, self.cam_handle_ortho, 0, 176 | vrep.simx_opmode_blocking) 177 | color_img = np.asarray(raw_image) 178 | color_img.shape = (resolution[1], resolution[0], 3) 179 | color_img = color_img.astype(np.float) / 255 180 | color_img[color_img < 0] += 1 181 | color_img *= 255 182 | # color_img = np.fliplr(color_img) 183 | color_img = np.flip(color_img, axis=0) 184 | color_img = color_img.astype(np.uint8) 185 | 186 | # Get depth image from simulation 187 | sim_ret, resolution, depth_buffer = vrep.simxGetVisionSensorDepthBuffer(self.sim_client, self.cam_handle_ortho, 188 | vrep.simx_opmode_blocking) 189 | depth_img = np.asarray(depth_buffer) 190 | depth_img.shape = (resolution[1], resolution[0]) 191 | # depth_img = np.fliplr(depth_img) 192 | depth_img = np.flip(depth_img, axis=0) 193 | zNear = 0.01 #0.01 194 | zFar = 0.5 #10 195 | depth_img = depth_img * (zFar - zNear) + zNear 196 | 197 | # Get ground truth segmentation masks 198 | seg_mask = self._get_segmentation_mask(self.cam_handle_gt) 199 | 200 | return color_img, depth_img, seg_mask 201 | 202 | def _add_objects(self, num_obj_heap): 203 | 204 | # 18 INRIA objects x 4 = 72 (convex_0 to convex_71) 205 | isOscillating = 1 206 | objects = np.random.choice(range(0, 72), num_obj_heap, replace=False) 207 | shapes = [] 208 | objects_info = [] 209 | 210 | for object_idx in objects: 211 | 212 | drop_x = (self.drop_limits[0][1] - self.drop_limits[0][0]) * np.random.random_sample() + \ 213 | self.drop_limits[0][0] 214 | drop_y = (self.drop_limits[1][1] - self.drop_limits[1][0]) * np.random.random_sample() + \ 215 | self.drop_limits[1][0] 216 | 217 | object_position = [drop_x, drop_y, 
self.DROP_HEIGHT] 218 | object_orientation = [2 * np.pi * np.random.random_sample(), 2 * np.pi * np.random.random_sample(), 219 | 2 * np.pi * np.random.random_sample()] 220 | 221 | shape_name = 'convex_{:d}'.format(object_idx) 222 | shapes.append(shape_name) 223 | objects_info.append([shape_name, object_position, object_orientation]) 224 | 225 | ret_resp, ret_ints, ret_floats, ret_strings, ret_buffer = vrep.simxCallScriptFunction(self.sim_client, 'remoteApiCommandServer', 226 | vrep.sim_scripttype_childscript, 227 | 'moveShape', 228 | [0], 229 | object_position + object_orientation, 230 | [shape_name], 231 | bytearray(), 232 | vrep.simx_opmode_blocking) 233 | # break if error 234 | if ret_resp == 8: 235 | print('Failed to add new objects to simulation.') 236 | time.sleep(0.5) 237 | return [-1, -1] 238 | 239 | # wait a tad before dropping the next object 240 | time.sleep(0.04) 241 | 242 | # wait 100ms after all objects were released 243 | time.sleep(0.1) 244 | 245 | # wait until the oscillating objects stopped 246 | # (the remote method (i.e. checkMotion) that is attached to the vrep script (simulation_random_datacollect.ttt) 247 | # checks the angular velocities of the dropped objects) 248 | while isOscillating: 249 | ret_r, ret_i, ret_f, ret_s, ret_b = vrep.simxCallScriptFunction(self.sim_client, 'remoteApiCommandServer', 250 | vrep.sim_scripttype_childscript, 251 | 'checkMotion', 252 | [len(shapes)], 253 | [0.0], 254 | shapes, 255 | bytearray(), 256 | vrep.simx_opmode_blocking) 257 | time.sleep(0.05) 258 | isOscillating = ret_i[0] 259 | 260 | return [0, objects_info] 261 | 262 | def _folder_config(self): 263 | """Path configuration to save collected data""" 264 | 265 | # tilde expansion if necessary 266 | if self.config['database']['path'].find('~') == 0: 267 | database_dir = os.path.expanduser(self.config['database']['path']) 268 | else: 269 | database_dir = self.config['database']['path'] 270 | 271 | if not os.path.exists(database_dir): 272 | os.mkdir(database_dir) 273 | else: 274 | print("WARNING: Folder to save database is already exist, " 275 | "if it contains old scene files with same name, they will be overwritten!") 276 | 277 | self.depth_dir_numpy = os.path.join(database_dir, 'depth_ims/NUMPY/') 278 | self.depth_dir_png = os.path.join(database_dir, 'depth_ims/PNG/') 279 | self.color_dir = os.path.join(database_dir, 'color_ims/') 280 | self.segmask_dir_numpy = os.path.join(database_dir, 'segmentation_masks/NUMPY/') 281 | self.segmask_dir_png = os.path.join(database_dir, 'segmentation_masks/PNG/') 282 | self.scene_info_dir = os.path.join(database_dir, 'scene_info/') 283 | 284 | sub_folder_list = [self.depth_dir_numpy, self.depth_dir_png, self.color_dir, self.segmask_dir_numpy, 285 | self.segmask_dir_png, self.scene_info_dir] 286 | for folder in sub_folder_list: 287 | if not os.path.exists(folder): 288 | os.makedirs(folder) 289 | 290 | if __name__ == "__main__": 291 | # Read configuration file 292 | config_file = os.getcwd() + '/database_config_inria.yaml' 293 | with open(config_file) as f: 294 | configuration = yaml.load(f, Loader=yaml.FullLoader) 295 | 296 | dg = DatabaseGenerator(configuration) 297 | dg.generate_database() 298 | -------------------------------------------------------------------------------- /src/mask_rg/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | import numpy as np 5 | import copy 6 | import time 7 | import torch 8 | import torch._six 9 | 10 | from 
pycocotools.cocoeval import COCOeval 11 | from pycocotools.coco import COCO 12 | import pycocotools.mask as mask_util 13 | 14 | from collections import defaultdict 15 | 16 | import utils 17 | 18 | 19 | class CocoEvaluator(object): 20 | def __init__(self, coco_gt, iou_types): 21 | assert isinstance(iou_types, (list, tuple)) 22 | coco_gt = copy.deepcopy(coco_gt) 23 | self.coco_gt = coco_gt 24 | 25 | self.iou_types = iou_types 26 | self.coco_eval = {} 27 | for iou_type in iou_types: 28 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 29 | 30 | 31 | 32 | self.img_ids = [] 33 | self.eval_imgs = {k: [] for k in iou_types} 34 | 35 | def update(self, predictions): 36 | img_ids = list(np.unique(list(predictions.keys()))) 37 | self.img_ids.extend(img_ids) 38 | 39 | for iou_type in self.iou_types: 40 | results = self.prepare(predictions, iou_type) 41 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 42 | coco_eval = self.coco_eval[iou_type] 43 | 44 | coco_eval.cocoDt = coco_dt 45 | coco_eval.params.imgIds = list(img_ids) 46 | 47 | coco_eval.params.iouThrs= [0.95] 48 | 49 | img_ids, eval_imgs = evaluate(coco_eval) 50 | 51 | self.eval_imgs[iou_type].append(eval_imgs) 52 | 53 | def synchronize_between_processes(self): 54 | for iou_type in self.iou_types: 55 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 56 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 57 | 58 | def accumulate(self): 59 | for coco_eval in self.coco_eval.values(): 60 | coco_eval.accumulate() 61 | 62 | def summarize(self): 63 | for iou_type, coco_eval in self.coco_eval.items(): 64 | print("IoU metric: {}".format(iou_type)) 65 | coco_eval.summarize() 66 | 67 | def prepare(self, predictions, iou_type): 68 | if iou_type == "bbox": 69 | return self.prepare_for_coco_detection(predictions) 70 | elif iou_type == "segm": 71 | return self.prepare_for_coco_segmentation(predictions) 72 | elif iou_type == "keypoints": 73 | return self.prepare_for_coco_keypoint(predictions) 74 | else: 75 | raise ValueError("Unknown iou type {}".format(iou_type)) 76 | 77 | def prepare_for_coco_detection(self, predictions): 78 | coco_results = [] 79 | for original_id, prediction in predictions.items(): 80 | if len(prediction) == 0: 81 | continue 82 | 83 | boxes = prediction["boxes"] 84 | boxes = convert_to_xywh(boxes).tolist() 85 | scores = prediction["scores"].tolist() 86 | labels = prediction["labels"].tolist() 87 | 88 | coco_results.extend( 89 | [ 90 | { 91 | "image_id": original_id, 92 | "category_id": labels[k], 93 | "bbox": box, 94 | "score": scores[k], 95 | } 96 | for k, box in enumerate(boxes) 97 | ] 98 | ) 99 | return coco_results 100 | 101 | def prepare_for_coco_segmentation(self, predictions): 102 | coco_results = [] 103 | for original_id, prediction in predictions.items(): 104 | if len(prediction) == 0: 105 | continue 106 | 107 | scores = prediction["scores"] 108 | labels = prediction["labels"] 109 | masks = prediction["masks"] 110 | 111 | masks = masks > 0.5 112 | 113 | scores = prediction["scores"].tolist() 114 | labels = prediction["labels"].tolist() 115 | 116 | rles = [ 117 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] 118 | for mask in masks 119 | ] 120 | 121 | 122 | for rle in rles: 123 | rle["counts"] = rle["counts"].decode("utf-8") 124 | 125 | coco_results.extend( 126 | [ 127 | { 128 | "image_id": original_id, 129 | "category_id": labels[k], 130 | "segmentation": rle, 131 | "score": scores[k], 132 | } 
133 | for k, rle in enumerate(rles) 134 | ] 135 | ) 136 | return coco_results 137 | 138 | def prepare_for_coco_keypoint(self, predictions): 139 | coco_results = [] 140 | for original_id, prediction in predictions.items(): 141 | if len(prediction) == 0: 142 | continue 143 | 144 | boxes = prediction["boxes"] 145 | boxes = convert_to_xywh(boxes).tolist() 146 | scores = prediction["scores"].tolist() 147 | labels = prediction["labels"].tolist() 148 | keypoints = prediction["keypoints"] 149 | keypoints = keypoints.flatten(start_dim=1).tolist() 150 | 151 | coco_results.extend( 152 | [ 153 | { 154 | "image_id": original_id, 155 | "category_id": labels[k], 156 | 'keypoints': keypoint, 157 | "score": scores[k], 158 | } 159 | for k, keypoint in enumerate(keypoints) 160 | ] 161 | ) 162 | return coco_results 163 | 164 | 165 | def convert_to_xywh(boxes): 166 | xmin, ymin, xmax, ymax = boxes.unbind(1) 167 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 168 | 169 | 170 | def merge(img_ids, eval_imgs): 171 | all_img_ids = utils.all_gather(img_ids) 172 | all_eval_imgs = utils.all_gather(eval_imgs) 173 | 174 | merged_img_ids = [] 175 | for p in all_img_ids: 176 | merged_img_ids.extend(p) 177 | 178 | merged_eval_imgs = [] 179 | for p in all_eval_imgs: 180 | merged_eval_imgs.append(p) 181 | 182 | merged_img_ids = np.array(merged_img_ids) 183 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 184 | 185 | # keep only unique (and in sorted order) images 186 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 187 | merged_eval_imgs = merged_eval_imgs[..., idx] 188 | 189 | return merged_img_ids, merged_eval_imgs 190 | 191 | 192 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 193 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 194 | img_ids = list(img_ids) 195 | eval_imgs = list(eval_imgs.flatten()) 196 | 197 | coco_eval.evalImgs = eval_imgs 198 | coco_eval.params.imgIds = img_ids 199 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 200 | 201 | 202 | ################################################################# 203 | # From pycocotools, just removed the prints and fixed 204 | # a Python3 bug about unicode not defined 205 | ################################################################# 206 | 207 | # Ideally, pycocotools wouldn't have hard-coded prints 208 | # so that we could avoid copy-pasting those two functions 209 | 210 | def createIndex(self): 211 | # create index 212 | # print('creating index...') 213 | anns, cats, imgs = {}, {}, {} 214 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 215 | if 'annotations' in self.dataset: 216 | for ann in self.dataset['annotations']: 217 | imgToAnns[ann['image_id']].append(ann) 218 | anns[ann['id']] = ann 219 | 220 | if 'images' in self.dataset: 221 | for img in self.dataset['images']: 222 | imgs[img['id']] = img 223 | 224 | if 'categories' in self.dataset: 225 | for cat in self.dataset['categories']: 226 | cats[cat['id']] = cat 227 | 228 | if 'annotations' in self.dataset and 'categories' in self.dataset: 229 | for ann in self.dataset['annotations']: 230 | catToImgs[ann['category_id']].append(ann['image_id']) 231 | 232 | # print('index created!') 233 | 234 | # create class members 235 | self.anns = anns 236 | self.imgToAnns = imgToAnns 237 | self.catToImgs = catToImgs 238 | self.imgs = imgs 239 | self.cats = cats 240 | 241 | 242 | maskUtils = mask_util 243 | 244 | 245 | def loadRes(self, resFile): 246 | """ 247 | Load result file and return a result api object. 
248 | :param resFile (str) : file name of result file 249 | :return: res (obj) : result api object 250 | """ 251 | res = COCO() 252 | res.dataset['images'] = [img for img in self.dataset['images']] 253 | 254 | # print('Loading and preparing results...') 255 | # tic = time.time() 256 | if isinstance(resFile, torch._six.string_classes): 257 | anns = json.load(open(resFile)) 258 | elif type(resFile) == np.ndarray: 259 | anns = self.loadNumpyAnnotations(resFile) 260 | else: 261 | anns = resFile 262 | assert type(anns) == list, 'results in not an array of objects' 263 | annsImgIds = [ann['image_id'] for ann in anns] 264 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 265 | 'Results do not correspond to current coco set' 266 | if 'caption' in anns[0]: 267 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 268 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 269 | for id, ann in enumerate(anns): 270 | ann['id'] = id + 1 271 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 272 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 273 | for id, ann in enumerate(anns): 274 | bb = ann['bbox'] 275 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 276 | if 'segmentation' not in ann: 277 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 278 | ann['area'] = bb[2] * bb[3] 279 | ann['id'] = id + 1 280 | ann['iscrowd'] = 0 281 | elif 'segmentation' in anns[0]: 282 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 283 | for id, ann in enumerate(anns): 284 | # now only support compressed RLE format as segmentation results 285 | ann['area'] = maskUtils.area(ann['segmentation']) 286 | if 'bbox' not in ann: 287 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 288 | ann['id'] = id + 1 289 | ann['iscrowd'] = 0 290 | elif 'keypoints' in anns[0]: 291 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 292 | for id, ann in enumerate(anns): 293 | s = ann['keypoints'] 294 | x = s[0::3] 295 | y = s[1::3] 296 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 297 | ann['area'] = (x2 - x1) * (y2 - y1) 298 | ann['id'] = id + 1 299 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 300 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 301 | 302 | res.dataset['annotations'] = anns 303 | createIndex(res) 304 | return res 305 | 306 | 307 | def evaluate(self): 308 | ''' 309 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 310 | :return: None 311 | ''' 312 | # tic = time.time() 313 | # print('Running per image evaluation...') 314 | p = self.params 315 | # add backward compatibility if useSegm is specified in params 316 | if p.useSegm is not None: 317 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 318 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 319 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 320 | p.imgIds = list(np.unique(p.imgIds)) 321 | if p.useCats: 322 | p.catIds = list(np.unique(p.catIds)) 323 | p.maxDets = sorted(p.maxDets) 324 | self.params = p 325 | 326 | self._prepare() 327 | # loop through images, area range, max detection number 328 | catIds = p.catIds if p.useCats else [-1] 329 | 330 | if p.iouType == 'segm' or p.iouType == 'bbox': 331 | computeIoU = self.computeIoU 332 | elif p.iouType == 'keypoints': 333 | computeIoU = self.computeOks 334 | self.ious = { 335 | (imgId, catId): computeIoU(imgId, catId) 336 | for imgId in p.imgIds 337 | for catId in catIds} 338 | 339 | evaluateImg = self.evaluateImg 340 | maxDet = p.maxDets[-1] 341 | evalImgs = [ 342 | evaluateImg(imgId, catId, areaRng, maxDet) 343 | for catId in catIds 344 | for areaRng in p.areaRng 345 | for imgId in p.imgIds 346 | ] 347 | # this is NOT in the pycocotools code, but could be done outside 348 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 349 | self._paramsEval = copy.deepcopy(self.params) 350 | # toc = time.time() 351 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 352 | return p.imgIds, evalImgs 353 | 354 | ################################################################# 355 | # end of straight copy from pycocotools, just removing the prints 356 | ################################################################# 357 | -------------------------------------------------------------------------------- /src/mask_rg/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from PIL import Image 4 | 5 | import torch 6 | import torch.utils.data 7 | import torchvision 8 | 9 | from pycocotools import mask as coco_mask 10 | from pycocotools.coco import COCO 11 | 12 | import src.mask_rg.transforms as T 13 | 14 | 15 | class FilterAndRemapCocoCategories(object): 16 | def __init__(self, categories, remap=True): 17 | self.categories = categories 18 | self.remap = remap 19 | 20 | def __call__(self, image, target): 21 | anno = target["annotations"] 22 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 23 | if not self.remap: 24 | target["annotations"] = anno 25 | return image, target 26 | anno = copy.deepcopy(anno) 27 | for obj in anno: 28 | obj["category_id"] = self.categories.index(obj["category_id"]) 29 | target["annotations"] = anno 30 | return image, target 31 | 32 | 33 | def convert_coco_poly_to_mask(segmentations, height, width): 34 | masks = [] 35 | for polygons in segmentations: 36 | rles = coco_mask.frPyObjects(polygons, height, width) 37 | mask = coco_mask.decode(rles) 38 | if len(mask.shape) < 3: 39 | mask = mask[..., None] 40 | mask = torch.as_tensor(mask, dtype=torch.uint8) 41 | mask = mask.any(dim=2) 42 | masks.append(mask) 43 | if masks: 44 | masks = torch.stack(masks, dim=0) 45 | else: 46 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 47 | return masks 48 | 49 | 50 | class ConvertCocoPolysToMask(object): 51 | def __call__(self, image, target): 52 | w, h = image.size 53 | 54 | image_id = target["image_id"] 55 | image_id = torch.tensor([image_id]) 56 | 57 | anno = target["annotations"] 58 | 59 | anno = [obj for obj in anno if obj['iscrowd'] == 0] 60 | 61 | boxes = [obj["bbox"] for obj in anno] 62 | # guard against no boxes via resizing 63 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 64 | boxes[:, 2:] += boxes[:, :2] 65 | 
boxes[:, 0::2].clamp_(min=0, max=w) 66 | boxes[:, 1::2].clamp_(min=0, max=h) 67 | 68 | classes = [obj["category_id"] for obj in anno] 69 | classes = torch.tensor(classes, dtype=torch.int64) 70 | 71 | segmentations = [obj["segmentation"] for obj in anno] 72 | masks = convert_coco_poly_to_mask(segmentations, h, w) 73 | 74 | keypoints = None 75 | if anno and "keypoints" in anno[0]: 76 | keypoints = [obj["keypoints"] for obj in anno] 77 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 78 | num_keypoints = keypoints.shape[0] 79 | if num_keypoints: 80 | keypoints = keypoints.view(num_keypoints, -1, 3) 81 | 82 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 83 | boxes = boxes[keep] 84 | classes = classes[keep] 85 | masks = masks[keep] 86 | if keypoints is not None: 87 | keypoints = keypoints[keep] 88 | 89 | target = {} 90 | target["boxes"] = boxes 91 | target["labels"] = classes 92 | target["masks"] = masks 93 | target["image_id"] = image_id 94 | if keypoints is not None: 95 | target["keypoints"] = keypoints 96 | 97 | # for conversion to coco api 98 | area = torch.tensor([obj["area"] for obj in anno]) 99 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 100 | target["area"] = area 101 | target["iscrowd"] = iscrowd 102 | 103 | return image, target 104 | 105 | 106 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 107 | def _has_only_empty_bbox(anno): 108 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 109 | 110 | def _count_visible_keypoints(anno): 111 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 112 | 113 | min_keypoints_per_image = 10 114 | 115 | def _has_valid_annotation(anno): 116 | # if it's empty, there is no annotation 117 | if len(anno) == 0: 118 | return False 119 | # if all boxes have close to zero area, there is no annotation 120 | if _has_only_empty_bbox(anno): 121 | return False 122 | # keypoints task have a slight different critera for considering 123 | # if an annotation is valid 124 | if "keypoints" not in anno[0]: 125 | return True 126 | # for keypoint detection tasks, only consider valid images those 127 | # containing at least min_keypoints_per_image 128 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 129 | return True 130 | return False 131 | 132 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 133 | ids = [] 134 | for ds_idx, img_id in enumerate(dataset.ids): 135 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 136 | anno = dataset.coco.loadAnns(ann_ids) 137 | if cat_list: 138 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 139 | if _has_valid_annotation(anno): 140 | ids.append(ds_idx) 141 | 142 | dataset = torch.utils.data.Subset(dataset, ids) 143 | return dataset 144 | 145 | 146 | def convert_to_coco_api(ds): 147 | coco_ds = COCO() 148 | # annotation IDs need to start at 1, not 0, see torchvision issue #1530 149 | ann_id = 1 150 | dataset = {'images': [], 'categories': [], 'annotations': []} 151 | categories = set() 152 | for img_idx in range(len(ds)): 153 | # find better way to get target 154 | # targets = ds.get_annotations(img_idx) 155 | img, targets = ds[img_idx] 156 | image_id = targets["image_id"].item() 157 | img_dict = {} 158 | img_dict['id'] = image_id 159 | img_dict['height'] = img.shape[-2] 160 | img_dict['width'] = img.shape[-1] 161 | dataset['images'].append(img_dict) 162 | bboxes = targets["boxes"] 163 | bboxes[:, 2:] -= bboxes[:, :2] 164 | bboxes = bboxes.tolist() 165 | labels = 
targets['labels'].tolist() 166 | areas = targets['area'].tolist() 167 | iscrowd = targets['iscrowd'].tolist() 168 | if 'masks' in targets: 169 | masks = targets['masks'] 170 | # make masks Fortran contiguous for coco_mask 171 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 172 | if 'keypoints' in targets: 173 | keypoints = targets['keypoints'] 174 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 175 | num_objs = len(bboxes) 176 | for i in range(num_objs): 177 | ann = {} 178 | ann['image_id'] = image_id 179 | ann['bbox'] = bboxes[i] 180 | ann['category_id'] = labels[i] 181 | categories.add(labels[i]) 182 | ann['area'] = areas[i] 183 | ann['iscrowd'] = iscrowd[i] 184 | ann['id'] = ann_id 185 | if 'masks' in targets: 186 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 187 | if 'keypoints' in targets: 188 | ann['keypoints'] = keypoints[i] 189 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 190 | dataset['annotations'].append(ann) 191 | ann_id += 1 192 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 193 | coco_ds.dataset = dataset 194 | coco_ds.createIndex() 195 | return coco_ds 196 | 197 | 198 | def get_coco_api_from_dataset(dataset): 199 | for _ in range(10): 200 | if isinstance(dataset, torchvision.datasets.CocoDetection): 201 | break 202 | if isinstance(dataset, torch.utils.data.Subset): 203 | dataset = dataset 204 | # dataset = dataset.dataset.Subset 205 | if isinstance(dataset, torchvision.datasets.CocoDetection): 206 | return dataset.coco 207 | return convert_to_coco_api(dataset) 208 | 209 | 210 | class CocoDetection(torchvision.datasets.CocoDetection): 211 | def __init__(self, img_folder, ann_file, transforms): 212 | super(CocoDetection, self).__init__(img_folder, ann_file) 213 | self._transforms = transforms 214 | 215 | def __getitem__(self, idx): 216 | img, target = super(CocoDetection, self).__getitem__(idx) 217 | image_id = self.ids[idx] 218 | target = dict(image_id=image_id, annotations=target) 219 | if self._transforms is not None: 220 | img, target = self._transforms(img, target) 221 | return img, target 222 | 223 | 224 | def get_coco(root, image_set, transforms, mode='instances'): 225 | anno_file_template = "{}_{}2017.json" 226 | PATHS = { 227 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 228 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 229 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 230 | } 231 | 232 | t = [ConvertCocoPolysToMask()] 233 | 234 | if transforms is not None: 235 | t.append(transforms) 236 | transforms = T.Compose(t) 237 | 238 | img_folder, ann_file = PATHS[image_set] 239 | img_folder = os.path.join(root, img_folder) 240 | ann_file = os.path.join(root, ann_file) 241 | 242 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 243 | 244 | if image_set == "train": 245 | dataset = _coco_remove_images_without_annotations(dataset) 246 | 247 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 248 | 249 | return dataset 250 | 251 | 252 | def get_coco_kp(root, image_set, transforms): 253 | return get_coco(root, image_set, transforms, mode="person_keypoints") 254 | -------------------------------------------------------------------------------- /src/mask_rg/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from PIL 
import Image 5 | import cv2 6 | import torchvision.transforms as T 7 | import matplotlib.pyplot as plt 8 | import matplotlib.patches as patches 9 | import utils 10 | 11 | 12 | class PushAndGraspDataset(object): 13 | 14 | def __init__(self, config): 15 | 16 | # read configuration 17 | self.root = config['dataset']['path'] 18 | self.imgs_path = os.path.join(config['dataset']['path'], config['dataset']['images']) 19 | self.masks_path = os.path.join(config['dataset']['path'], config['dataset']['masks']) 20 | self.transforms = config['dataset']['data_augmentation'] 21 | self.is_depth = config['dataset']['is_depth'] 22 | 23 | self.train_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['train_indices']), 24 | allow_pickle=True) 25 | self.test_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['test_indices']), 26 | allow_pickle=True) 27 | 28 | 29 | # get a list of images and GTs 30 | self.imgs = list(sorted(os.listdir(self.imgs_path))) 31 | self.masks = list(sorted(os.listdir(self.masks_path))) 32 | 33 | def __getitem__(self, idx): 34 | 35 | # set getter path 36 | img_path = os.path.join(self.imgs_path, self.imgs[idx]) 37 | mask_path = os.path.join(self.masks_path, self.masks[idx]) 38 | 39 | # read an image 40 | if self.is_depth: 41 | 42 | img = np.load(img_path).astype(dtype=np.int16) 43 | # TODO take the normalization somewhere out 44 | # depth range --> 0 to 5000 1e-4meters 45 | # max depth = 5000 --> background (0.5m) 46 | # min depth = 3459 --> the minimum depth value in the all dataset (0.npy to 012250.npy) 47 | # img = np.round((img - 2960) / 8).astype(np.uint8) 48 | 49 | # all vals are in between 0 to 250 50 | img = np.round(img/20).astype(np.uint8) 51 | img = np.repeat(img.reshape(1024, 1024, 1), 3, axis=2) 52 | else: 53 | # TODO this conversion might create problems for depth when converting back, check! 54 | img = Image.open(img_path).convert("RGB") 55 | 56 | # read a GT mask and reformat 57 | mask = cv2.imread(mask_path) 58 | mask = np.asarray(mask) 59 | mask = mask[:, :, :1] 60 | mask = np.squeeze(mask, axis=2) 61 | # plt.imshow(mask) 62 | # plt.show() 63 | 64 | # list of objects - All data had been cleaned and obj_ids are in ordered 65 | obj_ids = np.unique(mask) 66 | # Remove background (i.e. 0) 67 | obj_ids = obj_ids[1:] 68 | 69 | # combine the masks TODO read height and weight from img 70 | num_objs = len(obj_ids) 71 | masks = np.zeros((num_objs, 1024, 1024), dtype=np.uint8) 72 | for i in range(num_objs): 73 | masks[i][np.where(mask == i+1)] = 1 74 | 75 | # get bounding box coordinates for each mask 76 | boxes = [] 77 | for i in range(num_objs): 78 | # plt.imshow(masks[i]) 79 | # plt.show() 80 | pos = np.where(masks[i]) 81 | xmin = np.min(pos[1]) 82 | xmax = np.max(pos[1]) 83 | ymin = np.min(pos[0]) 84 | ymax = np.max(pos[0]) 85 | boxes.append([xmin, ymin, xmax, ymax]) 86 | 87 | # fig, ax = plt.subplots(1) 88 | # ax.imshow(img) 89 | # In the documentation it say bottom left corner, here i gave upper left corner for the correct box!! 
90 | # rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, linewidth=3, edgecolor='r', facecolor='none') 91 | # ax.add_patch(rect) 92 | # plt.show() 93 | 94 | # convert everything into a torch.Tensor 95 | boxes = torch.as_tensor(boxes, dtype=torch.float32) 96 | # there is only one class 97 | labels = torch.ones((num_objs,), dtype=torch.int64) 98 | masks = torch.as_tensor(masks, dtype=torch.uint8) 99 | 100 | image_id = torch.tensor([idx]) 101 | area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) 102 | 103 | # If there is no object on the scene, this will be used to ignore that instance during coco eval 104 | iscrowd = torch.zeros((num_objs,), dtype=torch.int64) 105 | 106 | # this check is to avoid the 'loss is NaN' error during training, which can be caused by an invalid box 107 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 108 | boxes = boxes[keep] 109 | masks = masks[keep] 110 | 111 | target = {} 112 | target["boxes"] = boxes 113 | target["labels"] = labels 114 | target["masks"] = masks 115 | target["image_id"] = image_id 116 | target["area"] = area 117 | target["iscrowd"] = iscrowd 118 | 119 | # TODO data augmentation 120 | # if self.transforms is not None: 121 | # img, target = self.transforms(img, target) 122 | 123 | # convert image to torch tensor 124 | # if self.is_depth: 125 | # img = torch.from_numpy(img) 126 | # asd=23 127 | # else: 128 | tt = T.ToTensor() 129 | img = tt(img) 130 | 131 | return img, target 132 | 133 | # def augment_data(train): 134 | # transforms = [T.ToTensor()] 135 | # if train: 136 | # transforms.append(T.RandomHorizontalFlip(0.5)) 137 | # return T.Compose(transforms) 138 | 139 | def __len__(self): 140 | return len(self.imgs) -------------------------------------------------------------------------------- /src/mask_rg/engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | import torch 5 | 6 | import torchvision.models.detection.mask_rcnn 7 | 8 | from src.mask_rg.coco_utils import get_coco_api_from_dataset 9 | from src.mask_rg.coco_eval import CocoEvaluator 10 | from src.mask_rg import utils 11 | 12 | 13 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): 14 | model.train() 15 | metric_logger = utils.MetricLogger(delimiter=" ") 16 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 17 | header = 'Epoch: [{}]'.format(epoch) 18 | 19 | lr_scheduler = None 20 | if epoch == 0: 21 | warmup_factor = 1. 
/ 1000 22 | warmup_iters = min(1000, len(data_loader) - 1) 23 | 24 | lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) 25 | 26 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 27 | images = list(image.to(device) for image in images) 28 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 29 | 30 | loss_dict = model(images, targets) 31 | 32 | losses = sum(loss for loss in loss_dict.values()) 33 | 34 | # reduce losses over all GPUs for logging purposes 35 | loss_dict_reduced = utils.reduce_dict(loss_dict) 36 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 37 | 38 | loss_value = losses_reduced.item() 39 | 40 | if not math.isfinite(loss_value): 41 | print("Loss is {}, stopping training".format(loss_value)) 42 | print(loss_dict_reduced) 43 | sys.exit(1) 44 | 45 | optimizer.zero_grad() 46 | losses.backward() 47 | optimizer.step() 48 | 49 | if lr_scheduler is not None: 50 | lr_scheduler.step() 51 | 52 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 53 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 54 | 55 | return metric_logger 56 | 57 | 58 | def _get_iou_types(model): 59 | model_without_ddp = model 60 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 61 | model_without_ddp = model.module 62 | iou_types = ["bbox"] 63 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 64 | iou_types.append("segm") 65 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 66 | iou_types.append("keypoints") 67 | return iou_types 68 | 69 | 70 | @torch.no_grad() 71 | def evaluate(model, data_loader, device): 72 | # n_threads = torch.get_num_threads() 73 | # FIXME remove this and make paste_masks_in_image run on the GPU 74 | # torch.set_num_threads(n_threads) 75 | cpu_device = torch.device('cuda') 76 | model.eval() 77 | header = 'Test:' 78 | 79 | coco = get_coco_api_from_dataset(data_loader.dataset) 80 | metric_logger = utils.MetricLogger(delimiter=" ") 81 | iou_types = _get_iou_types(model) 82 | 83 | coco_evaluator = CocoEvaluator(coco, iou_types) 84 | 85 | for images, targets in metric_logger.log_every(data_loader, 100, header): 86 | images = list(img.to(device) for img in images) 87 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 88 | 89 | torch.cuda.synchronize() 90 | model_time = time.time() 91 | outputs = model(images) 92 | 93 | outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] 94 | model_time = time.time() - model_time 95 | 96 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 97 | evaluator_time = time.time() 98 | coco_evaluator.update(res) 99 | evaluator_time = time.time() - evaluator_time 100 | metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) 101 | 102 | # gather the stats from all processes 103 | metric_logger.synchronize_between_processes() 104 | print("Averaged stats:", metric_logger) 105 | coco_evaluator.synchronize_between_processes() 106 | 107 | # accumulate predictions from all images 108 | coco_evaluator.accumulate() 109 | coco_evaluator.summarize() 110 | # torch.set_num_threads(n_threads) 111 | return coco_evaluator 112 | -------------------------------------------------------------------------------- /src/mask_rg/main.py: -------------------------------------------------------------------------------- 1 | from model import MaskRGNetwork 2 | from dataset import PushAndGraspDataset 3 | import yaml 4 | 
import numpy as np 5 | import os 6 | from torch.utils import data as td 7 | 8 | 9 | def main(): 10 | 11 | with open('model_config.yaml') as f: 12 | configuration = yaml.load(f, Loader=yaml.FullLoader) 13 | 14 | # create the model 15 | model = MaskRGNetwork(configuration) 16 | 17 | # create dataset objects and set the model 18 | dataset = PushAndGraspDataset(configuration) 19 | # the train/test split indices are loaded inside PushAndGraspDataset from the index files given in the config, 20 | # and set_data() selects the corresponding subset internally, so the full dataset object is passed below 21 | # Training: 22 | # model.set_data(dataset) 23 | # model.train_model() 24 | 25 | # Evaluation: 26 | # this loads the saved weights from the file in the config file 27 | model.load_weights() 28 | # select the test split for the evaluation 29 | model.set_data(dataset, is_test=True) 30 | 31 | # evaluate 32 | res = model.evaluate_model() 33 | np.save('results.npy', res) 34 | with open('results.txt', 'w') as output: 35 | output.write(str(res)) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() -------------------------------------------------------------------------------- /src/mask_rg/mask_rg.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.model import MaskRGNetwork 2 | import yaml 3 | import numpy as np 4 | import torchvision.transforms as T 5 | import torch 6 | from src.mask_rg.rewards import RewardGenerator 7 | 8 | 9 | CONFIG_PATH = './model_config.yaml' 10 | 11 | class MaskRG(object): 12 | 13 | def __init__(self, confidence_threshold=0.75, mask_threshold=0.75): 14 | # Make sure this configuration file is in your execution path 15 | with open(CONFIG_PATH) as f: 16 | configuration = yaml.load(f, Loader=yaml.FullLoader) 17 | with torch.no_grad(): 18 | self.model = MaskRGNetwork(configuration) 19 | self.model.load_weights() 20 | self.rg = RewardGenerator(confidence_threshold, mask_threshold) 21 | self.prediction = None 22 | self.gt = None 23 | 24 | def set_reward_generator(self, depth_image, gt_segmentation): 25 | 26 | depth_image = np.round(depth_image / 20).astype(np.uint8) 27 | depth_image = np.repeat(depth_image.reshape(1024, 1024, 1), 3, axis=2) 28 | with torch.no_grad(): 29 | tt = T.ToTensor() 30 | depth_tensor = tt(depth_image) 31 | depth_tensor = depth_tensor.cuda() 32 | # print(depth_tensor.requires_grad) 33 | self.prediction = self.model.eval_single_img([depth_tensor]) 34 | 35 | self.gt = gt_segmentation 36 | self.rg.set_element(self.prediction, self.gt) 37 | 38 | def get_current_rewards(self): 39 | return self.rg.get_reward() 40 | 41 | def print_segmentation(self, pred_ids): 42 | return self.rg.print_seg_diff(pred_ids) 43 | 44 | def print_masks(self): 45 | return self.rg.print_masks() -------------------------------------------------------------------------------- /src/mask_rg/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | from torch.utils import data as td 6 | import torchvision 7 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 8 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 9 | from torchvision.models.detection import MaskRCNN 10 | from torchvision.models.detection.rpn import AnchorGenerator 11 | from torchvision.transforms import ToTensor 12 | from src.mask_rg.engine import train_one_epoch, evaluate 13 | from src.mask_rg import utils 14 | import time 15 | import datetime 16 | import yaml 17 | from 
collections import OrderedDict 18 | import cv2 19 | from PIL import Image 20 | import matplotlib.pyplot as plt 21 | import matplotlib.patches as patches 22 | 23 | class MaskRGNetwork(nn.Module): 24 | def __init__(self, config): 25 | super(MaskRGNetwork, self).__init__() 26 | 27 | self.training = False # check if this necessary 28 | 29 | # read configuration 30 | if config['model']['file'] == 'new': 31 | use_old_weights = False 32 | self.m_pretrained = False 33 | elif config['model']['file'] == 'pretrained': 34 | use_old_weights = False 35 | self.m_pretrained = True 36 | else: 37 | use_old_weights = True 38 | self.weigths_path = os.path.join(config['model']['path'], config['model']['file']) 39 | 40 | self.num_epochs = config['model']['settings']['epochs'] 41 | self.lr = config['model']['settings']['learning_rate'] 42 | self.batch_size = config['model']['settings']['batch_size'] 43 | self.backbone = config['model']['settings']['backbone'] 44 | self.bb_pretrained = config['model']['settings']['backbone_pretrained'] 45 | is_cuda = config['model']['settings']['cuda_available'] 46 | 47 | # uncomment for training!!! 48 | # self.train_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['train_indices'])) 49 | # self.test_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['test_indices'])) 50 | 51 | self.saving_path = config['saving']['path'] 52 | self.saving_prefix = config['saving']['model_name'] 53 | 54 | if is_cuda: 55 | if torch.cuda.is_available(): 56 | self.device = 'cuda' 57 | print("CUDA device found!") 58 | else: 59 | print("WARNING: CUDA is not available!!! CPU will be used instead!") 60 | self.device = 'cpu' 61 | else: 62 | print("CPU will be used to train/evaluate the network!") 63 | self.device = 'cpu' 64 | 65 | # self.backbone = torchvision.models.resnet34(pretrained=True, progress=True) 66 | # self.anchors = AnchorGenerator(sizes=ANCHOR_SIZES) 67 | # self.backbone.out_channels = [512, 512, 512] 68 | 69 | if self.backbone == 'resnet50': 70 | if not use_old_weights: 71 | self.mask_r_cnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=self.m_pretrained, progress=True, 72 | pretrained_backbone=self.bb_pretrained) 73 | else: 74 | self.mask_r_cnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=False, 75 | progress=True, 76 | pretrained_backbone=self.bb_pretrained) 77 | 78 | # get number of input features for the classifier 79 | self.input_features = self.mask_r_cnn.roi_heads.box_predictor.cls_score.in_features 80 | 81 | # replace the pre-trained head with a new one --> category-agnostic so number of classes=2 82 | self.mask_r_cnn.roi_heads.box_predictor = FastRCNNPredictor(self.input_features, 2) 83 | 84 | # now get the number of input features for the mask classifier 85 | self.input_features_mask = self.mask_r_cnn.roi_heads.mask_predictor.conv5_mask.in_channels 86 | 87 | hidden_layer = 256 88 | # and replace the mask predictor with a new one 89 | self.mask_r_cnn.roi_heads.mask_predictor = MaskRCNNPredictor(self.input_features_mask, hidden_layer, 2) 90 | 91 | self.mask_r_cnn.to(self.device) 92 | # self.mask_r_cnn = MaskRCNN(backbone=self.backbone, num_classes=2, rpn_anchor_generator=self.anchors) 93 | 94 | # construct an optimizer 95 | self.params = [p for p in self.mask_r_cnn.parameters() if p.requires_grad] 96 | self.optimizer = torch.optim.SGD(self.params, lr=self.lr, momentum=0.9, weight_decay=0.00005) 97 | 98 | if use_old_weights: 99 | self.load_weights() 100 | 101 | 102 | def train_model(self): 103 | 
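# (added note) train_model() assumes set_data() has already been called so that self.data_loader exists;
# train_one_epoch() below handles the per-batch forward/backward passes, the optimizer steps and the warm-up
# learning-rate schedule, and _save_model() writes a snapshot after every epoch.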
104 | for epoch in range(self.num_epochs): 105 | # train for one epoch, printing every 10 iterations 106 | train_one_epoch(self.mask_r_cnn, self.optimizer, self.data_loader, self.device, epoch, print_freq=5) 107 | # update the learning rate 108 | # self.lr_scheduler.step() 109 | self.optimizer.step() 110 | self._save_model('_epoch_no_' + str(epoch) + '_') 111 | 112 | 113 | def evaluate_model(self): 114 | # evaluate on the test dataset 115 | res = evaluate(self.mask_r_cnn, self.data_loader, device=self.device) 116 | return res 117 | 118 | def eval_single_img(self, img): 119 | # self.device = torch.device("cpu") 120 | 121 | self.mask_r_cnn.eval() 122 | with torch.no_grad(): 123 | preds = self.mask_r_cnn(img) 124 | 125 | return preds 126 | 127 | def load_weights(self): 128 | # this should be called after the initialisation of the model 129 | self.mask_r_cnn.load_state_dict(torch.load(self.weigths_path)) 130 | 131 | def set_data(self, data, is_test=False): 132 | 133 | data_subset = td.Subset(data, data.train_indices) if not is_test else td.Subset(data, data.test_indices) 134 | 135 | # init a data loader either for training or testing 136 | # multiprocessing for data loading (num_workers) is not recommended for CUDA, so automatic memory pining 137 | # (pin_memory=True) is used instead. 138 | self.data_loader = td.DataLoader(data_subset, batch_size=self.batch_size, shuffle=False, 139 | num_workers=0, pin_memory=True, 140 | collate_fn=utils.collate_fn) 141 | 142 | def _save_model(self, string=None): 143 | t = time.time() 144 | timestamp = datetime.datetime.fromtimestamp(t) 145 | file_name = self.saving_prefix + string + timestamp.strftime('%Y-%m-%d.%H:%M:%S') + '.pth' 146 | torch.save(self.mask_r_cnn.state_dict(), os.path.join(self.saving_path, file_name)) 147 | 148 | @staticmethod 149 | def print_boxes(image, input_tensor, score_threshold=0.75): 150 | boxes = input_tensor[0]['boxes'] 151 | scores = input_tensor[0]['scores'] 152 | 153 | fig, ax = plt.subplots(1) 154 | ax.imshow(image) 155 | num_box=0 156 | for box in boxes: 157 | box_index = np.where(boxes == box) 158 | box_index = int(box_index[0][0]) 159 | if scores[box_index] > score_threshold: 160 | # In the documentation it say bottom left corner, here i gave upper left corner for the correct box!! 
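# (added clarification) matplotlib's Rectangle is anchored at its (x, y) corner in data coordinates; because imshow()
# places the image origin at the top-left with the y-axis pointing down, passing (box[0], box[1]) = (x_min, y_min)
# anchors the rectangle at the visually upper-left corner of the detection, so the drawn box lines up with the image.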
161 | rect = patches.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1], linewidth=3, edgecolor='r', 162 | facecolor='none') 163 | ax.add_patch(rect) 164 | num_box += 1 165 | print('num boxes that have a score higher than .75 --> ', num_box) 166 | plt.show() 167 | 168 | @staticmethod 169 | def print_masks(image, input_tensor, score_threshold=0.75): 170 | masks = input_tensor[0]['masks'] 171 | scores = input_tensor[0]['scores'] 172 | num_pred = masks.shape[0] 173 | num_masks = 0 174 | all = np.zeros((1024, 1024), dtype=np.uint8) 175 | for mask in range(0, num_pred): 176 | if scores[mask] > score_threshold: 177 | #TODO if cuda, add a control here 178 | 179 | mask_arr = np.asarray(masks[mask].cpu().detach()).reshape((1024, 1024)) 180 | mask_arr = np.where(mask_arr > score_threshold, 1, 0) 181 | all[np.where(mask_arr > 0)] = num_masks 182 | 183 | num_masks += 1 184 | plt.imshow(all) 185 | plt.show() 186 | print('num masks that have a score higher than .75 --> ', num_masks) 187 | 188 | 189 | def forward(self, input_img=None): 190 | pass 191 | # print("evaluation") 192 | # tt = T.ToTensor 193 | # input_tensor = tt(input_img) 194 | # tensor_list = [input_tensor] 195 | # self.mask_r_cnn.eval() 196 | # predictions = self.mask_r_cnn(tensor_list) 197 | # return predictions 198 | 199 | -------------------------------------------------------------------------------- /src/mask_rg/rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | NON_DETECTION_PUNISHMENT = -0.02 5 | # EXTRA_DETECTION_PUNISHMENT = 0.0 #-0.01 6 | 7 | color_space = np.asarray([#[78, 121, 167], # blue 8 | #[89, 161, 79], # green 9 | # [156, 117, 95], # brown 10 | # [242, 142, 43], # orange 11 | #[237, 201, 72], # yellow 12 | #[186, 176, 172], # gray 13 | # [255, 87, 89], # red 14 | #[176, 122, 161], # purple 15 | #[118, 183, 178], # cyan 16 | #[255, 157, 167], # pink 17 | [154, 205, 50], # yellow green 18 | [85, 107, 47], # dark olive green 19 | [107, 142, 35], # olive drab 20 | [124, 252, 0], # lawn green 21 | [127, 255, 0], # chart reuse 22 | [173, 255, 47], # green yellow 23 | [0, 100, 0], # dark green 24 | [0, 128, 0], # green 25 | [34, 139, 34], # forest green 26 | [0, 255, 0], # lime 27 | [50, 205, 50], # lime green 28 | [144, 238, 144], # light green 29 | [152, 251, 152], # pale green 30 | [143, 188, 143], # dark sea green 31 | [0, 250, 154], # medium spring green 32 | [0, 255, 127], # spring green 33 | [46, 139, 87], # sea green 34 | [0,255,255], # cyan 35 | [224,255,255], # light cyan 36 | [0,206,209], # dark turquoise 37 | [64,224,208], # turquoise 38 | [72,209,204], # medium turquoise 39 | [175,238,238], # pale turquoise 40 | [127,255,212], # aqua marine 41 | [176,224,230], # powder blue 42 | [95,158,160], # cadet blue 43 | [70,130,180], # steel blue 44 | [100,149,237], # corn flower blue 45 | [0,191,255], # deep sky blue 46 | [30,144,255], # dodger blue 47 | [173,216,230], # light blue 48 | [135,206,235], # sky blue 49 | [135,206,250], # light sky blue 50 | [25,25,112], # midnight blue 51 | [0,0,128], # navy 52 | [0,0,139], # dark blue 53 | [0,0,205], # medium blue 54 | [0,0,255], # blue 55 | [65,105,225], # royal blue 56 | [138,43,226], # blue violet 57 | [75,0,130], # indigo 58 | [72,61,139], # dark slate blue 59 | ], dtype=np.uint8) 60 | 61 | color_space_red = np.asarray([[128, 0, 0], # maroon 62 | [139, 0, 0], # dark red 63 | [165, 42, 42], # brown 64 | [178, 34, 34], # firebrick 65 | [220, 20, 60], # crimson 66 
| [255, 0, 0], # red 67 | [255, 99, 71], # tomato 68 | [255, 127, 80], # coral 69 | [205, 92, 92], # indian red 70 | [240, 128, 128], # light coral 71 | [233, 150, 122], # dark salmon 72 | [250, 128, 114], # salmon 73 | [255, 160, 122], # light salmon 74 | [255, 69, 0], # orange red 75 | [255, 140, 0], # dark orange 76 | [255, 165, 0], # orange 77 | [160, 82, 45], # sienna 78 | [219, 112, 147] # pale viaolet red 79 | ], dtype=np.uint8) 80 | 81 | 82 | class BColors: 83 | HEADER = '\033[95m' 84 | OKBLUE = '\033[94m' 85 | OKGREEN = '\033[92m' 86 | WARNING = '\033[93m' 87 | FAIL = '\033[91m' 88 | ENDC = '\033[0m' 89 | BOLD = '\033[1m' 90 | UNDERLINE = '\033[4m' 91 | 92 | 93 | class RewardGenerator(object): 94 | 95 | def __init__(self, confidence_threshold=0.75, mask_threshold=0.75, device='cuda'): 96 | 97 | self.mask_threshold = mask_threshold 98 | self.confidence_threshold = confidence_threshold 99 | 100 | # self.set_element(pred_tensor, gt, confidence_threshold, mask_threshold, device) 101 | self.obj_ids = None 102 | self.num_objs = None 103 | self.height = 1024 104 | self.width = 1024 105 | self.gts = None 106 | self.scores = None 107 | self.masks = None 108 | self.boxes = None 109 | self.num_pred = None 110 | self.valid_masks = [] 111 | self.valid_boxes = [] 112 | self.num_valid_pred = 0 113 | 114 | def set_element(self, pred_tensor, gt): 115 | # list of objects - All data had been cleaned and obj_ids are in ordered 116 | obj_ids = np.unique(gt) 117 | # Remove background (i.e. 0) 118 | self.obj_ids = obj_ids[1:] 119 | 120 | # combine the masks 121 | self.num_objs = len(self.obj_ids) 122 | self.height = gt.shape[0] 123 | self.width = gt.shape[1] 124 | self.gts = np.zeros((self.num_objs, self.height, self.width), dtype=np.uint8) 125 | for i in range(self.num_objs): 126 | self.gts[i][np.where(gt == i + 1)] = 1 127 | 128 | self.scores = pred_tensor[0]['scores'] 129 | self.masks = pred_tensor[0]['masks'] 130 | self.boxes = pred_tensor[0]['boxes'] 131 | self.num_pred = self.masks.shape[0] 132 | self.valid_masks = [] 133 | self.valid_boxes = [] 134 | self.num_valid_pred = 0 135 | 136 | for pred_no in range(0, self.num_pred): 137 | if self.scores[pred_no] > self.confidence_threshold: 138 | self.valid_masks.append(self.masks[pred_no]) 139 | self.valid_boxes.append(self.boxes[pred_no]) 140 | self.num_valid_pred += 1 141 | 142 | def f1_reward(self): 143 | pass 144 | 145 | def object_wise(self): 146 | compare = 0 147 | return compare 148 | 149 | @staticmethod 150 | def _jaccard(gt, pred): 151 | intersection = gt * pred 152 | union = (gt + pred) / 2 153 | area_i = np.count_nonzero(intersection) 154 | area_u = np.count_nonzero(union) 155 | if area_u > 0: 156 | iou = area_i / area_u 157 | else: 158 | return [0, 0, 0] 159 | return [iou, area_i, area_u] 160 | 161 | def get_reward(self): 162 | res = self._detect_mask_id() 163 | # self.print_masks() 164 | # if self.num_valid_pred - self.num_objs > 0: 165 | # extra_pun = (self.num_valid_pred - self.num_objs) * EXTRA_DETECTION_PUNISHMENT 166 | # else: 167 | # extra_pun = 0 168 | if self.num_objs > 0: # and len(res) <= self.num_objs: 169 | sum_iou = np.sum(res, axis=0) 170 | sum_iou = sum_iou[1] 171 | reward_iou = sum_iou / self.num_objs 172 | 173 | true_detections = np.count_nonzero(res, axis=0) 174 | true_detections = true_detections[1] 175 | num_non_detected = self.num_objs - true_detections 176 | reward = reward_iou + num_non_detected * NON_DETECTION_PUNISHMENT #+ extra_pun 177 | 178 | print(BColors.HEADER + '--------------------------- MASK-RG RETURNS 
----------------------------' + BColors.ENDC) 179 | print('Number of objects in GT --> ', self.num_objs) 180 | print('Number of possible objects (confidence threshold {:.2f}) --> {:d}'.format(self.confidence_threshold, 181 | self.num_valid_pred)) 182 | print('Number of correct detections (masking threshold {:.2f}) --> {:d}'.format(self.mask_threshold, 183 | true_detections)) 184 | print('NEGATIVE SCORE: --> {:.2f} (cnst {:.2f} x non-detected '.format(num_non_detected * NON_DETECTION_PUNISHMENT, 185 | NON_DETECTION_PUNISHMENT) + BColors.FAIL + 186 | ' {:d}'.format(num_non_detected) + BColors.ENDC + ')') 187 | 188 | print('POSITIVE SCORE: --> {:.6f} (Matching IoU)'.format(reward_iou)) 189 | # print('Non detection ({:.3f}) + extra pun ({:.3f}) = ' 190 | # '{:.3f}'.format(num_non_detected * NON_DETECTION_PUNISHMENT, 191 | # extra_pun, num_non_detected * NON_DETECTION_PUNISHMENT + extra_pun)) 192 | print(BColors.HEADER + 'TOTAL RETURN --> {:.6f}'.format(reward)) 193 | print('------------------------------------------------------------------------' + BColors.ENDC) 194 | # count 255-254 and diff 195 | else: 196 | # predictions are more than actual objects in the scene 197 | print("CHECK!") 198 | return -1 199 | pass 200 | # return res, reward, [self.num_objs, num_non_detected, num_non_detected / self.num_objs] 201 | return res, reward, [self.num_objs, num_non_detected, self.num_valid_pred] 202 | 203 | def _detect_mask_id(self): 204 | 205 | results = [] 206 | pred_order = np.zeros((self.num_objs, 2), dtype=np.float32) 207 | 208 | for idx_gt, gt in enumerate(self.gts): 209 | for inx_pred, mask in enumerate(self.valid_masks): 210 | pred_arr = np.asarray(mask.cpu().detach()).reshape((self.height, self.width)) 211 | pred_arr = np.where(pred_arr > self.mask_threshold, 1, 0) 212 | res = self._jaccard(gt, pred_arr) 213 | if res[0] > 0: 214 | results.append([inx_pred, res[0]]) 215 | 216 | if not results == []: 217 | res_arr = np.asarray(results) 218 | max_index = np.argmax(res_arr, axis=0) 219 | max_index = max_index[1] 220 | if res_arr[max_index][1] > self.mask_threshold: 221 | pred_order[idx_gt] = res_arr[max_index] 222 | else: 223 | pred_order[idx_gt] = [255, 0] 224 | else: 225 | pred_order[idx_gt] = [254, 0] 226 | results = [] 227 | return pred_order 228 | 229 | def print_masks(self): 230 | # masks = input_tensor[0]['masks'] 231 | # scores = input_tensor[0]['scores'] 232 | # num_pred = masks.shape[0] 233 | num_masks = 0 234 | all = np.zeros((self.height, self.width), dtype=np.uint8) 235 | for mask in range(self.num_pred): 236 | if self.scores[mask] > self.confidence_threshold: 237 | # TODO if cuda, add a control here 238 | mask_arr = np.asarray(self.masks[mask].cpu().detach()).reshape((self.height, self.width)) 239 | mask_arr = np.where(mask_arr > self.mask_threshold, 1, 0) 240 | all[np.where(mask_arr > 0)] = num_masks 241 | num_masks += 1 242 | # plt.imshow(all) 243 | # plt.show() 244 | # print('num masks that have a score higher than %.02f --> ' % self.confidence_threshold, num_masks) 245 | return all 246 | 247 | def print_seg_diff(self, order): 248 | img_err_masks = np.zeros([self.height, self.width, 3], dtype=np.uint8) 249 | for idx, curr_gt in enumerate(self.gts): 250 | if not (int(order[idx][0]) == 254 or int(order[idx][0]) == 255): 251 | curr_pred = np.asarray(self.masks[int(order[idx][0])].cpu().detach()).reshape((self.height, self.width)) 252 | curr_pred = np.where(curr_pred > self.mask_threshold, 1, 0) 253 | 254 | compare = curr_pred != curr_gt 255 | img_err_masks[compare] = 
color_space[np.abs(41-idx)] 256 | # plt.imshow(img_err_masks) 257 | # plt.imshow(curr_gt) 258 | # plt.imshow(curr_pred) 259 | else: 260 | img_err_masks[np.asarray(curr_gt, dtype=np.bool)] = color_space_red[np.abs(17-idx)] 261 | # plt.imshow(curr_gt) 262 | # plt.imshow(img_err_masks) 263 | return img_err_masks 264 | -------------------------------------------------------------------------------- /src/mask_rg/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | 4 | from torchvision.transforms import functional as F 5 | 6 | 7 | def _flip_coco_person_keypoints(kps, width): 8 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 9 | flipped_data = kps[:, flip_inds] 10 | flipped_data[..., 0] = width - flipped_data[..., 0] 11 | # Maintain COCO convention that if visibility == 0, then x, y = 0 12 | inds = flipped_data[..., 2] == 0 13 | flipped_data[inds] = 0 14 | return flipped_data 15 | 16 | 17 | class Compose(object): 18 | def __init__(self, transforms): 19 | self.transforms = transforms 20 | 21 | def __call__(self, image, target): 22 | for t in self.transforms: 23 | image, target = t(image, target) 24 | return image, target 25 | 26 | 27 | class RandomHorizontalFlip(object): 28 | def __init__(self, prob): 29 | self.prob = prob 30 | 31 | def __call__(self, image, target): 32 | if random.random() < self.prob: 33 | height, width = image.shape[-2:] 34 | image = image.flip(-1) 35 | bbox = target["boxes"] 36 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 37 | target["boxes"] = bbox 38 | if "masks" in target: 39 | target["masks"] = target["masks"].flip(-1) 40 | if "keypoints" in target: 41 | keypoints = target["keypoints"] 42 | keypoints = _flip_coco_person_keypoints(keypoints, width) 43 | target["keypoints"] = keypoints 44 | return image, target 45 | 46 | 47 | class ToTensor(object): 48 | def __call__(self, image, target): 49 | image = F.to_tensor(image) 50 | return image, target 51 | -------------------------------------------------------------------------------- /src/mask_rg/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, deque 2 | import datetime 3 | import pickle 4 | import time 5 | 6 | import torch 7 | import torch.distributed as dist 8 | 9 | import errno 10 | import os 11 | 12 | 13 | class SmoothedValue(object): 14 | """Track a series of values and provide access to smoothed values over a 15 | window or the global series average. 16 | """ 17 | 18 | def __init__(self, window_size=20, fmt=None): 19 | if fmt is None: 20 | fmt = "{median:.4f} ({global_avg:.4f})" 21 | self.deque = deque(maxlen=window_size) 22 | self.total = 0.0 23 | self.count = 0 24 | self.fmt = fmt 25 | 26 | def update(self, value, n=1): 27 | self.deque.append(value) 28 | self.count += n 29 | self.total += value * n 30 | 31 | def synchronize_between_processes(self): 32 | """ 33 | Warning: does not synchronize the deque! 
34 | """ 35 | if not is_dist_avail_and_initialized(): 36 | return 37 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 38 | dist.barrier() 39 | dist.all_reduce(t) 40 | t = t.tolist() 41 | self.count = int(t[0]) 42 | self.total = t[1] 43 | 44 | @property 45 | def median(self): 46 | d = torch.tensor(list(self.deque)) 47 | return d.median().item() 48 | 49 | @property 50 | def avg(self): 51 | d = torch.tensor(list(self.deque), dtype=torch.float32) 52 | return d.mean().item() 53 | 54 | @property 55 | def global_avg(self): 56 | return self.total / self.count 57 | 58 | @property 59 | def max(self): 60 | return max(self.deque) 61 | 62 | @property 63 | def value(self): 64 | return self.deque[-1] 65 | 66 | def __str__(self): 67 | return self.fmt.format( 68 | median=self.median, 69 | avg=self.avg, 70 | global_avg=self.global_avg, 71 | max=self.max, 72 | value=self.value) 73 | 74 | 75 | def all_gather(data): 76 | """ 77 | Run all_gather on arbitrary picklable data (not necessarily tensors) 78 | Args: 79 | data: any picklable object 80 | Returns: 81 | list[data]: list of data gathered from each rank 82 | """ 83 | world_size = get_world_size() 84 | if world_size == 1: 85 | return [data] 86 | 87 | # serialized to a Tensor 88 | buffer = pickle.dumps(data) 89 | storage = torch.ByteStorage.from_buffer(buffer) 90 | tensor = torch.ByteTensor(storage).to("cuda") 91 | 92 | # obtain Tensor size of each rank 93 | local_size = torch.tensor([tensor.numel()], device="cuda") 94 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 95 | dist.all_gather(size_list, local_size) 96 | size_list = [int(size.item()) for size in size_list] 97 | max_size = max(size_list) 98 | 99 | # receiving Tensor from all ranks 100 | # we pad the tensor because torch all_gather does not support 101 | # gathering tensors of different shapes 102 | tensor_list = [] 103 | for _ in size_list: 104 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 105 | if local_size != max_size: 106 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 107 | tensor = torch.cat((tensor, padding), dim=0) 108 | dist.all_gather(tensor_list, tensor) 109 | 110 | data_list = [] 111 | for size, tensor in zip(size_list, tensor_list): 112 | buffer = tensor.cpu().numpy().tobytes()[:size] 113 | data_list.append(pickle.loads(buffer)) 114 | 115 | return data_list 116 | 117 | 118 | def reduce_dict(input_dict, average=True): 119 | """ 120 | Args: 121 | input_dict (dict): all the values will be reduced 122 | average (bool): whether to do average or sum 123 | Reduce the values in the dictionary from all processes so that all processes 124 | have the averaged results. Returns a dict with the same fields as 125 | input_dict, after reduction. 
126 | """ 127 | world_size = get_world_size() 128 | if world_size < 2: 129 | return input_dict 130 | with torch.no_grad(): 131 | names = [] 132 | values = [] 133 | # sort the keys so that they are consistent across processes 134 | for k in sorted(input_dict.keys()): 135 | names.append(k) 136 | values.append(input_dict[k]) 137 | values = torch.stack(values, dim=0) 138 | dist.all_reduce(values) 139 | if average: 140 | values /= world_size 141 | reduced_dict = {k: v for k, v in zip(names, values)} 142 | return reduced_dict 143 | 144 | 145 | class MetricLogger(object): 146 | def __init__(self, delimiter="\t"): 147 | self.meters = defaultdict(SmoothedValue) 148 | self.delimiter = delimiter 149 | 150 | def update(self, **kwargs): 151 | for k, v in kwargs.items(): 152 | if isinstance(v, torch.Tensor): 153 | v = v.item() 154 | assert isinstance(v, (float, int)) 155 | self.meters[k].update(v) 156 | 157 | def __getattr__(self, attr): 158 | if attr in self.meters: 159 | return self.meters[attr] 160 | if attr in self.__dict__: 161 | return self.__dict__[attr] 162 | raise AttributeError("'{}' object has no attribute '{}'".format( 163 | type(self).__name__, attr)) 164 | 165 | def __str__(self): 166 | loss_str = [] 167 | for name, meter in self.meters.items(): 168 | loss_str.append( 169 | "{}: {}".format(name, str(meter)) 170 | ) 171 | return self.delimiter.join(loss_str) 172 | 173 | def synchronize_between_processes(self): 174 | for meter in self.meters.values(): 175 | meter.synchronize_between_processes() 176 | 177 | def add_meter(self, name, meter): 178 | self.meters[name] = meter 179 | 180 | def log_every(self, iterable, print_freq, header=None): 181 | i = 0 182 | if not header: 183 | header = '' 184 | start_time = time.time() 185 | end = time.time() 186 | iter_time = SmoothedValue(fmt='{avg:.4f}') 187 | data_time = SmoothedValue(fmt='{avg:.4f}') 188 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 189 | if torch.cuda.is_available(): 190 | log_msg = self.delimiter.join([ 191 | header, 192 | '[{0' + space_fmt + '}/{1}]', 193 | 'eta: {eta}', 194 | '{meters}', 195 | 'time: {time}', 196 | 'data: {data}', 197 | 'max mem: {memory:.0f}' 198 | ]) 199 | else: 200 | log_msg = self.delimiter.join([ 201 | header, 202 | '[{0' + space_fmt + '}/{1}]', 203 | 'eta: {eta}', 204 | '{meters}', 205 | 'time: {time}', 206 | 'data: {data}' 207 | ]) 208 | MB = 1024.0 * 1024.0 209 | for obj in iterable: 210 | data_time.update(time.time() - end) 211 | yield obj 212 | iter_time.update(time.time() - end) 213 | if i % print_freq == 0 or i == len(iterable) - 1: 214 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 215 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 216 | if torch.cuda.is_available(): 217 | print(log_msg.format( 218 | i, len(iterable), eta=eta_string, 219 | meters=str(self), 220 | time=str(iter_time), data=str(data_time), 221 | memory=torch.cuda.max_memory_allocated() / MB)) 222 | else: 223 | print(log_msg.format( 224 | i, len(iterable), eta=eta_string, 225 | meters=str(self), 226 | time=str(iter_time), data=str(data_time))) 227 | i += 1 228 | end = time.time() 229 | total_time = time.time() - start_time 230 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 231 | print('{} Total time: {} ({:.4f} s / it)'.format( 232 | header, total_time_str, total_time / len(iterable))) 233 | 234 | 235 | def collate_fn(batch): 236 | return tuple(zip(*batch)) 237 | 238 | 239 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 240 | 241 | def f(x): 242 | if x >= 
warmup_iters: 243 | return 1 244 | alpha = float(x) / warmup_iters 245 | return warmup_factor * (1 - alpha) + alpha 246 | 247 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 248 | 249 | 250 | def mkdir(path): 251 | try: 252 | os.makedirs(path) 253 | except OSError as e: 254 | if e.errno != errno.EEXIST: 255 | raise 256 | 257 | 258 | def setup_for_distributed(is_master): 259 | """ 260 | This function disables printing when not in master process 261 | """ 262 | import builtins as __builtin__ 263 | builtin_print = __builtin__.print 264 | 265 | def print(*args, **kwargs): 266 | force = kwargs.pop('force', False) 267 | if is_master or force: 268 | builtin_print(*args, **kwargs) 269 | 270 | __builtin__.print = print 271 | 272 | 273 | def is_dist_avail_and_initialized(): 274 | if not dist.is_available(): 275 | return False 276 | if not dist.is_initialized(): 277 | return False 278 | return True 279 | 280 | 281 | def get_world_size(): 282 | if not is_dist_avail_and_initialized(): 283 | return 1 284 | return dist.get_world_size() 285 | 286 | 287 | def get_rank(): 288 | if not is_dist_avail_and_initialized(): 289 | return 0 290 | return dist.get_rank() 291 | 292 | 293 | def is_main_process(): 294 | return get_rank() == 0 295 | 296 | 297 | def save_on_master(*args, **kwargs): 298 | if is_main_process(): 299 | torch.save(*args, **kwargs) 300 | 301 | 302 | def init_distributed_mode(args): 303 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 304 | args.rank = int(os.environ["RANK"]) 305 | args.world_size = int(os.environ['WORLD_SIZE']) 306 | args.gpu = int(os.environ['LOCAL_RANK']) 307 | elif 'SLURM_PROCID' in os.environ: 308 | args.rank = int(os.environ['SLURM_PROCID']) 309 | args.gpu = args.rank % torch.cuda.device_count() 310 | else: 311 | print('Not using distributed mode') 312 | args.distributed = False 313 | return 314 | 315 | args.distributed = True 316 | 317 | torch.cuda.set_device(args.gpu) 318 | args.dist_backend = 'nccl' 319 | print('| distributed init (rank {}): {}'.format( 320 | args.rank, args.dist_url), flush=True) 321 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 322 | world_size=args.world_size, rank=args.rank) 323 | torch.distributed.barrier() 324 | setup_for_distributed(args.rank == 0) 325 | -------------------------------------------------------------------------------- /src/push-DQN/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | import os 4 | import numpy as np 5 | import cv2 6 | import torch 7 | # import h5py 8 | import shutil 9 | import matplotlib.pyplot as plt 10 | 11 | class Logger(): 12 | 13 | def __init__(self, continue_logging, logging_directory): 14 | 15 | # Create directory to save data 16 | timestamp = time.time() 17 | timestamp_value = datetime.datetime.fromtimestamp(timestamp) 18 | self.continue_logging = continue_logging 19 | if self.continue_logging: 20 | self.base_directory = logging_directory 21 | print('Pre-loading data logging session: %s' % (self.base_directory)) 22 | else: 23 | self.base_directory = os.path.join(logging_directory, timestamp_value.strftime('%Y-%m-%d.%H:%M:%S')) 24 | print('Creating data logging session: %s' % (self.base_directory)) 25 | self.info_directory = os.path.join(self.base_directory, 'info') 26 | self.color_images_directory = os.path.join(self.base_directory, 'data', 'color-images') 27 | self.depth_images_directory = os.path.join(self.base_directory, 'data', 'depth-images') 28 | 
self.color_heightmaps_directory = os.path.join(self.base_directory, 'data', 'color-heightmaps') 29 | self.depth_heightmaps_directory = os.path.join(self.base_directory, 'data', 'depth-heightmaps') 30 | self.models_directory = os.path.join(self.base_directory, 'models') 31 | self.visualizations_directory = os.path.join(self.base_directory, 'visualizations') 32 | self.recordings_directory = os.path.join(self.base_directory, 'recordings') 33 | self.transitions_directory = os.path.join(self.base_directory, 'transitions') 34 | 35 | self.mask_rg_directory = os.path.join(self.base_directory, 'mask_rg') 36 | self.mask_images_directory = os.path.join(self.base_directory, 'mask_rg', 'prediction-images') 37 | self.mask_predictions_directory = os.path.join(self.base_directory, 'mask_rg', 'predictions') 38 | 39 | if not os.path.exists(self.info_directory): 40 | os.makedirs(self.info_directory) 41 | if not os.path.exists(self.color_images_directory): 42 | os.makedirs(self.color_images_directory) 43 | if not os.path.exists(self.depth_images_directory): 44 | os.makedirs(self.depth_images_directory) 45 | if not os.path.exists(self.color_heightmaps_directory): 46 | os.makedirs(self.color_heightmaps_directory) 47 | if not os.path.exists(self.depth_heightmaps_directory): 48 | os.makedirs(self.depth_heightmaps_directory) 49 | if not os.path.exists(self.models_directory): 50 | os.makedirs(self.models_directory) 51 | if not os.path.exists(self.visualizations_directory): 52 | os.makedirs(self.visualizations_directory) 53 | if not os.path.exists(self.recordings_directory): 54 | os.makedirs(self.recordings_directory) 55 | if not os.path.exists(self.transitions_directory): 56 | os.makedirs(os.path.join(self.transitions_directory, 'data')) 57 | if not os.path.exists(self.mask_rg_directory): 58 | os.makedirs(self.mask_rg_directory) 59 | if not os.path.exists(self.mask_images_directory): 60 | os.makedirs(self.mask_images_directory) 61 | if not os.path.exists(self.mask_predictions_directory): 62 | os.makedirs(self.mask_predictions_directory) 63 | 64 | 65 | def save_camera_info(self, intrinsics, pose, depth_scale): 66 | np.savetxt(os.path.join(self.info_directory, 'camera-intrinsics.txt'), intrinsics, delimiter=' ') 67 | np.savetxt(os.path.join(self.info_directory, 'camera-pose.txt'), pose, delimiter=' ') 68 | np.savetxt(os.path.join(self.info_directory, 'camera-depth-scale.txt'), [depth_scale], delimiter=' ') 69 | 70 | def save_heightmap_info(self, boundaries, resolution): 71 | np.savetxt(os.path.join(self.info_directory, 'heightmap-boundaries.txt'), boundaries, delimiter=' ') 72 | np.savetxt(os.path.join(self.info_directory, 'heightmap-resolution.txt'), [resolution], delimiter=' ') 73 | 74 | def save_images(self, iteration, color_image, depth_image, mode): 75 | color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) 76 | cv2.imwrite(os.path.join(self.color_images_directory, '%06d.%s.color.png' % (iteration, mode)), color_image) 77 | depth_image = np.round(depth_image * 10000).astype(np.uint16) # Save depth in 1e-4 meters 78 | cv2.imwrite(os.path.join(self.depth_images_directory, '%06d.%s.depth.png' % (iteration, mode)), depth_image) 79 | 80 | def save_heightmaps(self, iteration, color_heightmap, depth_heightmap, mode): 81 | color_heightmap = cv2.cvtColor(color_heightmap, cv2.COLOR_RGB2BGR) 82 | cv2.imwrite(os.path.join(self.color_heightmaps_directory, '%06d.%s.color.png' % (iteration, mode)), color_heightmap) 83 | depth_heightmap = np.round(depth_heightmap * 100000).astype(np.uint16) # Save depth in 1e-5 meters 84 | 
cv2.imwrite(os.path.join(self.depth_heightmaps_directory, '%06d.%s.depth.png' % (iteration, mode)), depth_heightmap) 85 | 86 | def write_to_log(self, log_name, log): 87 | np.savetxt(os.path.join(self.transitions_directory, '%s.log.txt' % log_name), log, delimiter=' ') 88 | 89 | def save_model(self, iteration, model, name): 90 | torch.save(model.cpu().state_dict(), os.path.join(self.models_directory, 'snapshot-%06d.%s.pth' % (iteration, name))) 91 | 92 | def save_backup_model(self, model, name): 93 | torch.save(model.state_dict(), os.path.join(self.models_directory, 'snapshot-backup.%s.pth' % (name))) 94 | 95 | def save_visualizations(self, iteration, affordance_vis, name): 96 | cv2.imwrite(os.path.join(self.visualizations_directory, '%06d.%s.png' % (iteration,name)), affordance_vis) 97 | 98 | # def save_state_features(self, iteration, state_feat): 99 | # h5f = h5py.File(os.path.join(self.visualizations_directory, '%06d.state.h5' % (iteration)), 'w') 100 | # h5f.create_dataset('state', data=state_feat.cpu().data.numpy()) 101 | # h5f.close() 102 | 103 | # Record RGB-D video while executing primitive 104 | # recording_directory = logger.make_new_recording_directory(iteration) 105 | # camera.start_recording(recording_directory) 106 | # camera.stop_recording() 107 | def make_new_recording_directory(self, iteration): 108 | recording_directory = os.path.join(self.recordings_directory, '%06d' % (iteration)) 109 | if not os.path.exists(recording_directory): 110 | os.makedirs(recording_directory) 111 | return recording_directory 112 | 113 | def save_transition(self, iteration, transition): 114 | depth_heightmap = np.round(transition.state * 100000).astype(np.uint16) # Save depth in 1e-5 meters 115 | cv2.imwrite(os.path.join(self.transitions_directory, 'data', '%06d.0.depth.png' % (iteration)), depth_heightmap) 116 | next_depth_heightmap = np.round(transition.next_state * 100000).astype(np.uint16) # Save depth in 1e-5 meters 117 | cv2.imwrite(os.path.join(self.transitions_directory, 'data', '%06d.1.depth.png' % (iteration)), next_depth_heightmap) 118 | # np.savetxt(os.path.join(self.transitions_directory, '%06d.action.txt' % (iteration)), [1 if (transition.action == 'grasp') else 0], delimiter=' ') 119 | # np.savetxt(os.path.join(self.transitions_directory, '%06d.reward.txt' % (iteration)), [reward_value], delimiter=' ') 120 | 121 | def save_session_success_fail(self, succ_fail): 122 | # TODO check if file exists, if not generate 123 | with open(os.path.join(self.base_directory, 'session_success_fail_list.txt'), 'a') as file: 124 | file.write(succ_fail + ' \n') 125 | 126 | def save_config_file(self, config_path): 127 | shutil.copy(config_path, self.base_directory) 128 | 129 | def save_mask_rg_returns(self, seg_vals): 130 | # np.savetxt(os.path.join(self.mask_rg_directory, '%s.log.txt' % 'segmentation'), seg_vals, delimiter=' ') 131 | with open(os.path.join(self.mask_rg_directory, 'segmentation.txt'), 'a') as file: 132 | file.write(str(seg_vals) + ' \n') 133 | 134 | def save_mask_diff_image(self, maskdiff, seg, rgb, depth, iteration, session_iteration, err_rate): 135 | plt.imsave(os.path.join(self.mask_images_directory, "{:06d}.{:02d}.mask_pred_diff.png".format(iteration,session_iteration)), maskdiff) 136 | plt.imsave(os.path.join(self.mask_images_directory, 137 | "{:06d}.{:02d}.ground_truth.png".format(iteration, session_iteration)), seg) 138 | plt.imsave(os.path.join(self.mask_images_directory, 139 | "{:06d}.{:02d}.color.png".format(iteration, session_iteration)), rgb) 140 | 
plt.imsave(os.path.join(self.mask_images_directory, 141 | "{:06d}.{:02d}.depth.png".format(iteration, session_iteration)), depth) 142 | 143 | err_rate.append([iteration, session_iteration]) 144 | with open(os.path.join(self.mask_rg_directory, 'err_rate_per_iteration.txt'), 'a') as file: 145 | file.write(str(err_rate) + ' \n') 146 | # np.save(os.path.join(self.mask_predictions_directory, '{:06d}.{:02d}.mask_predictions.npy'.format(iteration,session_iteration)), predictions) 147 | 148 | 149 | def save_pseudo_random_notice(self): 150 | with open(os.path.join(self.base_directory, 'PSEUDO_RANDOM_MODEL.txt'), 'a') as file: 151 | file.write('This log generated by a psuedo random model! \n (Parameters in the setup section in push-DQN_config.yaml is not used for this log and should be ignored) \n') -------------------------------------------------------------------------------- /src/push-DQN/main.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.mask_rg import MaskRG 2 | import time 3 | import os 4 | import random 5 | import threading 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import scipy as sc 9 | import cv2 10 | from collections import namedtuple 11 | import torch 12 | from torch.autograd import Variable 13 | from robot import Robot 14 | from trainer import Trainer 15 | from logger import Logger 16 | import utils 17 | import scipy.misc 18 | import yaml 19 | 20 | CONF_PATH = './push-DQN_config.yaml' 21 | 22 | class BColors: 23 | HEADER = '\033[95m' 24 | OKBLUE = '\033[94m' 25 | OKGREEN = '\033[92m' 26 | WARNING = '\033[93m' 27 | FAIL = '\033[91m' 28 | ENDC = '\033[0m' 29 | BOLD = '\033[1m' 30 | UNDERLINE = '\033[4m' 31 | 32 | def main(): 33 | 34 | with open(CONF_PATH) as f: 35 | config = yaml.load(f, Loader=yaml.FullLoader) 36 | 37 | # plt.ion() 38 | mask_rg = MaskRG(config['detection_thresholds']['confidence_threshold'], config['detection_thresholds']['mask_threshold']) 39 | session_success_threshold = config['detection_thresholds']['session_success_threshold'] 40 | 41 | snapshot_file = os.path.join(config['model']['path'], config['model']['file']) if config['model']['file'] != 'new' else None 42 | #TODO clean this from trainer method 43 | load_snapshot = True if snapshot_file is not None else False 44 | 45 | logging_directory = config['logging']['path'] if config['logging']['path'] != 'Default' else os.path.abspath('./logs') 46 | continue_logging = config['logging']['continue_logging'] 47 | save_visualizations = config['logging']['save_visualizations'] 48 | 49 | workspace_limits = np.asarray(config['environment']['workspace_limits']) 50 | heightmap_resolution = float(config['environment']['heightmap_resolution']) 51 | 52 | min_num_obj = config['environment']['min_num_objects'] 53 | max_num_obj = config['environment']['max_num_objects'] 54 | session_limit = config['environment']['session_limit'] 55 | 56 | random_seed = 1234 # (to set the random seed for simulation and neural net initialization) 57 | force_cpu = False # (to force code to run in CPU mode) 58 | 59 | # TODO implementation! 
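# (added note) experience replay is not wired into this training loop yet; the commented-out config read below is a placeholder for it.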
60 | # experience_replay = config['setup']['experience_replay'] 61 | 62 | future_reward_discount = config['training']['future_reward_discount'] 63 | 64 | is_testing = config['setup']['is_testing'] 65 | explore_prob = config['setup']['exploration_probability'] 66 | explore_rate_decay = config['setup']['epsilon_greedy_policy']['exploration_rate_decay'] 67 | min_rate_decay = config['setup']['epsilon_greedy_policy']['min_exp_rate_decay'] 68 | 69 | is_random_model = config['setup']['is_random_model'] 70 | 71 | # Set random seed 72 | np.random.seed(random_seed) 73 | 74 | # Initialize pick-and-place system (camera and robot) 75 | robot = Robot(min_num_obj, max_num_obj, workspace_limits) 76 | 77 | # Initialize trainer 78 | trainer = Trainer(future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) 79 | 80 | # Initialize data logger 81 | logger = Logger(continue_logging, logging_directory) 82 | logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose 83 | logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters 84 | 85 | # save the current configuration file in the current logging directory 86 | logger.save_config_file(CONF_PATH) 87 | 88 | # Find last executed iteration of pre-loaded log, and load execution info and RL variables 89 | if continue_logging: 90 | trainer.preload(logger.transitions_directory) 91 | 92 | # Initialize variables for heuristic bootstrapping and exploration probability 93 | # no_change_count = [2, 2] if not is_testing else [0, 0] 94 | 95 | explore_prob = explore_prob if not is_testing else 0.0 96 | # This overrides all above!!! 97 | if is_random_model: 98 | explore_prob = 1.0 99 | explore_rate_decay = False 100 | 101 | nonlocal_variables = {'executing_action': False, 102 | 'primitive_action': None, 103 | 'best_pix_ind': None, 104 | 'push_success': False, 105 | 'grasp_success': False, 106 | 'session_success': False, 107 | 'session_first_loop': True} 108 | session_counter = 0 109 | # session no (index)--> [number of iteration for that session, fail(0)/success(1)] 110 | success_fail_record = [] 111 | last_iter = 0 112 | # Parallel thread to process network output and execute actions 113 | # ------------------------------------------------------------- 114 | def process_actions(): 115 | while True: 116 | if nonlocal_variables['executing_action']: 117 | 118 | print('Primitive confidence scores for pushing: {:f}'.format(np.max(push_predictions))) 119 | nonlocal_variables['primitive_action'] = 'push' 120 | 121 | # Exploration vs. Exploitation - Best push vs random push / if is_testing --> explore_prob=0! 
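# (added worked example) the epsilon-greedy rule below draws u ~ Uniform(0, 1); if u < explore_prob a random
# rotation/pixel is pushed, otherwise the argmax of the Q-map is executed. With the decay applied later in this
# script, explore_prob = max(0.5 * 0.998**iteration, min_rate_decay), i.e. roughly 0.50 at iteration 0, 0.41 after
# 100 iterations and 0.07 after 1000 iterations (assuming min_rate_decay is smaller than these values).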
122 | explore_actions = np.random.uniform() < explore_prob 123 | if not explore_actions: 124 | print('Strategy: ' + BColors.WARNING + 'EXPLOIT' + BColors.ENDC + ' (exploration probability: ' 125 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 126 | # Exploitation: Return the index of the x, y coordinates of the best prediction in the Q-values 127 | nonlocal_variables['best_pix_ind'] = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 128 | predicted_value = np.max(push_predictions) 129 | 130 | else: 131 | print('Strategy:' + BColors.WARNING + 'EXPLORE' + BColors.ENDC + ' (exploration probability:' 132 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 133 | 134 | nonlocal_variables['best_pix_ind'] = (np.random.random_integers(0, 15), 135 | np.random.random_integers(0, 224-1), 136 | np.random.random_integers(0, 224-1)) 137 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 138 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 139 | predicted_value = push_predictions[nonlocal_variables['best_pix_ind']] 140 | 141 | trainer.is_exploit_log.append([0 if explore_actions else 1]) 142 | logger.write_to_log('is-exploit', trainer.is_exploit_log) 143 | # Save predicted confidence value 144 | trainer.predicted_value_log.append([predicted_value]) 145 | logger.write_to_log('predicted-value', trainer.predicted_value_log) 146 | 147 | # Compute 3D position of pixel 148 | print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], 149 | nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) 150 | 151 | best_rotation_angle = np.deg2rad(nonlocal_variables['best_pix_ind'][0] * (360.0/trainer.model.num_rotations)) 152 | best_pix_x = nonlocal_variables['best_pix_ind'][2] 153 | best_pix_y = nonlocal_variables['best_pix_ind'][1] 154 | primitive_position = [best_pix_x * heightmap_resolution + workspace_limits[0][0], 155 | best_pix_y * heightmap_resolution + workspace_limits[1][0], 156 | valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]] 157 | print("primitive position --> ", primitive_position) 158 | # If pushing, adjust start position, and make sure z value is safe and not too low 159 | if nonlocal_variables['primitive_action'] == 'push': 160 | finger_width = 0.02 161 | safe_kernel_width = int(np.round((finger_width/2)/heightmap_resolution)) 162 | local_region = valid_depth_heightmap[max(best_pix_y - safe_kernel_width, 0) 163 | :min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]), 164 | max(best_pix_x - safe_kernel_width, 0) 165 | :min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])] 166 | if local_region.size == 0: 167 | safe_z_position = workspace_limits[2][0] 168 | elif (np.max(local_region) - workspace_limits[2][0]) > finger_width: 169 | safe_z_position = np.max(local_region) - finger_width/4 170 | else: 171 | safe_z_position = np.max(local_region) 172 | 173 | primitive_position[2] = safe_z_position 174 | 175 | # Save executed primitive --> 0 for exploit, 1 for explore 176 | if not explore_actions: 177 | trainer.executed_action_log.append([0, nonlocal_variables['best_pix_ind'][0], 178 | nonlocal_variables['best_pix_ind'][1], 179 | nonlocal_variables['best_pix_ind'][2]]) 180 | else: 181 | trainer.executed_action_log.append([1, nonlocal_variables['best_pix_ind'][0], 182 | nonlocal_variables['best_pix_ind'][1], 183 | nonlocal_variables['best_pix_ind'][2]]) 184 | 185 | 
logger.write_to_log('executed-action', trainer.executed_action_log) 186 | 187 | # Visualize executed primitive, and affordances 188 | if save_visualizations: 189 | push_pred_vis = trainer.get_prediction_vis(push_predictions, color_heightmap, nonlocal_variables['best_pix_ind']) 190 | logger.save_visualizations(trainer.iteration, push_pred_vis, 'push') 191 | cv2.imwrite('visualization.push.png', push_pred_vis) 192 | 193 | # Initialize variables that influence reward 194 | nonlocal_variables['push_success'] = False 195 | 196 | # Execute primitive 197 | if nonlocal_variables['primitive_action'] == 'push': 198 | nonlocal_variables['push_success'] = robot.push(primitive_position, best_rotation_angle, workspace_limits) 199 | print('Push executed: %r' % (nonlocal_variables['push_success'])) 200 | print(BColors.OKBLUE + '-------------------------------------------------------------------------' 201 | + BColors.ENDC) 202 | nonlocal_variables['executing_action'] = False 203 | 204 | 205 | time.sleep(0.01) 206 | action_thread = threading.Thread(target=process_actions) 207 | action_thread.daemon = True 208 | action_thread.start() 209 | exit_called = False 210 | # ------------------------------------------------------------- 211 | # ------------------------------------------------------------- 212 | 213 | # Start main training/testing loop 214 | while True: 215 | print(BColors.WARNING + '\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration) + 216 | BColors.ENDC + ' (Session iteration: {:d})'.format(session_counter)) 217 | iteration_time_0 = time.time() 218 | session_counter += 1 219 | # Make sure simulation is still stable (if not, reset simulation) 220 | robot.check_sim() 221 | 222 | # Get latest RGB-D image 223 | color_img, depth_img_raw = robot.get_camera_data() 224 | # Detph scale is 1 for simulation!! 225 | depth_img = depth_img_raw * robot.cam_depth_scale # Apply depth scale from calibration 226 | 227 | # Get heightmap from RGB-D image (by re-projecting 3D point cloud) 228 | color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, 229 | robot.cam_pose, workspace_limits, heightmap_resolution) 230 | valid_depth_heightmap = depth_heightmap.copy() 231 | valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 232 | 233 | # CHECK IF IT IS THE FIRST LOOP 234 | if nonlocal_variables['session_first_loop']: 235 | 236 | print(BColors.WARNING + 'This is the zeroth iteration! ' 237 | 'Mask-RG Values below will be used in the first iteration!' 
+ BColors.ENDC) 238 | # Get ground truth segmentation masks 239 | color_m_rg, depth_m_rg, [segmentation_mask, num_objects] = robot.get_data_mask_rg() 240 | # print(num_objects) 241 | plt.imsave('ground_truth.png', segmentation_mask) 242 | plt.imsave('color_img.png', color_m_rg) 243 | plt.imsave('depth_img.png', depth_m_rg) 244 | 245 | # set the mask rcnn reward generator with the current depth and gt 246 | mask_rg.set_reward_generator(depth_m_rg, segmentation_mask) 247 | 248 | # get the rewards predictions 249 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 250 | printout = mask_rg.print_segmentation(pred_ids) 251 | plt.imsave('mask_pred_diff.png', printout) 252 | 253 | # all_masks = mask_rg.print_masks() 254 | # plt.imsave('mask_pred_all.png', all_masks) 255 | 256 | prev_seg_reward = seg_reward.copy() 257 | prev_depth_for_chg_det = depth_m_rg.copy() 258 | depth_m_rg_act = depth_m_rg.copy() # for change detection patch 259 | 260 | # if there is a success without a push just by chance, flag for the termination of the session 261 | if seg_reward > session_success_threshold: 262 | success_without_push = True 263 | nonlocal_variables['session_success'] = True 264 | else: 265 | success_without_push = False 266 | 267 | # END OF FIRST LOOP --> DEACTIVATE 268 | nonlocal_variables['session_first_loop'] = False 269 | 270 | # Save RGB-D images and RGB-D heightmaps 271 | logger.save_images(trainer.iteration, color_img, depth_img, '0') 272 | logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') 273 | 274 | # Save mask prediction difference image and error rate per iteration 275 | if 'color_m_rg_act' in locals(): 276 | logger.save_mask_diff_image(printout, segmentation_mask_action, color_m_rg_act, 277 | depth_m_rg_act, trainer.iteration, session_counter-1, err_rate) 278 | 279 | if not exit_called: 280 | # Run forward pass with network to get affordances 281 | push_predictions = trainer.forward(color_heightmap, valid_depth_heightmap, is_volatile=True) 282 | 283 | # Execute best primitive action on robot in another thread 284 | nonlocal_variables['executing_action'] = True 285 | 286 | # Run training iteration in current thread (aka training thread) 287 | # at the first loop prev_color_img will not be in locals! as well as other vars with 'prev_' 288 | if 'prev_color_img' in locals(): 289 | 290 | # The thresh hold that will terminate the current session with success, this line is after current 291 | # push action and getting the current scene reward score from mask_rg 292 | if seg_reward > session_success_threshold: 293 | # Push success!!! 
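# (added note) seg_reward here is the Mask-RG score computed after the previous push; once it exceeds
# session_success_threshold the session is flagged as successful and is closed in the SUCCESS block further below.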
294 | nonlocal_variables['session_success'] = True 295 | 296 | # Detect changes 297 | depth_diff = abs(depth_heightmap - prev_depth_heightmap) 298 | # depth_diff = abs(depth_m_rg_act - prev_depth_for_chg_det) 299 | depth_diff[np.isnan(depth_diff)] = 0 300 | depth_diff[depth_diff < 0.01] = 0 301 | 302 | depth_diff[depth_diff > 0] = 1 303 | change_threshold = 100 304 | change_value = np.sum(depth_diff) 305 | change_detected = change_value > change_threshold 306 | print('Change detected: %r (value: %d)' % (change_detected, change_value)) 307 | 308 | if not success_without_push: 309 | label_value, prev_reward_value, seg_vals = trainer.get_reward_value(color_heightmap, valid_depth_heightmap, 310 | prev_seg_reward, seg_reward, change_detected) 311 | 312 | trainer.label_value_log.append([label_value]) 313 | logger.write_to_log('label-value', trainer.label_value_log) 314 | trainer.reward_value_log.append([prev_reward_value]) 315 | logger.write_to_log('reward-value', trainer.reward_value_log) 316 | 317 | logger.save_mask_rg_returns([seg_vals, num_objects_action.size-1]) 318 | 319 | # Backpropagate 320 | # forward pass only for the best scene between 16 ('prev best pix ind') 321 | loss_val = trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap, prev_primitive_action, prev_best_pix_ind, 322 | label_value) 323 | 324 | trainer.loss_log.append([loss_val]) 325 | logger.write_to_log('loss-value', trainer.loss_log) 326 | 327 | # TODO constants!! 328 | if explore_rate_decay and not is_testing: 329 | explore_prob = max(0.5 * np.power(0.998, trainer.iteration), min_rate_decay) 330 | 331 | # Save model snapshot 332 | if not is_testing: 333 | # TODO filename string can be changed with something like datetime 334 | logger.save_backup_model(trainer.model, 'reinforcement') 335 | if trainer.iteration % config['logging']['snapshot_saving_rate'] == 0: 336 | logger.save_model(trainer.iteration, trainer.model, 'reinforcement') 337 | if trainer.use_cuda: 338 | trainer.model = trainer.model.cuda() 339 | 340 | # Sync both action thread and training thread 341 | while nonlocal_variables['executing_action']: 342 | time.sleep(0.01) 343 | 344 | # SUCCESS 345 | if nonlocal_variables['session_success']: 346 | if not success_without_push: 347 | success_fail_record.append([session_counter, 1]) 348 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 349 | + str(session_counter - 1) + 350 | ' pushing actions --> SUCCESS! (tr_it --> {:d})'.format(trainer.iteration)) 351 | 352 | print(BColors.WARNING + 'SESSION SUCCESS!' + BColors.ENDC) 353 | else: 354 | success_fail_record.append([session_counter, 2]) 355 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 356 | + str(session_counter - 1) + 357 | ' TRAINING IGNORED --> (tr_it --> {:d})'.format( 358 | trainer.iteration)) 359 | print(BColors.WARNING + 'SESSION CLOSED WITHOUT TRAINING AS THE INITIAL SEGMENTATION WAS GOOD ENOUGH!' 360 | + BColors.ENDC) 361 | 362 | last_iter = trainer.iteration 363 | nonlocal_variables['session_success'] = False 364 | nonlocal_variables['session_first_loop'] = True 365 | seg_reward = np.asarray(0) 366 | session_counter = 0 367 | robot.restart_sim() 368 | 369 | 370 | # FAIL 371 | # it depends how to count -> 0th iteration of a session does not do any training, just gets two GTs to be able 372 | # to calculate rewards at the 1st iteration. 
"session_counter > 30" means after 31 sessions including the 0th 373 | # session or 30 training sessions 374 | if session_counter > session_limit: 375 | 376 | success_fail_record.append([session_counter, 0]) 377 | logger.save_session_success_fail('Session no ' + str(len(success_fail_record)) + ' --> after ' 378 | + str(session_counter) + 379 | ' pushing actions --> FAIL! (tr_it --> {:d})'.format(trainer.iteration)) 380 | last_iter = trainer.iteration 381 | nonlocal_variables['session_first_loop'] = True 382 | robot.restart_sim() 383 | seg_reward = np.asarray(0) 384 | 385 | print(BColors.WARNING + 'SESSION TIMEOUT! (after 30 iterations without success)' + BColors.ENDC) 386 | session_counter = 0 387 | 388 | if exit_called: 389 | break 390 | 391 | # Save information for next training step 392 | prev_color_img = color_img.copy() 393 | prev_depth_img = depth_img.copy() 394 | prev_color_heightmap = color_heightmap.copy() 395 | prev_depth_heightmap = depth_heightmap.copy() 396 | prev_valid_depth_heightmap = valid_depth_heightmap.copy() 397 | prev_push_success = nonlocal_variables['push_success'] 398 | prev_grasp_success = False 399 | prev_primitive_action = 'push' 400 | prev_push_predictions = push_predictions.copy() 401 | prev_best_pix_ind = nonlocal_variables['best_pix_ind'] 402 | 403 | 404 | if not nonlocal_variables['session_first_loop']: 405 | # Action is done 406 | prev_seg_reward = seg_reward.copy() 407 | prev_depth_for_chg_det = depth_m_rg_act.copy() 408 | # print('Previous segmentation reward --> {:.5f}'.format(prev_seg_reward)) 409 | # Get RGB-D image 410 | color_img_action, depth_img_raw_action = robot.get_camera_data() 411 | # Get ground truth segmentation masks 412 | color_m_rg_act, depth_m_rg_act, [segmentation_mask_action, num_objects_action] = robot.get_data_mask_rg() 413 | plt.imsave('ground_truth.png', segmentation_mask_action) 414 | plt.imsave('color_img.png', color_m_rg_act) 415 | plt.imsave('depth_img.png', depth_m_rg_act) 416 | mask_rg.set_reward_generator(depth_m_rg_act, segmentation_mask_action) 417 | 418 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 419 | printout = mask_rg.print_segmentation(pred_ids) 420 | plt.imsave('mask_pred_diff.png', printout) 421 | print('Number of the objects left in the scene after pushing action -->', num_objects_action.size - 1) 422 | # all_masks = mask_rg.print_masks() 423 | # plt.imsave('mask_pred_all.png', all_masks) 424 | 425 | trainer.iteration += 1 426 | iteration_time_1 = time.time() 427 | print('Time elapsed: %f' % (iteration_time_1-iteration_time_0)) 428 | print('---------------------------------------------------------------') 429 | print('---------------------------------------------------------------') 430 | 431 | 432 | if __name__ == '__main__': 433 | main() 434 | -------------------------------------------------------------------------------- /src/push-DQN/main_test.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.mask_rg import MaskRG 2 | import time 3 | import os 4 | import random 5 | import threading 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import scipy as sc 9 | import cv2 10 | from collections import namedtuple 11 | import torch 12 | from torch.autograd import Variable 13 | from robot import Robot 14 | from trainer import Trainer 15 | from logger import Logger 16 | import utils 17 | import scipy.misc 18 | import yaml 19 | 20 | 21 | CONF_PATH = './push-DQN_config.yaml' 22 | 23 | class BColors: 24 | HEADER = '\033[95m' 25 | OKBLUE 
= '\033[94m' 26 | OKGREEN = '\033[92m' 27 | WARNING = '\033[93m' 28 | FAIL = '\033[91m' 29 | ENDC = '\033[0m' 30 | BOLD = '\033[1m' 31 | UNDERLINE = '\033[4m' 32 | 33 | def main(): 34 | 35 | with open(CONF_PATH) as f: 36 | config = yaml.load(f, Loader=yaml.FullLoader) 37 | 38 | # plt.ion() 39 | mask_rg = MaskRG(config['detection_thresholds']['confidence_threshold'], config['detection_thresholds']['mask_threshold']) 40 | session_success_threshold = config['detection_thresholds']['session_success_threshold'] 41 | 42 | snapshot_file = os.path.join(config['model']['path'], config['model']['file']) if config['model']['file'] != 'new' else None 43 | #TODO clean this from trainer method 44 | load_snapshot = True if snapshot_file is not None else False 45 | 46 | logging_directory = config['logging']['path'] if config['logging']['path'] != 'Default' else os.path.abspath('./logs') 47 | continue_logging = config['logging']['continue_logging'] 48 | save_visualizations = config['logging']['save_visualizations'] 49 | 50 | workspace_limits = np.asarray(config['environment']['workspace_limits']) 51 | heightmap_resolution = float(config['environment']['heightmap_resolution']) 52 | 53 | min_num_obj = config['environment']['min_num_objects'] 54 | max_num_obj = config['environment']['max_num_objects'] 55 | session_limit = config['environment']['session_limit'] 56 | 57 | random_seed = 1234 # (to set the random seed for simulation and neural net initialization) 58 | force_cpu = False # (to force code to run in CPU mode) 59 | 60 | # TODO implementation! 61 | # experience_replay = config['setup']['experience_replay'] 62 | 63 | future_reward_discount = config['training']['future_reward_discount'] 64 | 65 | is_testing = config['setup']['is_testing'] 66 | explore_prob = config['setup']['exploration_probability'] 67 | explore_rate_decay = config['setup']['epsilon_greedy_policy']['exploration_rate_decay'] 68 | min_rate_decay = config['setup']['epsilon_greedy_policy']['min_exp_rate_decay'] 69 | 70 | is_random_model = config['setup']['is_random_model'] 71 | 72 | # Set random seed 73 | np.random.seed(random_seed) 74 | 75 | # Initialize pick-and-place system (camera and robot) 76 | robot = Robot(min_num_obj, max_num_obj, workspace_limits) 77 | 78 | # Initialize trainer 79 | trainer = Trainer(future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) 80 | 81 | # Initialize data logger 82 | logger = Logger(continue_logging, logging_directory) 83 | logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose 84 | logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters 85 | 86 | # save the current configuration file in the current logging directory 87 | logger.save_config_file(CONF_PATH) 88 | 89 | # Find last executed iteration of pre-loaded log, and load execution info and RL variables 90 | if continue_logging: 91 | trainer.preload(logger.transitions_directory) 92 | 93 | # Initialize variables for heuristic bootstrapping and exploration probability 94 | # no_change_count = [2, 2] if not is_testing else [0, 0] 95 | 96 | explore_prob = explore_prob if not is_testing else 0.0 97 | # This overrides all above!!! 
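# Precedence of the exploration settings: is_testing (above) forces pure
# exploitation (explore_prob = 0.0), while is_random_model (below) forces pure
# exploration (explore_prob = 1.0) regardless of the value in the config file.
# Minimal sketch of how explore_prob is consumed later, once per action, inside
# process_actions() -- the 0.5 is an illustrative value only:
#   explore_prob = 0.5                                   # example value
#   explore_actions = np.random.uniform() < explore_prob
#   # False -> EXPLOIT: argmax over the 16 x 224 x 224 Q-map push_predictions
#   # True  -> EXPLORE: random (rotation, y, x) index into that same Q-map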
98 | if is_random_model: 99 | explore_prob = 1.0 100 | explore_rate_decay = False 101 | 102 | nonlocal_variables = {'executing_action': False, 103 | 'primitive_action': None, 104 | 'best_pix_ind': None, 105 | 'push_success': False, 106 | 'grasp_success': False, 107 | 'session_success': False, 108 | 'session_first_loop': True} 109 | session_counter = 0 110 | # session no (index)--> [number of iteration for that session, fail(0)/success(1)] 111 | success_fail_record = [] 112 | last_iter = 0 113 | # plt.ion() 114 | 115 | # Parallel thread to process network output and execute actions 116 | # ------------------------------------------------------------- 117 | def process_actions(): 118 | while True: 119 | if nonlocal_variables['executing_action']: 120 | 121 | print('Primitive confidence scores for pushing: {:f}'.format(np.max(push_predictions))) 122 | nonlocal_variables['primitive_action'] = 'push' 123 | 124 | # Exploration vs. Exploitation - Best push vs random push / if is_testing --> explore_prob=0! 125 | explore_actions = np.random.uniform() < explore_prob 126 | if not explore_actions: 127 | print('Strategy: ' + BColors.WARNING + 'EXPLOIT' + BColors.ENDC + ' (exploration probability: ' 128 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 129 | # Exploitation: Return the index of the x, y coordinates of the best prediction in the Q-values 130 | nonlocal_variables['best_pix_ind'] = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 131 | predicted_value = np.max(push_predictions) 132 | 133 | else: 134 | print('Strategy:' + BColors.WARNING + 'EXPLORE' + BColors.ENDC + ' (exploration probability:' 135 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 136 | 137 | nonlocal_variables['best_pix_ind'] = (np.random.random_integers(0, 15), 138 | np.random.random_integers(0, 224-1), 139 | np.random.random_integers(0, 224-1)) 140 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 141 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 142 | predicted_value = push_predictions[nonlocal_variables['best_pix_ind']] 143 | 144 | trainer.is_exploit_log.append([0 if explore_actions else 1]) 145 | logger.write_to_log('is-exploit', trainer.is_exploit_log) 146 | # Save predicted confidence value 147 | trainer.predicted_value_log.append([predicted_value]) 148 | logger.write_to_log('predicted-value', trainer.predicted_value_log) 149 | 150 | # Compute 3D position of pixel 151 | print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], 152 | nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) 153 | 154 | best_rotation_angle = np.deg2rad(nonlocal_variables['best_pix_ind'][0] * (360.0/trainer.model.num_rotations)) 155 | best_pix_x = nonlocal_variables['best_pix_ind'][2] 156 | best_pix_y = nonlocal_variables['best_pix_ind'][1] 157 | primitive_position = [best_pix_x * heightmap_resolution + workspace_limits[0][0], 158 | best_pix_y * heightmap_resolution + workspace_limits[1][0], 159 | valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]] 160 | print("primitive position --> ", primitive_position) 161 | # If pushing, adjust start position, and make sure z value is safe and not too low 162 | if nonlocal_variables['primitive_action'] == 'push': 163 | finger_width = 0.02 164 | safe_kernel_width = int(np.round((finger_width/2)/heightmap_resolution)) 165 | local_region = valid_depth_heightmap[max(best_pix_y - 
safe_kernel_width, 0) 166 | :min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]), 167 | max(best_pix_x - safe_kernel_width, 0) 168 | :min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])] 169 | if local_region.size == 0: 170 | safe_z_position = workspace_limits[2][0] 171 | 172 | elif (np.max(local_region) - workspace_limits[2][0]) > finger_width: 173 | safe_z_position = np.max(local_region) - finger_width/4 174 | else: 175 | safe_z_position = np.max(local_region) 176 | 177 | primitive_position[2] = safe_z_position 178 | 179 | # Save executed primitive --> 0 for exploit, 1 for explore 180 | if not explore_actions: 181 | trainer.executed_action_log.append([0, nonlocal_variables['best_pix_ind'][0], 182 | nonlocal_variables['best_pix_ind'][1], 183 | nonlocal_variables['best_pix_ind'][2]]) 184 | else: 185 | trainer.executed_action_log.append([1, nonlocal_variables['best_pix_ind'][0], 186 | nonlocal_variables['best_pix_ind'][1], 187 | nonlocal_variables['best_pix_ind'][2]]) 188 | 189 | logger.write_to_log('executed-action', trainer.executed_action_log) 190 | 191 | # Visualize executed primitive, and affordances 192 | if save_visualizations: 193 | push_pred_vis = trainer.get_prediction_vis(push_predictions, color_heightmap, nonlocal_variables['best_pix_ind']) 194 | logger.save_visualizations(trainer.iteration, push_pred_vis, 'push') 195 | cv2.imwrite('visualization.push.png', push_pred_vis) 196 | 197 | # Initialize variables that influence reward 198 | nonlocal_variables['push_success'] = False 199 | 200 | # robot.move_to_target([-0.5, 0.0, 0.036 + safe_z_position]) 201 | 202 | # Execute primitive 203 | if nonlocal_variables['primitive_action'] == 'push': 204 | nonlocal_variables['push_success'] = robot.push(primitive_position, best_rotation_angle, workspace_limits) 205 | print('Push executed: %r' % (nonlocal_variables['push_success'])) 206 | print(BColors.OKBLUE + '-------------------------------------------------------------------------' 207 | + BColors.ENDC) 208 | nonlocal_variables['executing_action'] = False 209 | 210 | 211 | time.sleep(0.01) 212 | action_thread = threading.Thread(target=process_actions) 213 | action_thread.daemon = True 214 | action_thread.start() 215 | exit_called = False 216 | # ------------------------------------------------------------- 217 | # ------------------------------------------------------------- 218 | 219 | # Start main training/testing loop 220 | while True: 221 | print(BColors.WARNING + '\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration) + 222 | BColors.ENDC + ' (Session iteration: {:d})'.format(session_counter)) 223 | iteration_time_0 = time.time() 224 | session_counter += 1 225 | # Make sure simulation is still stable (if not, reset simulation) 226 | robot.check_sim() 227 | 228 | # Get latest RGB-D image 229 | color_img, depth_img_raw = robot.get_camera_data() 230 | # Detph scale is 1 for simulation!! 
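# Sanity note on the heightmap computed just below: utils.get_heightmap()
# re-projects the RGB-D image into robot coordinates and grids it at
# heightmap_resolution, so each side of the heightmap spans
#   (workspace_limits[i][1] - workspace_limits[i][0]) / heightmap_resolution
# pixels. With the values shipped in push-DQN_config.yaml this works out to
# 224 x 224, which is what the random action sampler (indices 0..223) and the
# 224 x 224 action area in trainer.backprop() assume. Worked check (sketch):
#   heightmap_px = int(round((workspace_limits[0][1] - workspace_limits[0][0]) / heightmap_resolution))  # expected: 224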
231 | depth_img = depth_img_raw * robot.cam_depth_scale # Apply depth scale from calibration 232 | 233 | # Get heightmap from RGB-D image (by re-projecting 3D point cloud) 234 | color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, 235 | robot.cam_pose, workspace_limits, heightmap_resolution) 236 | valid_depth_heightmap = depth_heightmap.copy() 237 | valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 238 | 239 | # CHECK IF IT IS THE FIRST LOOP 240 | if nonlocal_variables['session_first_loop']: 241 | 242 | print(BColors.WARNING + 'This is the zeroth iteration! ' 243 | 'Mask-RG Values below will be used in the first iteration!' + BColors.ENDC) 244 | # Get ground truth segmentation masks 245 | color_m_rg, depth_m_rg, [segmentation_mask, num_objects] = robot.get_data_mask_rg() 246 | # print(num_objects) 247 | plt.imsave('ground_truth.png', segmentation_mask) 248 | plt.imsave('color_img.png', color_m_rg) 249 | plt.imsave('depth_img.png', depth_m_rg) 250 | 251 | # set the mask rcnn reward generator with the current depth and gt 252 | mask_rg.set_reward_generator(depth_m_rg, segmentation_mask) 253 | 254 | # get the rewards predictions 255 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 256 | printout = mask_rg.print_segmentation(pred_ids) 257 | plt.imsave('mask_pred_diff.png', printout) 258 | 259 | prev_seg_reward = seg_reward.copy() 260 | prev_depth_for_chg_det = depth_m_rg.copy() 261 | depth_m_rg_act = depth_m_rg.copy() # for change detection patch 262 | 263 | # if there is a success without a push just by chance, flag for the termination of the session 264 | if seg_reward > session_success_threshold: 265 | success_without_push = True 266 | nonlocal_variables['session_success'] = True 267 | else: 268 | success_without_push = False 269 | 270 | # END OF FIRST LOOP --> DEACTIVATE 271 | nonlocal_variables['session_first_loop'] = False 272 | 273 | # Save RGB-D images and RGB-D heightmaps 274 | logger.save_images(trainer.iteration, color_img, depth_img, '0') 275 | logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') 276 | 277 | # Save mask prediction difference image and error rate per iteration 278 | if 'color_m_rg_act' in locals(): 279 | logger.save_mask_diff_image(printout, segmentation_mask_action, color_m_rg_act, 280 | depth_m_rg_act, trainer.iteration, session_counter-1, err_rate) 281 | 282 | if not exit_called: 283 | # Run forward pass with network to get affordances 284 | push_predictions = trainer.forward(color_heightmap, valid_depth_heightmap, is_volatile=True) 285 | 286 | # Execute best primitive action on robot in another thread 287 | nonlocal_variables['executing_action'] = True 288 | 289 | # Run training iteration in current thread (aka training thread) 290 | # at the first loop prev_color_img will not be in locals! as well as other vars with 'prev_' 291 | if 'prev_color_img' in locals(): 292 | # The thresh hold that will terminate the current session with success, this line is after current 293 | # push action and getting the current scene reward score from mask_rg 294 | if seg_reward > session_success_threshold: 295 | # Push success!!! 
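# seg_reward here is the scene-level segmentation score that Mask-RG returned
# for the state reached by the last push (computed at the bottom of the
# previous loop iteration); once it crosses session_success_threshold the whole
# session is flagged as successful and closed in the SUCCESS branch further
# below (logging, simulation restart, session_first_loop reset).
# Illustration with made-up numbers: with a threshold of 0.9, a seg_reward of
# 0.93 enters this branch, while 0.87 keeps the session running and only
# feeds into the push reward computed by trainer.get_reward_value().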
296 | nonlocal_variables['session_success'] = True 297 | 298 | # Detect changes 299 | depth_diff = abs(depth_heightmap - prev_depth_heightmap) 300 | # depth_diff = abs(depth_m_rg_act - prev_depth_for_chg_det) 301 | depth_diff[np.isnan(depth_diff)] = 0 302 | depth_diff[depth_diff < 0.01] = 0 303 | 304 | depth_diff[depth_diff > 0] = 1 305 | change_threshold = 100 306 | change_value = np.sum(depth_diff) 307 | change_detected = change_value > change_threshold 308 | print('Change detected: %r (value: %d)' % (change_detected, change_value)) 309 | 310 | if not success_without_push: 311 | label_value, prev_reward_value, seg_vals = trainer.get_reward_value(color_heightmap, valid_depth_heightmap, 312 | prev_seg_reward, seg_reward, change_detected) 313 | 314 | trainer.label_value_log.append([label_value]) 315 | logger.write_to_log('label-value', trainer.label_value_log) 316 | trainer.reward_value_log.append([prev_reward_value]) 317 | logger.write_to_log('reward-value', trainer.reward_value_log) 318 | 319 | logger.save_mask_rg_returns([seg_vals, num_objects_action.size-1]) 320 | 321 | # Sync both action thread and training thread 322 | while nonlocal_variables['executing_action']: 323 | time.sleep(0.01) 324 | 325 | # SUCCESS 326 | if nonlocal_variables['session_success']: 327 | if not success_without_push: 328 | success_fail_record.append([session_counter, 1]) 329 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 330 | + str(session_counter - 1) + 331 | ' pushing actions --> SUCCESS! (tr_it --> {:d})'.format(trainer.iteration)) 332 | 333 | print(BColors.WARNING + 'SESSION SUCCESS!' + BColors.ENDC) 334 | else: 335 | success_fail_record.append([session_counter, 2]) 336 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 337 | + str(session_counter - 1) + 338 | ' TRAINING IGNORED --> (tr_it --> {:d})'.format( 339 | trainer.iteration)) 340 | print(BColors.WARNING + 'SESSION CLOSED WITHOUT TRAINING AS THE INITIAL SEGMENTATION WAS GOOD ENOUGH!' 341 | + BColors.ENDC) 342 | 343 | last_iter = trainer.iteration 344 | nonlocal_variables['session_success'] = False 345 | nonlocal_variables['session_first_loop'] = True 346 | seg_reward = np.asarray(0) 347 | session_counter = 0 348 | robot.restart_sim() 349 | 350 | # FAIL 351 | # it depends how to count --> 0th iteration of a session does not do any training, just gets two GTs to be able 352 | # to calculate rewards at the 1st iteration. "session_counter > 30" means after 31 sessions including the 0th 353 | # session or 30 training sessions 354 | if session_counter > session_limit: 355 | 356 | success_fail_record.append([session_counter, 0]) 357 | logger.save_session_success_fail('Session no ' + str(len(success_fail_record)) + ' --> after ' 358 | + str(session_counter) + 359 | ' pushing actions --> FAIL! (tr_it --> {:d})'.format(trainer.iteration)) 360 | last_iter = trainer.iteration 361 | nonlocal_variables['session_first_loop'] = True 362 | robot.restart_sim() 363 | # robot.add_objects(18) 364 | seg_reward = np.asarray(0) 365 | 366 | print(BColors.WARNING + 'SESSION TIMEOUT! 
(after 30 iterations without success)' + BColors.ENDC) 367 | session_counter = 0 368 | 369 | if exit_called: 370 | break 371 | 372 | # Save information for next training step 373 | prev_color_img = color_img.copy() 374 | prev_depth_img = depth_img.copy() 375 | prev_color_heightmap = color_heightmap.copy() 376 | prev_depth_heightmap = depth_heightmap.copy() 377 | prev_valid_depth_heightmap = valid_depth_heightmap.copy() 378 | prev_push_success = nonlocal_variables['push_success'] 379 | prev_grasp_success = False 380 | prev_primitive_action = 'push' 381 | prev_push_predictions = push_predictions.copy() 382 | prev_best_pix_ind = nonlocal_variables['best_pix_ind'] 383 | 384 | 385 | if not nonlocal_variables['session_first_loop']: 386 | # Action is done 387 | prev_seg_reward = seg_reward.copy() 388 | prev_depth_for_chg_det = depth_m_rg_act.copy() 389 | # print('Previous segmentation reward --> {:.5f}'.format(prev_seg_reward)) 390 | # Get RGB-D image 391 | color_img_action, depth_img_raw_action = robot.get_camera_data() 392 | # Get ground truth segmentation masks 393 | color_m_rg_act, depth_m_rg_act, [segmentation_mask_action, num_objects_action] = robot.get_data_mask_rg() 394 | plt.imsave('ground_truth.png', segmentation_mask_action) 395 | plt.imsave('color_img.png', color_m_rg_act) 396 | plt.imsave('depth_img.png', depth_m_rg_act) 397 | mask_rg.set_reward_generator(depth_m_rg_act, segmentation_mask_action) 398 | 399 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 400 | printout = mask_rg.print_segmentation(pred_ids) 401 | plt.imsave('mask_pred_diff.png', printout) 402 | print('Number of the objects left in the scene after pushing action -->', num_objects_action.size - 1) 403 | # plot_obj.set_data(printout) 404 | # plt.draw() 405 | all_masks = mask_rg.print_masks() 406 | plt.imsave('mask_pred_all.png', all_masks) 407 | 408 | trainer.iteration += 1 409 | iteration_time_1 = time.time() 410 | print('Time elapsed: %f' % (iteration_time_1-iteration_time_0)) 411 | print('---------------------------------------------------------------') 412 | print('---------------------------------------------------------------') 413 | 414 | 415 | if __name__ == '__main__': 416 | main() 417 | -------------------------------------------------------------------------------- /src/push-DQN/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from collections import OrderedDict 4 | import numpy as np 5 | from scipy import ndimage 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | import torchvision 11 | import matplotlib.pyplot as plt 12 | import time 13 | 14 | 15 | class reactive_net(nn.Module): 16 | 17 | def __init__(self, use_cuda): # , snapshot=None 18 | super(reactive_net, self).__init__() 19 | self.use_cuda = use_cuda 20 | 21 | # Initialize network trunks with DenseNet pre-trained on ImageNet 22 | self.push_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 23 | self.push_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 24 | self.grasp_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 25 | self.grasp_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 26 | 27 | self.num_rotations = 16 28 | 29 | # Construct network branches for pushing and grasping 30 | self.pushnet = nn.Sequential(OrderedDict([ 31 | ('push-norm0', nn.BatchNorm2d(2048)), 32 | ('push-relu0', 
nn.ReLU(inplace=True)), 33 | ('push-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 34 | ('push-norm1', nn.BatchNorm2d(64)), 35 | ('push-relu1', nn.ReLU(inplace=True)), 36 | ('push-conv1', nn.Conv2d(64, 3, kernel_size=1, stride=1, bias=False)) 37 | # ('push-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 38 | ])) 39 | self.graspnet = nn.Sequential(OrderedDict([ 40 | ('grasp-norm0', nn.BatchNorm2d(2048)), 41 | ('grasp-relu0', nn.ReLU(inplace=True)), 42 | ('grasp-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 43 | ('grasp-norm1', nn.BatchNorm2d(64)), 44 | ('grasp-relu1', nn.ReLU(inplace=True)), 45 | ('grasp-conv1', nn.Conv2d(64, 3, kernel_size=1, stride=1, bias=False)) 46 | # ('grasp-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 47 | ])) 48 | 49 | # Initialize network weights 50 | for m in self.named_modules(): 51 | if 'push-' in m[0] or 'grasp-' in m[0]: 52 | if isinstance(m[1], nn.Conv2d): 53 | nn.init.kaiming_normal(m[1].weight.data) 54 | elif isinstance(m[1], nn.BatchNorm2d): 55 | m[1].weight.data.fill_(1) 56 | m[1].bias.data.zero_() 57 | 58 | # Initialize output variable (for backprop) 59 | self.interm_feat = [] 60 | self.output_prob = [] 61 | 62 | 63 | def forward(self, input_color_data, input_depth_data, is_volatile=False, specific_rotation=-1): 64 | 65 | if is_volatile: 66 | output_prob = [] 67 | interm_feat = [] 68 | 69 | # Apply rotations to images 70 | for rotate_idx in range(self.num_rotations): 71 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 72 | 73 | # Compute sample grid for rotation BEFORE neural network 74 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 75 | affine_mat_before.shape = (2,3,1) 76 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 77 | if self.use_cuda: 78 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 79 | else: 80 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 81 | 82 | # Rotate images clockwise 83 | if self.use_cuda: 84 | rotate_color = F.grid_sample(Variable(input_color_data, volatile=True).cuda(), flow_grid_before, mode='nearest') 85 | rotate_depth = F.grid_sample(Variable(input_depth_data, volatile=True).cuda(), flow_grid_before, mode='nearest') 86 | else: 87 | rotate_color = F.grid_sample(Variable(input_color_data, volatile=True), flow_grid_before, mode='nearest') 88 | rotate_depth = F.grid_sample(Variable(input_depth_data, volatile=True), flow_grid_before, mode='nearest') 89 | 90 | # Compute intermediate features 91 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 92 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 93 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 94 | interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 95 | interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 96 | interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 97 | interm_feat.append([interm_push_feat, interm_grasp_feat]) 98 | 99 | # Compute sample grid for rotation AFTER branches 100 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 101 | affine_mat_after.shape = (2,3,1) 102 | affine_mat_after = 
torch.from_numpy(affine_mat_after).permute(2,0,1).float() 103 | if self.use_cuda: 104 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 105 | else: 106 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 107 | 108 | # Forward pass through branches, undo rotation on output predictions, upsample results 109 | output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 110 | nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 111 | 112 | return output_prob, interm_feat 113 | 114 | else: 115 | self.output_prob = [] 116 | self.interm_feat = [] 117 | 118 | # Apply rotations to intermediate features 119 | # for rotate_idx in range(self.num_rotations): 120 | rotate_idx = specific_rotation 121 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 122 | 123 | # Compute sample grid for rotation BEFORE branches 124 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 125 | affine_mat_before.shape = (2,3,1) 126 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 127 | if self.use_cuda: 128 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 129 | else: 130 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 131 | 132 | # Rotate images clockwise 133 | if self.use_cuda: 134 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 135 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 136 | else: 137 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False), flow_grid_before, mode='nearest') 138 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False), flow_grid_before, mode='nearest') 139 | 140 | # Compute intermediate features 141 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 142 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 143 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 144 | interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 145 | interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 146 | interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 147 | self.interm_feat.append([interm_push_feat, interm_grasp_feat]) 148 | 149 | # Compute sample grid for rotation AFTER branches 150 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 151 | affine_mat_after.shape = (2,3,1) 152 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2,0,1).float() 153 | if self.use_cuda: 154 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 155 | else: 156 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 157 | 158 | # Forward pass through branches, undo rotation on output predictions, upsample results 159 | 
self.output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 160 | nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 161 | 162 | return self.output_prob, self.interm_feat 163 | 164 | 165 | class reinforcement_net(nn.Module): 166 | 167 | def __init__(self, use_cuda): # , snapshot=None 168 | super(reinforcement_net, self).__init__() 169 | self.use_cuda = use_cuda 170 | 171 | # Initialize network trunks with DenseNet pre-trained on ImageNet 172 | self.push_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 173 | self.push_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 174 | # self.grasp_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 175 | # self.grasp_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 176 | 177 | self.num_rotations = 16 178 | 179 | # Construct network branches for pushing and grasping 180 | self.pushnet = nn.Sequential(OrderedDict([ 181 | ('push-norm0', nn.BatchNorm2d(2048)), 182 | ('push-relu0', nn.ReLU(inplace=True)), 183 | ('push-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 184 | ('push-norm1', nn.BatchNorm2d(64)), 185 | ('push-relu1', nn.ReLU(inplace=True)), 186 | ('push-conv1', nn.Conv2d(64, 1, kernel_size=1, stride=1, bias=False)) 187 | # ('push-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 188 | ])) 189 | # self.graspnet = nn.Sequential(OrderedDict([ 190 | # ('grasp-norm0', nn.BatchNorm2d(2048)), 191 | # ('grasp-relu0', nn.ReLU(inplace=True)), 192 | # ('grasp-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 193 | # ('grasp-norm1', nn.BatchNorm2d(64)), 194 | # ('grasp-relu1', nn.ReLU(inplace=True)), 195 | # ('grasp-conv1', nn.Conv2d(64, 1, kernel_size=1, stride=1, bias=False)) 196 | # # ('grasp-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 197 | # ])) 198 | 199 | # Initialize network weights 200 | for m in self.named_modules(): 201 | if 'push-' in m[0]: #or 'grasp-' in m[0]: 202 | if isinstance(m[1], nn.Conv2d): 203 | nn.init.kaiming_normal(m[1].weight.data) 204 | elif isinstance(m[1], nn.BatchNorm2d): 205 | m[1].weight.data.fill_(1) 206 | m[1].bias.data.zero_() 207 | 208 | # Initialize output variable (for backprop) 209 | self.interm_feat = [] 210 | self.output_prob = [] 211 | 212 | 213 | def forward(self, input_color_data, input_depth_data, is_volatile=False, specific_rotation=-1): 214 | 215 | if is_volatile: 216 | with torch.no_grad(): 217 | output_prob = [] 218 | interm_feat = [] 219 | 220 | # Apply rotations to images 221 | for rotate_idx in range(self.num_rotations): 222 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 223 | 224 | # Compute sample grid for rotation BEFORE neural network 225 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 226 | affine_mat_before.shape = (2, 3, 1) 227 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2, 0, 1).float() 228 | if self.use_cuda: 229 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 230 | else: 231 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 232 | 233 | # Rotate images clockwise 234 | if self.use_cuda: 235 | with torch.no_grad(): 236 | 
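# Rotation scheme for the 16 push directions: the colour/depth heightmaps are
# rotated by -theta with an affine sampling grid before the DenseNet trunks,
# and the resulting Q-map is rotated back by +theta (flow_grid_after) so every
# prediction is aligned with the un-rotated image frame. Sketch of the 2x3
# matrix that F.affine_grid expects, for an illustrative angle of 45 degrees:
#   theta = np.radians(45)
#   rot = np.asarray([[np.cos(-theta), np.sin(-theta), 0],
#                     [-np.sin(-theta), np.cos(-theta), 0]])  # same form as affine_mat_before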
rotate_color = F.grid_sample(Variable(input_color_data).cuda(), flow_grid_before, mode='nearest') 237 | rotate_depth = F.grid_sample(Variable(input_depth_data).cuda(), flow_grid_before, mode='nearest') 238 | else: 239 | with torch.no_grad(): 240 | rotate_color = F.grid_sample(Variable(input_color_data), flow_grid_before, mode='nearest') 241 | rotate_depth = F.grid_sample(Variable(input_depth_data), flow_grid_before, mode='nearest') 242 | 243 | # Compute intermediate features 244 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 245 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 246 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 247 | # interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 248 | # interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 249 | # interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 250 | # interm_feat.append([interm_push_feat, interm_grasp_feat]) 251 | 252 | # Compute sample grid for rotation AFTER branches 253 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0], 254 | [-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 255 | affine_mat_after.shape = (2, 3, 1) 256 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2, 0, 1).float() 257 | if self.use_cuda: 258 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), 259 | interm_push_feat.data.size()) 260 | else: 261 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), 262 | interm_push_feat.data.size()) 263 | 264 | # Forward pass through branches, undo rotation on output predictions, upsample results 265 | # TODO remove 0 replaces grasp net 266 | output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 0]) 267 | #nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 268 | 269 | return output_prob #, interm_feat 270 | 271 | else: 272 | self.output_prob = [] 273 | self.interm_feat = [] 274 | 275 | # Apply rotations to intermediate features 276 | # for rotate_idx in range(self.num_rotations): 277 | rotate_idx = specific_rotation 278 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 279 | 280 | # Compute sample grid for rotation BEFORE branches 281 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 282 | affine_mat_before.shape = (2,3,1) 283 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 284 | if self.use_cuda: 285 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 286 | else: 287 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 288 | 289 | # Rotate images clockwise 290 | if self.use_cuda: 291 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 292 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 293 | else: 294 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False), flow_grid_before, mode='nearest') 295 | rotate_depth = F.grid_sample(Variable(input_depth_data, 
requires_grad=False), flow_grid_before, mode='nearest') 296 | 297 | # Compute intermediate features 298 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 299 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 300 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 301 | # interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 302 | # interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 303 | # interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 304 | # self.interm_feat.append([interm_push_feat, interm_grasp_feat]) 305 | 306 | # Compute sample grid for rotation AFTER branches 307 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 308 | affine_mat_after.shape = (2, 3, 1) 309 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2,0,1).float() 310 | if self.use_cuda: 311 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 312 | else: 313 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 314 | 315 | # Forward pass through branches, undo rotation on output predictions, upsample results 316 | # TODO remove 0 replaces grasp net 317 | self.output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')),0]) 318 | #nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 319 | 320 | return self.output_prob #, self.interm_feat 321 | -------------------------------------------------------------------------------- /src/push-DQN/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | from utils import CrossEntropyLoss2d 10 | from models import reactive_net, reinforcement_net 11 | from scipy import ndimage 12 | import matplotlib.pyplot as plt 13 | 14 | class BColors: 15 | HEADER = '\033[95m' 16 | OKBLUE = '\033[94m' 17 | OKGREEN = '\033[92m' 18 | WARNING = '\033[93m' 19 | FAIL = '\033[91m' 20 | ENDC = '\033[0m' 21 | BOLD = '\033[1m' 22 | UNDERLINE = '\033[4m' 23 | 24 | class Trainer(object): 25 | def __init__(self, future_reward_discount, 26 | is_testing, load_snapshot, snapshot_file, force_cpu): 27 | 28 | self.method = 'reinforcement' 29 | 30 | # Check if CUDA can be used 31 | if torch.cuda.is_available() and not force_cpu: 32 | print("CUDA detected. Running with GPU acceleration.") 33 | self.use_cuda = True 34 | elif force_cpu: 35 | print("CUDA detected, but overriding with option '--cpu'. Running with only CPU.") 36 | self.use_cuda = False 37 | else: 38 | print("CUDA is *NOT* detected. 
Running with only CPU.") 39 | self.use_cuda = False 40 | 41 | # Fully convolutional classification network for supervised learning 42 | if self.method == 'reactive': 43 | self.model = reactive_net(self.use_cuda) 44 | 45 | # Initialize classification loss 46 | push_num_classes = 3 # 0 - push, 1 - no change push, 2 - no loss 47 | push_class_weights = torch.ones(push_num_classes) 48 | push_class_weights[push_num_classes - 1] = 0 49 | if self.use_cuda: 50 | self.push_criterion = CrossEntropyLoss2d(push_class_weights.cuda()).cuda() 51 | else: 52 | self.push_criterion = CrossEntropyLoss2d(push_class_weights) 53 | grasp_num_classes = 3 # 0 - grasp, 1 - failed grasp, 2 - no loss 54 | grasp_class_weights = torch.ones(grasp_num_classes) 55 | grasp_class_weights[grasp_num_classes - 1] = 0 56 | if self.use_cuda: 57 | self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights.cuda()).cuda() 58 | else: 59 | self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights) 60 | 61 | # Fully convolutional Q network for deep reinforcement learning 62 | elif self.method == 'reinforcement': 63 | self.model = reinforcement_net(self.use_cuda) 64 | self.future_reward_discount = future_reward_discount 65 | 66 | # Initialize Huber loss 67 | self.criterion = torch.nn.SmoothL1Loss(reduce=False) # Huber loss 68 | if self.use_cuda: 69 | self.criterion = self.criterion.cuda() 70 | 71 | # Load pre-trained model 72 | if load_snapshot: 73 | self.model.load_state_dict(torch.load(snapshot_file)) 74 | print('Pre-trained model snapshot loaded from: %s' % (snapshot_file)) 75 | 76 | # Convert model from CPU to GPU 77 | if self.use_cuda: 78 | self.model = self.model.cuda() 79 | 80 | # Set model to training mode 81 | self.model.train() 82 | 83 | # Initialize optimizer 84 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5) 85 | self.iteration = 0 86 | 87 | # Initialize lists to save execution info and RL variables 88 | self.executed_action_log = [] 89 | self.label_value_log = [] 90 | self.reward_value_log = [] 91 | self.predicted_value_log = [] 92 | self.use_heuristic_log = [] 93 | self.is_exploit_log = [] 94 | self.clearance_log = [] 95 | self.loss_log = [] 96 | 97 | 98 | # Pre-load execution info and RL variables 99 | def preload(self, transitions_directory): 100 | self.executed_action_log = np.loadtxt(os.path.join(transitions_directory, 'executed-action.log.txt'), delimiter=' ') 101 | self.iteration = self.executed_action_log.shape[0] - 2 102 | self.executed_action_log = self.executed_action_log[0:self.iteration,:] 103 | self.executed_action_log = self.executed_action_log.tolist() 104 | self.label_value_log = np.loadtxt(os.path.join(transitions_directory, 'label-value.log.txt'), delimiter=' ') 105 | self.label_value_log = self.label_value_log[0:self.iteration] 106 | self.label_value_log.shape = (self.iteration,1) 107 | self.label_value_log = self.label_value_log.tolist() 108 | self.predicted_value_log = np.loadtxt(os.path.join(transitions_directory, 'predicted-value.log.txt'), delimiter=' ') 109 | self.predicted_value_log = self.predicted_value_log[0:self.iteration] 110 | self.predicted_value_log.shape = (self.iteration,1) 111 | self.predicted_value_log = self.predicted_value_log.tolist() 112 | self.reward_value_log = np.loadtxt(os.path.join(transitions_directory, 'reward-value.log.txt'), delimiter=' ') 113 | self.reward_value_log = self.reward_value_log[0:self.iteration] 114 | self.reward_value_log.shape = (self.iteration,1) 115 | self.reward_value_log = 
self.reward_value_log.tolist() 116 | self.use_heuristic_log = np.loadtxt(os.path.join(transitions_directory, 'use-heuristic.log.txt'), delimiter=' ') 117 | self.use_heuristic_log = self.use_heuristic_log[0:self.iteration] 118 | self.use_heuristic_log.shape = (self.iteration,1) 119 | self.use_heuristic_log = self.use_heuristic_log.tolist() 120 | self.is_exploit_log = np.loadtxt(os.path.join(transitions_directory, 'is-exploit.log.txt'), delimiter=' ') 121 | self.is_exploit_log = self.is_exploit_log[0:self.iteration] 122 | self.is_exploit_log.shape = (self.iteration,1) 123 | self.is_exploit_log = self.is_exploit_log.tolist() 124 | self.clearance_log = np.loadtxt(os.path.join(transitions_directory, 'clearance.log.txt'), delimiter=' ') 125 | self.clearance_log.shape = (self.clearance_log.shape[0],1) 126 | self.clearance_log = self.clearance_log.tolist() 127 | 128 | self.loss_log = np.loadtxt(os.path.join(transitions_directory, 'loss.log.txt'), delimiter=' ') 129 | self.loss_log.shape = (self.loss_log.shape[0],1) 130 | self.loss_log = self.loss_log.tolist() 131 | 132 | 133 | 134 | # Compute forward pass through model to compute affordances/Q 135 | def forward(self, color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=-1): 136 | 137 | # Apply 2x scale to input heightmaps 138 | color_heightmap_2x = ndimage.zoom(color_heightmap, zoom=[2,2,1], order=0) 139 | depth_heightmap_2x = ndimage.zoom(depth_heightmap, zoom=[2,2], order=0) 140 | assert(color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2]) 141 | 142 | # Add extra padding (to handle rotations inside network) 143 | diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2) 144 | diag_length = np.ceil(diag_length/32)*32 145 | padding_width = int((diag_length - color_heightmap_2x.shape[0])/2) 146 | color_heightmap_2x_r = np.pad(color_heightmap_2x[:,:,0], padding_width, 'constant', constant_values=0) 147 | color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0], color_heightmap_2x_r.shape[1], 1) 148 | color_heightmap_2x_g = np.pad(color_heightmap_2x[:,:,1], padding_width, 'constant', constant_values=0) 149 | color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0], color_heightmap_2x_g.shape[1], 1) 150 | color_heightmap_2x_b = np.pad(color_heightmap_2x[:,:,2], padding_width, 'constant', constant_values=0) 151 | color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0], color_heightmap_2x_b.shape[1], 1) 152 | color_heightmap_2x = np.concatenate((color_heightmap_2x_r, color_heightmap_2x_g, color_heightmap_2x_b), axis=2) 153 | depth_heightmap_2x = np.pad(depth_heightmap_2x, padding_width, 'constant', constant_values=0) 154 | 155 | # Pre-process color image (scale and normalize) 156 | image_mean = [0.485, 0.456, 0.406] 157 | image_std = [0.229, 0.224, 0.225] 158 | input_color_image = color_heightmap_2x.astype(float)/255 159 | for c in range(3): 160 | input_color_image[:,:,c] = (input_color_image[:,:,c] - image_mean[c])/image_std[c] 161 | 162 | # Pre-process depth image (normalize) 163 | image_mean = [0.01, 0.01, 0.01] 164 | image_std = [0.03, 0.03, 0.03] 165 | depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0], depth_heightmap_2x.shape[1], 1) 166 | input_depth_image = np.concatenate((depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x), axis=2) 167 | for c in range(3): 168 | input_depth_image[:,:,c] = (input_depth_image[:,:,c] - image_mean[c])/image_std[c] 169 | 170 | # Construct minibatch of size 1 (b,c,h,w) 171 | input_color_image.shape = (input_color_image.shape[0], input_color_image.shape[1], 
input_color_image.shape[2], 1) 172 | input_depth_image.shape = (input_depth_image.shape[0], input_depth_image.shape[1], input_depth_image.shape[2], 1) 173 | input_color_data = torch.from_numpy(input_color_image.astype(np.float32)).permute(3,2,0,1) 174 | input_depth_data = torch.from_numpy(input_depth_image.astype(np.float32)).permute(3,2,0,1) 175 | 176 | # Pass input data through model 177 | # output_prob, state_feat 178 | output_prob = self.model.forward(input_color_data, input_depth_data, is_volatile, specific_rotation) 179 | 180 | if self.method == 'reactive': 181 | pass 182 | # # Return affordances (and remove extra padding) 183 | # for rotate_idx in range(len(output_prob)): 184 | # if rotate_idx == 0: 185 | # push_predictions = F.softmax(output_prob[rotate_idx][0], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)] 186 | # grasp_predictions = F.softmax(output_prob[rotate_idx][1], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)] 187 | # else: 188 | # push_predictions = np.concatenate((push_predictions, F.softmax(output_prob[rotate_idx][0], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)]), axis=0) 189 | # grasp_predictions = np.concatenate((grasp_predictions, F.softmax(output_prob[rotate_idx][1], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)]), axis=0) 190 | 191 | elif self.method == 'reinforcement': 192 | 193 | # Return Q values (and remove extra padding) 194 | for rotate_idx in range(len(output_prob)): 195 | # if first rotation 196 | if rotate_idx == 0: 197 | push_predictions = output_prob[rotate_idx][0].cpu().data.numpy()[:, 198 | 0, int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2), 199 | int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2)] 200 | else: 201 | push_predictions = np.concatenate((push_predictions, 202 | output_prob[rotate_idx][0].cpu().data.numpy()[:, 203 | 0, int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2), 204 | int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2)]), 205 | axis=0) 206 | 207 | return push_predictions 208 | 209 | def get_reward_value(self, next_color_heightmap, next_depth_heightmap, prev_seg_score, curr_seg_score, change_detected): 210 | 211 | if self.method == 'reactive': 212 | pass 213 | # Compute label value 214 | # label_value = 0 215 | # if primitive_action == 'push': 216 | # if not change_detected: 217 | # label_value = 1 218 | # elif primitive_action == 'grasp': 219 | # if not grasp_success: 220 | # label_value = 1 221 | # 222 | # print('Label value: %d' % (label_value)) 223 | # return label_value, label_value 224 | 225 | elif self.method == 'reinforcement': 226 | 227 | # Compute current reward 228 | # current_reward = 0 229 | # if primitive_action == 'push': 230 | # if change_detected: 231 | # evaluate rewards intrinsically with Q values with a forward pass below 232 | # if (curr_seg_score - prev_seg_score) == 0: 233 | # curr_seg_rew = curr_seg_score 234 | # else: 235 | 236 | # #this should be in the actual training 237 | # if sess_success and sess_iteration < 10: 238 | # current_reward = 
10 239 | # elif sess_success: 240 | # current_reward = 5 241 | # else: 242 | # if (curr_seg_score - prev_seg_score) > 0.4: 243 | # current_reward = 1.0 244 | # elif (curr_seg_score - prev_seg_score) > 0.01: 245 | # current_reward = 0.3 246 | # elif (curr_seg_score - prev_seg_score) < -0.4: 247 | # current_reward = -0.5 248 | # elif (curr_seg_score - prev_seg_score) < -0.01: 249 | # current_reward = -0.1 250 | # else: #(-0.01 to 0.01) 251 | # current_reward = 0 252 | 253 | diff = curr_seg_score - prev_seg_score 254 | if change_detected: 255 | current_reward = (diff*10)**2 256 | # current_reward = np.abs(diff) * np.exp(np.abs(diff)) 257 | if diff < 0: 258 | current_reward *= -1 259 | else: 260 | current_reward = -0.5 261 | 262 | seg_vals = [curr_seg_score, prev_seg_score, diff, current_reward, change_detected] 263 | print('------------------------------- Push-to-See -------------------------------') 264 | print('Current segmentation score --> ', curr_seg_score, ' Previous segmentation score --> ', prev_seg_score) 265 | print('Segmentation difference after pushing --> ' + BColors.WARNING, curr_seg_score - prev_seg_score, BColors.ENDC) 266 | print('CURRENT REWARD --> ' + BColors.WARNING + '{:.5f}'.format(current_reward) + BColors.ENDC) 267 | next_push_predictions = self.forward(next_color_heightmap, next_depth_heightmap, is_volatile=True) 268 | 269 | future_reward = np.max(next_push_predictions) 270 | print('Future reward: %f' % (future_reward)) 271 | # if primitive_action == 'push' and not self.push_rewards: 272 | # # future reward discount --> 0.5 --> default val 273 | # expected_reward = self.future_reward_discount * future_reward 274 | # print('Expected reward: %f + %f x %f = %f' % (0.0, self.future_reward_discount, future_reward, expected_reward)) 275 | # else: 276 | expected_reward = current_reward + self.future_reward_discount * future_reward 277 | print('Expected reward: %f + (%f x %f) = %f' % (current_reward, self.future_reward_discount, future_reward, expected_reward)) 278 | print('---------------------------------------------------------------') 279 | return expected_reward, current_reward, seg_vals 280 | 281 | 282 | 283 | # Compute labels and backpropagate 284 | def backprop(self, color_heightmap, depth_heightmap, primitive_action, best_pix_ind, label_value): 285 | 286 | if self.method == 'reactive': 287 | 288 | # Compute fill value 289 | fill_value = 2 290 | 291 | # Compute labels 292 | label = np.zeros((1,320,320)) + fill_value 293 | action_area = np.zeros((224,224)) 294 | action_area[best_pix_ind[1]][best_pix_ind[2]] = 1 295 | # blur_kernel = np.ones((5,5),np.float32)/25 296 | # action_area = cv2.filter2D(action_area, -1, blur_kernel) 297 | tmp_label = np.zeros((224,224)) + fill_value 298 | tmp_label[action_area > 0] = label_value 299 | label[0,48:(320-48),48:(320-48)] = tmp_label 300 | 301 | # Compute loss and backward pass 302 | self.optimizer.zero_grad() 303 | loss_value = 0 304 | if primitive_action == 'push': 305 | # loss = self.push_criterion(self.model.output_prob[best_pix_ind[0]][0], Variable(torch.from_numpy(label).long().cuda())) 306 | 307 | # Do forward pass with specified rotation (to save gradients) 308 | # push_predictions, grasp_predictions, state_feat 309 | push_predictions, grasp_predictions = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=best_pix_ind[0]) 310 | 311 | if self.use_cuda: 312 | loss = self.push_criterion(self.model.output_prob[0][0], Variable(torch.from_numpy(label).long().cuda())) 313 | else: 314 | loss = 
self.push_criterion(self.model.output_prob[0][0], Variable(torch.from_numpy(label).long())) 315 | loss.backward() 316 | loss_value = loss.cpu().data.numpy() 317 | 318 | # elif primitive_action == 'grasp': 319 | # # loss = self.grasp_criterion(self.model.output_prob[best_pix_ind[0]][1], Variable(torch.from_numpy(label).long().cuda())) 320 | # # loss += self.grasp_criterion(self.model.output_prob[(best_pix_ind[0] + self.model.num_rotations/2) % self.model.num_rotations][1], Variable(torch.from_numpy(label).long().cuda())) 321 | # 322 | # # Do forward pass with specified rotation (to save gradients) 323 | # push_predictions, grasp_predictions, state_feat = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=best_pix_ind[0]) 324 | # 325 | # if self.use_cuda: 326 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long().cuda())) 327 | # else: 328 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long())) 329 | # loss.backward() 330 | # loss_value += loss.cpu().data.numpy() 331 | # 332 | # # Since grasping is symmetric, train with another forward pass of opposite rotation angle 333 | # opposite_rotate_idx = (best_pix_ind[0] + self.model.num_rotations/2) % self.model.num_rotations 334 | # 335 | # push_predictions, grasp_predictions, state_feat = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=opposite_rotate_idx) 336 | # 337 | # if self.use_cuda: 338 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long().cuda())) 339 | # else: 340 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long())) 341 | # loss.backward() 342 | # loss_value += loss.cpu().data.numpy() 343 | # 344 | # loss_value = loss_value/2 345 | 346 | print('Training loss: %f' % (loss_value)) 347 | self.optimizer.step() 348 | 349 | elif self.method == 'reinforcement': 350 | 351 | # Compute labels 352 | # 320 for rotations?yes 353 | label = np.zeros((1, 320, 320)) 354 | action_area = np.zeros((224, 224)) 355 | action_area[best_pix_ind[1]][best_pix_ind[2]] = 1 356 | # blur_kernel = np.ones((5,5),np.float32)/25 357 | # action_area = cv2.filter2D(action_area, -1, blur_kernel) 358 | tmp_label = np.zeros((224, 224)) 359 | tmp_label[action_area > 0] = label_value 360 | label[0, 48:(320-48), 48:(320-48)] = tmp_label 361 | 362 | # Compute label mask 363 | label_weights = np.zeros(label.shape) 364 | tmp_label_weights = np.zeros((224, 224)) 365 | tmp_label_weights[action_area > 0] = 1 366 | label_weights[0, 48:(320-48), 48:(320-48)] = tmp_label_weights 367 | 368 | # Compute loss and backward pass 369 | self.optimizer.zero_grad() 370 | loss_value = 0 371 | 372 | # Do forward pass with specified rotation (to save gradients) 373 | # push_predictions, grasp_predictions, state_feat 374 | push_predictions = self.forward(color_heightmap, depth_heightmap, is_volatile=False, 375 | specific_rotation=best_pix_ind[0]) 376 | 377 | if self.use_cuda: 378 | loss = self.criterion(self.model.output_prob[0][0].view(1, 320, 320), 379 | Variable(torch.from_numpy(label).float().cuda())) * Variable(torch.from_numpy(label_weights).float().cuda(), 380 | requires_grad=False) 381 | else: 382 | loss = self.criterion(self.model.output_prob[0][0].view(1, 320, 320), 383 | Variable(torch.from_numpy(label).float())) * Variable(torch.from_numpy(label_weights).float(), 384 | requires_grad=False) 385 | loss = loss.sum() 386 | 
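# Note on the masking above: label is 320 x 320 (224 + 2 * 48 padding so the
# target survives the rotations inside the network) and label_weights is 1
# only at the executed action pixel, so the summed Huber loss is driven by a
# single location in the Q-map. Minimal sketch of the same idea with made-up
# shapes ('huber' stands in for the SmoothL1 criterion used here):
#   label = np.zeros((1, 8, 8)); label[0, 3, 4] = label_value
#   weights = np.zeros((1, 8, 8)); weights[0, 3, 4] = 1
#   loss = (huber(pred, label) * weights).sum()   # only pixel (3, 4) contributes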
loss.backward() 387 | loss_value = loss.cpu().data.numpy() 388 | l_value = loss_value 389 | print(BColors.OKBLUE + 'Training loss: %f' % (loss_value) + BColors.OKBLUE) 390 | self.optimizer.step() 391 | 392 | return l_value 393 | 394 | def get_prediction_vis(self, predictions, color_heightmap, best_pix_ind): 395 | 396 | canvas = None 397 | num_rotations = predictions.shape[0] 398 | for canvas_row in range(int(num_rotations/4)): 399 | tmp_row_canvas = None 400 | for canvas_col in range(4): 401 | rotate_idx = canvas_row*4+canvas_col 402 | prediction_vis = predictions[rotate_idx,:,:].copy() 403 | # prediction_vis[prediction_vis < 0] = 0 # assume probability 404 | # prediction_vis[prediction_vis > 1] = 1 # assume probability 405 | prediction_vis = np.clip(prediction_vis, 0, 1) 406 | prediction_vis.shape = (predictions.shape[1], predictions.shape[2]) 407 | prediction_vis = cv2.applyColorMap((prediction_vis*255).astype(np.uint8), cv2.COLORMAP_JET) 408 | if rotate_idx == best_pix_ind[0]: 409 | prediction_vis = cv2.circle(prediction_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0,0,255), 2) 410 | prediction_vis = ndimage.rotate(prediction_vis, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 411 | background_image = ndimage.rotate(color_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 412 | prediction_vis = (0.5*cv2.cvtColor(background_image, cv2.COLOR_RGB2BGR) + 0.5*prediction_vis).astype(np.uint8) 413 | if tmp_row_canvas is None: 414 | tmp_row_canvas = prediction_vis 415 | else: 416 | tmp_row_canvas = np.concatenate((tmp_row_canvas,prediction_vis), axis=1) 417 | if canvas is None: 418 | canvas = tmp_row_canvas 419 | else: 420 | canvas = np.concatenate((canvas,tmp_row_canvas), axis=0) 421 | 422 | return canvas 423 | 424 | 425 | def push_heuristic(self, depth_heightmap): 426 | 427 | num_rotations = 16 428 | 429 | for rotate_idx in range(num_rotations): 430 | rotated_heightmap = ndimage.rotate(depth_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 431 | valid_areas = np.zeros(rotated_heightmap.shape) 432 | valid_areas[ndimage.interpolation.shift(rotated_heightmap, [0, -25], order=0) - rotated_heightmap > 0.02] = 1 433 | # valid_areas = np.multiply(valid_areas, rotated_heightmap) 434 | blur_kernel = np.ones((25, 25), np.float32)/9 435 | valid_areas = cv2.filter2D(valid_areas, -1, blur_kernel) 436 | tmp_push_predictions = ndimage.rotate(valid_areas, -rotate_idx*(360.0/num_rotations), reshape=False, order=0) 437 | tmp_push_predictions.shape = (1, rotated_heightmap.shape[0], rotated_heightmap.shape[1]) 438 | 439 | if rotate_idx == 0: 440 | push_predictions = tmp_push_predictions 441 | else: 442 | push_predictions = np.concatenate((push_predictions, tmp_push_predictions), axis=0) 443 | 444 | best_pix_ind = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 445 | return best_pix_ind 446 | 447 | 448 | def grasp_heuristic(self, depth_heightmap): 449 | 450 | num_rotations = 16 451 | 452 | for rotate_idx in range(num_rotations): 453 | rotated_heightmap = ndimage.rotate(depth_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 454 | valid_areas = np.zeros(rotated_heightmap.shape) 455 | valid_areas[np.logical_and(rotated_heightmap - ndimage.interpolation.shift(rotated_heightmap, [0,-25], order=0) > 0.02, rotated_heightmap - ndimage.interpolation.shift(rotated_heightmap, [0,25], order=0) > 0.02)] = 1 456 | # valid_areas = np.multiply(valid_areas, rotated_heightmap) 457 | blur_kernel = np.ones((25,25),np.float32)/9 458 | 
valid_areas = cv2.filter2D(valid_areas, -1, blur_kernel) 459 | tmp_grasp_predictions = ndimage.rotate(valid_areas, -rotate_idx*(360.0/num_rotations), reshape=False, order=0) 460 | tmp_grasp_predictions.shape = (1, rotated_heightmap.shape[0], rotated_heightmap.shape[1]) 461 | 462 | if rotate_idx == 0: 463 | grasp_predictions = tmp_grasp_predictions 464 | else: 465 | grasp_predictions = np.concatenate((grasp_predictions, tmp_grasp_predictions), axis=0) 466 | 467 | best_pix_ind = np.unravel_index(np.argmax(grasp_predictions), grasp_predictions.shape) 468 | return best_pix_ind 469 | 470 | -------------------------------------------------------------------------------- /src/push-DQN/utils.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import math 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | 10 | from mpl_toolkits import mplot3d 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | def get_pointcloud(color_img, depth_img, camera_intrinsics): 15 | 16 | # Get depth image size 17 | im_h = depth_img.shape[0] 18 | im_w = depth_img.shape[1] 19 | 20 | # Project depth into 3D point cloud in camera coordinates 21 | # np.mgrid would be better 22 | # pix_x, pix_y = np.mgrid[0:im_w, 0:im_h] 23 | pix_x, pix_y = np.meshgrid(np.linspace(0, im_w-1, im_w), np.linspace(0, im_h-1, im_h)) 24 | cam_pts_x = np.multiply(pix_x - camera_intrinsics[0][2], depth_img / camera_intrinsics[0][0]) 25 | cam_pts_y = np.multiply(pix_y - camera_intrinsics[1][2], depth_img / camera_intrinsics[1][1]) 26 | cam_pts_z = depth_img.copy() 27 | cam_pts_x.shape = (im_h * im_w, 1) 28 | cam_pts_y.shape = (im_h * im_w, 1) 29 | cam_pts_z.shape = (im_h * im_w, 1) 30 | 31 | # Reshape image into colors for 3D point cloud 32 | rgb_pts_r = color_img[:, :, 0] 33 | rgb_pts_g = color_img[:, :, 1] 34 | rgb_pts_b = color_img[:, :, 2] 35 | rgb_pts_r.shape = (im_h * im_w, 1) 36 | rgb_pts_g.shape = (im_h * im_w, 1) 37 | rgb_pts_b.shape = (im_h * im_w, 1) 38 | 39 | cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1) 40 | rgb_pts = np.concatenate((rgb_pts_r, rgb_pts_g, rgb_pts_b), axis=1) 41 | 42 | return cam_pts, rgb_pts 43 | 44 | 45 | def get_heightmap(color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits, heightmap_resolution): 46 | 47 | # fig = plt.figure() 48 | # ax = plt.axes(projection="3d") 49 | 50 | # Compute heightmap size 51 | heightmap_size = np.round(((workspace_limits[1][1] - workspace_limits[1][0])/heightmap_resolution, 52 | (workspace_limits[0][1] - workspace_limits[0][0])/heightmap_resolution)).astype(int) 53 | 54 | # Get 3D point cloud from RGB-D images 55 | surface_pts, color_pts = get_pointcloud(color_img, depth_img, cam_intrinsics) 56 | 57 | # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2]) 58 | # plt.show() 59 | 60 | # Transform 3D point cloud from camera coordinates to robot coordinates 61 | surface_pts = np.transpose(np.dot(cam_pose[0:3, 0:3], 62 | np.transpose(surface_pts)) + np.tile(cam_pose[0:3, 3:], (1, surface_pts.shape[0]))) 63 | 64 | # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2]) 65 | # plt.show() 66 | 67 | # Sort surface points by z value 68 | sort_z_ind = np.argsort(surface_pts[:, 2]) 69 | surface_pts = surface_pts[sort_z_ind] 70 | color_pts = color_pts[sort_z_ind] 71 | 72 | # Filter out surface points outside heightmap boundaries 73 | heightmap_valid_ind = np.logical_and( 74 | 
np.logical_and(
75 |             np.logical_and(
76 |                 np.logical_and(surface_pts[:, 0] >= workspace_limits[0][0], surface_pts[:, 0] < workspace_limits[0][1]),
77 |                 surface_pts[:, 1] >= workspace_limits[1][0]), surface_pts[:, 1] < workspace_limits[1][1]),
78 |         surface_pts[:, 2] < workspace_limits[2][1])
79 |
80 |     surface_pts = surface_pts[heightmap_valid_ind]
81 |     color_pts = color_pts[heightmap_valid_ind]
82 |
83 |     # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2], c=surface_pts[:, 2], cmap='hsv')
84 |     # plt.show()
85 |
86 |     # Create orthographic top-down-view RGB-D heightmaps
87 |     color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
88 |     color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
89 |     color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
90 |     depth_heightmap = np.zeros(heightmap_size)
91 |     heightmap_pix_x = np.floor((surface_pts[:, 0] - workspace_limits[0][0])/heightmap_resolution).astype(int)
92 |     heightmap_pix_y = np.floor((surface_pts[:, 1] - workspace_limits[1][0])/heightmap_resolution).astype(int)
93 |     color_heightmap_r[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [0]]
94 |     color_heightmap_g[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [1]]
95 |     color_heightmap_b[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [2]]
96 |     color_heightmap = np.concatenate((color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2)
97 |     depth_heightmap[heightmap_pix_y, heightmap_pix_x] = surface_pts[:, 2]
98 |
99 |
100 |
101 |     z_bottom = workspace_limits[2][0]
102 |     depth_heightmap = depth_heightmap - z_bottom
103 |     depth_heightmap[depth_heightmap < 0] = 0
104 |     depth_heightmap[depth_heightmap == -z_bottom] = np.nan
105 |     # plt.imshow(depth_heightmap)
106 |     # asd=123
107 |     return color_heightmap, depth_heightmap
108 |
109 | # Save a 3D point cloud to a binary .ply file
110 | def pcwrite(xyz_pts, filename, rgb_pts=None):
111 |     assert xyz_pts.shape[1] == 3, 'input XYZ points should be an Nx3 matrix'
112 |     if rgb_pts is None:
113 |         rgb_pts = np.ones(xyz_pts.shape).astype(np.uint8)*255
114 |     assert xyz_pts.shape == rgb_pts.shape, 'input RGB colors should be Nx3 matrix and same size as input XYZ points'
115 |
116 |     # Write header for .ply file
117 |     pc_file = open(filename, 'wb')
118 |     pc_file.write(bytearray('ply\n', 'utf8'))
119 |     pc_file.write(bytearray('format binary_little_endian 1.0\n', 'utf8'))
120 |     pc_file.write(bytearray(('element vertex %d\n' % xyz_pts.shape[0]), 'utf8'))
121 |     pc_file.write(bytearray('property float x\n', 'utf8'))
122 |     pc_file.write(bytearray('property float y\n', 'utf8'))
123 |     pc_file.write(bytearray('property float z\n', 'utf8'))
124 |     pc_file.write(bytearray('property uchar red\n', 'utf8'))
125 |     pc_file.write(bytearray('property uchar green\n', 'utf8'))
126 |     pc_file.write(bytearray('property uchar blue\n', 'utf8'))
127 |     pc_file.write(bytearray('end_header\n', 'utf8'))
128 |
129 |     # Write 3D points to .ply file
130 |     for i in range(xyz_pts.shape[0]):
131 |         pc_file.write(bytearray(struct.pack("fffccc",xyz_pts[i][0],xyz_pts[i][1],xyz_pts[i][2],rgb_pts[i][0].tostring(),rgb_pts[i][1].tostring(),rgb_pts[i][2].tostring())))
132 |     pc_file.close()
133 |
134 |
135 | def get_affordance_vis(grasp_affordances, input_images, num_rotations, best_pix_ind):
136 |     vis = None
137 |     for vis_row in range(int(num_rotations/4)):
138 |         tmp_row_vis = None
139 |         for vis_col in range(4):
140 |             rotate_idx = vis_row*4+vis_col
141 |             affordance_vis = 
grasp_affordances[rotate_idx,:,:] 142 | affordance_vis[affordance_vis < 0] = 0 # assume probability 143 | # affordance_vis = np.divide(affordance_vis, np.max(affordance_vis)) 144 | affordance_vis[affordance_vis > 1] = 1 # assume probability 145 | affordance_vis.shape = (grasp_affordances.shape[1], grasp_affordances.shape[2]) 146 | affordance_vis = cv2.applyColorMap((affordance_vis*255).astype(np.uint8), cv2.COLORMAP_JET) 147 | input_image_vis = (input_images[rotate_idx,:,:,:]*255).astype(np.uint8) 148 | input_image_vis = cv2.resize(input_image_vis, (0,0), fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST) 149 | affordance_vis = (0.5*cv2.cvtColor(input_image_vis, cv2.COLOR_RGB2BGR) + 0.5*affordance_vis).astype(np.uint8) 150 | if rotate_idx == best_pix_ind[0]: 151 | affordance_vis = cv2.circle(affordance_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0,0,255), 2) 152 | if tmp_row_vis is None: 153 | tmp_row_vis = affordance_vis 154 | else: 155 | tmp_row_vis = np.concatenate((tmp_row_vis,affordance_vis), axis=1) 156 | if vis is None: 157 | vis = tmp_row_vis 158 | else: 159 | vis = np.concatenate((vis,tmp_row_vis), axis=0) 160 | 161 | return vis 162 | 163 | 164 | def get_difference(color_heightmap, color_space, bg_color_heightmap): 165 | 166 | color_space = np.concatenate((color_space, np.asarray([[0.0, 0.0, 0.0]])), axis=0) 167 | color_space.shape = (color_space.shape[0], 1, 1, color_space.shape[1]) 168 | color_space = np.tile(color_space, (1, color_heightmap.shape[0], color_heightmap.shape[1], 1)) 169 | 170 | # Normalize color heightmaps 171 | color_heightmap = color_heightmap.astype(float)/255.0 172 | color_heightmap.shape = (1, color_heightmap.shape[0], color_heightmap.shape[1], color_heightmap.shape[2]) 173 | color_heightmap = np.tile(color_heightmap, (color_space.shape[0], 1, 1, 1)) 174 | 175 | bg_color_heightmap = bg_color_heightmap.astype(float)/255.0 176 | bg_color_heightmap.shape = (1, bg_color_heightmap.shape[0], bg_color_heightmap.shape[1], bg_color_heightmap.shape[2]) 177 | bg_color_heightmap = np.tile(bg_color_heightmap, (color_space.shape[0], 1, 1, 1)) 178 | 179 | # Compute nearest neighbor distances to key colors 180 | key_color_dist = np.sqrt(np.sum(np.power(color_heightmap - color_space,2), axis=3)) 181 | # key_color_dist_prob = F.softmax(Variable(torch.from_numpy(key_color_dist), volatile=True), dim=0).data.numpy() 182 | 183 | bg_key_color_dist = np.sqrt(np.sum(np.power(bg_color_heightmap - color_space,2), axis=3)) 184 | # bg_key_color_dist_prob = F.softmax(Variable(torch.from_numpy(bg_key_color_dist), volatile=True), dim=0).data.numpy() 185 | 186 | key_color_match = np.argmin(key_color_dist, axis=0) 187 | bg_key_color_match = np.argmin(bg_key_color_dist, axis=0) 188 | key_color_match[key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 1 189 | bg_key_color_match[bg_key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 2 190 | 191 | return np.sum(key_color_match == bg_key_color_match).astype(float)/np.sum(bg_key_color_match < color_space.shape[0]).astype(float) 192 | 193 | 194 | # Get rotation matrix from euler angles 195 | def euler2rotm(theta): 196 | R_x = np.array([[1, 0, 0 ], 197 | [0, math.cos(theta[0]), -math.sin(theta[0]) ], 198 | [0, math.sin(theta[0]), math.cos(theta[0]) ] 199 | ]) 200 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1]) ], 201 | [0, 1, 0 ], 202 | [-math.sin(theta[1]), 0, math.cos(theta[1]) ] 203 | ]) 204 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0], 205 | [math.sin(theta[2]), 
math.cos(theta[2]), 0], 206 | [0, 0, 1] 207 | ]) 208 | R = np.dot(R_z, np.dot( R_y, R_x )) 209 | return R 210 | 211 | 212 | # Checks if a matrix is a valid rotation matrix. 213 | def isRotm(R) : 214 | Rt = np.transpose(R) 215 | shouldBeIdentity = np.dot(Rt, R) 216 | I = np.identity(3, dtype = R.dtype) 217 | n = np.linalg.norm(I - shouldBeIdentity) 218 | return n < 1e-6 219 | 220 | 221 | # Calculates rotation matrix to euler angles 222 | def rotm2euler(R) : 223 | 224 | assert(isRotm(R)) 225 | 226 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 227 | singular = sy < 1e-6 228 | 229 | if not singular : 230 | x = math.atan2(R[2,1] , R[2,2]) 231 | y = math.atan2(-R[2,0], sy) 232 | z = math.atan2(R[1,0], R[0,0]) 233 | else : 234 | x = math.atan2(-R[1,2], R[1,1]) 235 | y = math.atan2(-R[2,0], sy) 236 | z = 0 237 | 238 | return np.array([x, y, z]) 239 | 240 | 241 | def angle2rotm(angle, axis, point=None): 242 | # Copyright (c) 2006-2018, Christoph Gohlke 243 | 244 | sina = math.sin(angle) 245 | cosa = math.cos(angle) 246 | axis = axis/np.linalg.norm(axis) 247 | 248 | # Rotation matrix around unit vector 249 | R = np.diag([cosa, cosa, cosa]) 250 | R += np.outer(axis, axis) * (1.0 - cosa) 251 | axis *= sina 252 | R += np.array([[ 0.0, -axis[2], axis[1]], 253 | [ axis[2], 0.0, -axis[0]], 254 | [-axis[1], axis[0], 0.0]]) 255 | M = np.identity(4) 256 | M[:3, :3] = R 257 | if point is not None: 258 | 259 | # Rotation not around origin 260 | point = np.array(point[:3], dtype=np.float64, copy=False) 261 | M[:3, 3] = point - np.dot(R, point) 262 | return M 263 | 264 | 265 | def rotm2angle(R): 266 | # From: euclideanspace.com 267 | 268 | epsilon = 0.01 # Margin to allow for rounding errors 269 | epsilon2 = 0.1 # Margin to distinguish between 0 and 180 degrees 270 | 271 | assert(isRotm(R)) 272 | 273 | if ((abs(R[0][1]-R[1][0])< epsilon) and (abs(R[0][2]-R[2][0])< epsilon) and (abs(R[1][2]-R[2][1])< epsilon)): 274 | # Singularity found 275 | # First check for identity matrix which must have +1 for all terms in leading diagonaland zero in other terms 276 | if ((abs(R[0][1]+R[1][0]) < epsilon2) and (abs(R[0][2]+R[2][0]) < epsilon2) and (abs(R[1][2]+R[2][1]) < epsilon2) and (abs(R[0][0]+R[1][1]+R[2][2]-3) < epsilon2)): 277 | # this singularity is identity matrix so angle = 0 278 | return [0,1,0,0] # zero angle, arbitrary axis 279 | 280 | # Otherwise this singularity is angle = 180 281 | angle = np.pi 282 | xx = (R[0][0]+1)/2 283 | yy = (R[1][1]+1)/2 284 | zz = (R[2][2]+1)/2 285 | xy = (R[0][1]+R[1][0])/4 286 | xz = (R[0][2]+R[2][0])/4 287 | yz = (R[1][2]+R[2][1])/4 288 | if ((xx > yy) and (xx > zz)): # R[0][0] is the largest diagonal term 289 | if (xx< epsilon): 290 | x = 0 291 | y = 0.7071 292 | z = 0.7071 293 | else: 294 | x = np.sqrt(xx) 295 | y = xy/x 296 | z = xz/x 297 | elif (yy > zz): # R[1][1] is the largest diagonal term 298 | if (yy< epsilon): 299 | x = 0.7071 300 | y = 0 301 | z = 0.7071 302 | else: 303 | y = np.sqrt(yy) 304 | x = xy/y 305 | z = yz/y 306 | else: # R[2][2] is the largest diagonal term so base result on this 307 | if (zz< epsilon): 308 | x = 0.7071 309 | y = 0.7071 310 | z = 0 311 | else: 312 | z = np.sqrt(zz) 313 | x = xz/z 314 | y = yz/z 315 | return [angle,x,y,z] # Return 180 deg rotation 316 | 317 | # As we have reached here there are no singularities so we can handle normally 318 | s = np.sqrt((R[2][1] - R[1][2])*(R[2][1] - R[1][2]) + (R[0][2] - R[2][0])*(R[0][2] - R[2][0]) + (R[1][0] - R[0][1])*(R[1][0] - R[0][1])) # used to normalise 319 | if (abs(s) < 0.001): 320 | s = 1 321 
| 322 | # Prevent divide by zero, should not happen if matrix is orthogonal and should be 323 | # Caught by singularity test above, but I've left it in just in case 324 | angle = np.arccos(( R[0][0] + R[1][1] + R[2][2] - 1)/2) 325 | x = (R[2][1] - R[1][2])/s 326 | y = (R[0][2] - R[2][0])/s 327 | z = (R[1][0] - R[0][1])/s 328 | return [angle,x,y,z] 329 | 330 | 331 | # Cross entropy loss for 2D outputs 332 | class CrossEntropyLoss2d(nn.Module): 333 | 334 | def __init__(self, weight=None, size_average=True): 335 | super(CrossEntropyLoss2d, self).__init__() 336 | self.nll_loss = nn.NLLLoss2d(weight, size_average) 337 | 338 | def forward(self, inputs, targets): 339 | return self.nll_loss(F.log_softmax(inputs, dim=1), targets) 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | --------------------------------------------------------------------------------
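A minimal usage sketch for the pose and point-cloud helpers in src/push-DQN/utils.py (this snippet is not part of the repository; it assumes the interpreter is started inside src/push-DQN so that utils.py is importable, and all values are arbitrary illustrations). euler2rotm composes R = R_z · R_y · R_x from per-axis angles theta = [x, y, z] in radians, isRotm checks orthonormality, rotm2euler inverts the composition away from the gimbal-lock singularity, and pcwrite dumps an Nx3 point cloud (with optional Nx3 uint8 colours) to a binary .ply file for inspection:

import numpy as np
from utils import euler2rotm, rotm2euler, isRotm, pcwrite

theta = np.array([0.1, -0.4, 1.2])        # illustrative [x, y, z] Euler angles in radians
R = euler2rotm(theta)                     # 3x3 rotation matrix, R = Rz @ Ry @ Rx
assert isRotm(R)                          # same orthonormality check used inside utils.py
print(np.allclose(rotm2euler(R), theta))  # True: the round trip recovers the angles

xyz = np.random.rand(500, 3)                           # illustrative Nx3 points
rgb = (np.random.rand(500, 3) * 255).astype(np.uint8)  # matching Nx3 uint8 colours
pcwrite(xyz, 'debug_cloud.ply', rgb)                   # writes a binary little-endian .ply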