├── README.md ├── environment.yml ├── model_config.yaml ├── push-DQN_config.yaml ├── simulation ├── Franka-Panda.ttt ├── Task_no_1.ttt ├── Task_no_2_VPG_heap.ttt ├── Task_no_2_VPG_heap_no_colour.ttt ├── data_generation_scene.ttt ├── remoteApi.so ├── vrep.py └── vrepConst.py └── src ├── database_generator ├── database_config_inria.yaml └── database_generator_inria.py ├── mask_rg ├── coco_eval.py ├── coco_utils.py ├── dataset.py ├── engine.py ├── main.py ├── mask_rg.py ├── model.py ├── rewards.py ├── transforms.py └── utils.py └── push-DQN ├── logger.py ├── main.py ├── main_test.py ├── models.py ├── robot.py ├── trainer.py └── utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Push-to-See 2 | ## Learning Non-Prehensile Manipulation to Enhance Instance Segmentation via Deep Q-Learning 3 | This is the main repository of our Push-to-See model. For technical details, the paper is accessible via [this link.](https://ieeexplore.ieee.org/document/9811645) 4 | 5 | ### Model weights 6 | Trained model weights can be downloaded via the following links: 7 | - Mask-RG (trained on Inria objects for 40 epochs): [link](https://www.dropbox.com/s/mqf7iwmxeti76wx/maskrg_inria_v1_40ep.pth?dl=0) 8 | - Push-DQN (trained for 41k episodes): [link](https://www.dropbox.com/s/96qqmt809gceguj/push_dqn_41k.pth?dl=0) 9 | 10 | ### Demos 11 | 12 | A short description of the model and video demonstrations can be seen in [this video.](https://www.youtube.com/watch?v=CtMaCpACAjU) 13 | 14 | ### Installation and usage 15 | (Instructions for installation, training and running will be included in this readme file. For now, please contact the corresponding author at baris.serhan@manchester.ac.uk if you require any help.) 16 | -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- 1 | name: pushandgrasp 2 | channels: 3 | - conda-forge 4 | - defaults 5 | dependencies: 6 | - _libgcc_mutex 7 | - attrs=19.3.0 8 | - backcall=0.2.0 9 | - bleach=3.1.5 10 | - ca-certificates 11 | - certifi=2020.6.20 12 | - decorator=4.4.2 13 | - defusedxml=0.6.0 14 | - entrypoints=0.3 15 | - importlib-metadata=1.7.0 16 | - importlib_metadata=1.7.0 17 | - ipykernel=5.3.0 18 | - ipython=7.16.1 19 | - ipython_genutils=0.2.0 20 | - jedi=0.17.1 21 | - jinja2=2.11.2 22 | - jsonschema=3.2.0 23 | - jupyter_client=6.1.3 24 | - jupyter_core=4.6.3 25 | - ld_impl_linux-64=2.33.1 26 | - libedit=3.1.20191231 27 | - libffi=3.3 28 | - libgcc-ng=9.1.0 29 | - libsodium=1.0.18 30 | - libstdcxx-ng=9.1.0 31 | - markupsafe=1.1.1 32 | - mistune=0.8.4 33 | - nbconvert=5.6.1 34 | - nbformat=5.0.7 35 | - ncurses=6.2 36 | - notebook=6.0.3 37 | - openssl=1.1.1g 38 | - packaging=20.4 39 | - pandoc=2.9.2.1 40 | - pandocfilters=1.4.2 41 | - parso=0.7.0 42 | - pexpect=4.8.0 43 | - pickleshare=0.7.5 44 | - pip=20.1.1 45 | - prometheus_client=0.8.0 46 | - prompt-toolkit=3.0.5 47 | - ptyprocess=0.6.0 48 | - pygments=2.6.1 49 | - pyparsing=2.4.7 50 | - pyrsistent=0.16.0 51 | - python=3.7 52 | - python-dateutil=2.8.1 53 | - pyzmq=19.0.1 54 | - readline=8.0 55 | - send2trash=1.5.0 56 | - setuptools=47.3.1 57 | - six=1.15.0 58 | - sqlite=3.32.3 59 | - terminado=0.8.3 60 | - testpath=0.4.4 61 | - tk=8.6.10 62 | - tornado=6.0.4 63 | - traitlets=4.3.3 64 | - wcwidth=0.2.5 65 | - webencodings=0.5.1 66 | - wheel=0.34.2 67 | - xz=5.2.5 68 | - zeromq=4.3.2 69 | - zipp=3.1.0 70 | - zlib=1.2.11 71 | - pip: 72 | - addict==2.4.0 73 | - 
catkin-pkg==0.4.23 74 | - cloudpickle==1.3.0 75 | - cycler==0.10.0 76 | - cython==0.29.20 77 | - distro==1.5.0 78 | - docutils==0.16 79 | - future==0.18.2 80 | - gym==0.17.2 81 | - ipywidgets==7.5.1 82 | - joblib==1.0.0 83 | - kiwisolver==1.2.0 84 | - matplotlib==3.2.2 85 | - numpy==1.19.0 86 | - open3d==0.11.2 87 | - opencv-python==4.2.0.34 88 | - pandas==1.1.5 89 | - pillow==7.2.0 90 | - plyfile==0.7.2 91 | - pybullet==3.0.7 92 | - pycocotools==2.0.1 93 | - pyglet==1.5.0 94 | - pytz==2020.4 95 | - pyyaml==5.3.1 96 | - rospkg==1.2.9 97 | - scikit-learn==0.23.2 98 | - scipy==1.5.0 99 | - sklearn==0.0 100 | - threadpoolctl==2.1.0 101 | - torch==1.2.0 102 | - torchvision==0.4.0 103 | - tqdm==4.54.1 104 | - widgetsnbextension==3.5.1 105 | 106 | 107 | -------------------------------------------------------------------------------- /model_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | path: /home/barisserhan/Workspace/push_to_see_saved_models/mask_rg 3 | # set 'file:' to 'new' for training from scratch, 'pretrained' for transfer learning, 4 | # or the name of the save model file which is placed on the above path folder. 5 | file: maskrg_inria_v1_40ep.pth #new 6 | 7 | settings: 8 | epochs: 20 9 | learning_rate: 0.0001 10 | batch_size: 5 11 | backbone: resnet50 12 | backbone_pretrained: True 13 | cuda_available: True 14 | 15 | 16 | dataset: 17 | path: /home/barisserhan/Database # The folders and files in the subsections below should be placed in this folder. 18 | images: depth_ims/NUMPY 19 | masks: segmentation_masks/PNG 20 | train_indices: train_indices.npy 21 | test_indices: test_indices.npy 22 | is_depth: True # if this flag is set 'True', trainer will look for depth numpy arrays in 'images' folder 23 | data_augmentation: False 24 | 25 | saving: 26 | path: saved_models/inria_train_2 27 | model_name: my_model_inria_v2_0ep_ 28 | config_name: 29 | -------------------------------------------------------------------------------- /push-DQN_config.yaml: -------------------------------------------------------------------------------- 1 | model: 2 | path: /home/barisserhan/Workspace/push_to_see_saved_models/push-DQN/ 3 | file: push_dqn_41k.pth # set 'new' for not loading a snapshot of a pretrained model 4 | 5 | detection_thresholds: 6 | confidence_threshold: 0.85 # to define whether a prediction from mask_rg will be considered as a possible object/a detection 7 | mask_threshold: 0.85 # to define whether a detection will be considered correct based on its quality (i.e. how well its segmentation mask matches the ground truth) 8 | session_success_threshold: 0.90 # 9 | 10 | logging: 11 | path: Default # Set 'Default' for ./log 12 | continue_logging: False # to continue logging from previous session #TODO 13 | save_visualizations: True # to save visualisations of FCN predictions 14 | snapshot_saving_rate: 100 # to define how often the model weights will be saved 15 | 16 | environment: 17 | workspace_limits: [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]] # to define workspace limits in robot coordinates (Cols: min max, Rows: x y z) 18 | # (default val [[-0.724, -0.276], [-0.224, 0.224], [-0.0001, 0.4]]) 19 | heightmap_resolution: 0.002 # meters per pixel of heightmap (default val 0.002) 20 | 21 | min_num_objects: 26 # Minimum number of objects that will be dropped in the simulation 22 | max_num_objects: 32 # Maximum number of objects that will be dropped in the simulation (LIMIT 72!) 
#TODO current limit is less than 72 --> change the colour space of printed mask diff 23 | # The total number of objects per scene will be randomised between min and max! 24 | session_limit: 30 # The number of iterations in each session. If a task cannot be solved within this limit, it's considered as a fail, otherwise success 25 | 26 | setup: 27 | is_testing: True # If True --> Exploration Probability will be 0.0 !!! 28 | exploration_probability: 0.5 # Initial exploration probability 29 | epsilon_greedy_policy: 30 | exploration_rate_decay: True # to set epsilon greedy exploration (from initial probability to min_exp_rate_decay) 31 | min_exp_rate_decay: 0.1 # to define minimum value to which the exploration probability can reach 32 | is_random_model: False # If true, exploration probability is set to 1.0 with no rate decay (CAUTION! if true, overrides above values and generates a total random model!) 33 | 34 | training: 35 | future_reward_discount: 0.5 36 | 37 | -------------------------------------------------------------------------------- /simulation/Franka-Panda.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Franka-Panda.ttt -------------------------------------------------------------------------------- /simulation/Task_no_1.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_1.ttt -------------------------------------------------------------------------------- /simulation/Task_no_2_VPG_heap.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_2_VPG_heap.ttt -------------------------------------------------------------------------------- /simulation/Task_no_2_VPG_heap_no_colour.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/Task_no_2_VPG_heap_no_colour.ttt -------------------------------------------------------------------------------- /simulation/data_generation_scene.ttt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/data_generation_scene.ttt -------------------------------------------------------------------------------- /simulation/remoteApi.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ALRhub/push-to-see/12c4e8e01c17dc3ae846e6bb5f6b11c678b6a77a/simulation/remoteApi.so -------------------------------------------------------------------------------- /src/database_generator/database_config_inria.yaml: -------------------------------------------------------------------------------- 1 | database: 2 | path: ~/Database_vrep_inria/ # Change this, if you want to save your dataset into another location 3 | settings: 4 | database_size: 1250 # Number of scenes to generate 5 | max_num_obj: 32 # Maximum number of objects dropped per scene 6 | min_num_obj: 24 # Minimum number of objects dropped per scene 7 | # total_num_obj: 6 # Subset of objects that will be put into the random selection pool 8 | # (e.g., if set to 
50, then first 50 object meshes in object_path will be used - from 000.urdf to 049.urdf) 9 | drop_height: 0.1 10 | remove_container: False 11 | 12 | data: 13 | save_numpy: True # Flag to save depth images and segmentation masks as numpy arrays 14 | save_png: True # Flag to save depth images and segmentation masks as png files 15 | save_color_img: True # Flag to save RGB images 16 | cam_height: 480 17 | cam_width: 640 18 | -------------------------------------------------------------------------------- /src/database_generator/database_generator_inria.py: -------------------------------------------------------------------------------- 1 | import socket 2 | import select 3 | import struct 4 | import time 5 | import os 6 | import numpy as np 7 | # import utils 8 | from simulation import vrep 9 | import matplotlib.pyplot as plt 10 | import yaml 11 | import cv2 12 | from PIL import Image 13 | 14 | class DatabaseGenerator(object): 15 | def __init__(self, config): 16 | self.config = config 17 | 18 | self.DATABASE_SIZE = config['database']['settings']['database_size'] 19 | self.NUM_OBJ_MAX = config['database']['settings']['max_num_obj'] 20 | self.NUM_OBJ_MIN = config['database']['settings']['min_num_obj'] 21 | # self.SELECTION_POOL = config['database']['settings']['total_num_obj'] 22 | 23 | self.DROP_HEIGHT = config['database']['settings']['drop_height'] 24 | self.SAVE_NUMPY = config['data']['save_numpy'] 25 | self.SAVE_PNG = config['data']['save_png'] 26 | self.SAVE_COLOR = config['data']['save_color_img'] 27 | 28 | self.drop_limits = np.asarray([[-0.6, -0.4], [-0.15, 0.15], [-0.2, -0.1]]) 29 | 30 | # Make sure to have the server side running in V-REP: 31 | # in a child script of a V-REP scene, add following command 32 | # to be executed just once, at simulation start: 33 | # 34 | # simExtRemoteApiStart(19999) 35 | # 36 | # then start simulation, and run this program. 37 | # 38 | # IMPORTANT: for each successful call to simxStart, there 39 | # should be a corresponding call to simxFinish at the end! 40 | 41 | # MODIFY remoteApiConnections.txt 42 | 43 | # Connect to simulator 44 | vrep.simxFinish(-1) # Just in case, close all opened connections 45 | self.sim_client = vrep.simxStart('127.0.0.1', 19997, True, True, 5000, 5) # Connect to V-REP on port 19997 46 | if self.sim_client == -1: 47 | print('Failed to connect to simulation (V-REP remote API server). 
Exiting.') 48 | exit() 49 | else: 50 | print('Connected to simulation.') 51 | self.restart_sim() 52 | 53 | # Setup virtual camera in simulation 54 | # self._setup_sim_camera() 55 | 56 | # Get handle to camera 57 | # sim_ret, self.cam_handle = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_persp', 58 | # vrep.simx_opmode_blocking) 59 | 60 | # Get handles for masking 61 | sim_ret_cam_gt, self.cam_handle_gt = vrep.simxGetObjectHandle(self.sim_client, 'gt_sensor', 62 | vrep.simx_opmode_blocking) 63 | 64 | # Get handles for rgb 65 | sim_ret_cam_ortho, self.cam_handle_ortho = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_ortho', 66 | vrep.simx_opmode_blocking) 67 | 68 | 69 | # def _setup_sim_camera(self): 70 | 71 | # sim_ret_cam_mask, self.cam_handle_mask = vrep.simxGetObjectHandle(self.sim_client, 'Vision_sensor_persp0', 72 | # vrep.simx_opmode_blocking) 73 | 74 | # Get camera pose and intrinsics in simulation 75 | # sim_ret, cam_position = vrep.simxGetObjectPosition(self.sim_client, self.cam_handle, -1, 76 | # vrep.simx_opmode_blocking) 77 | # sim_ret, cam_orientation = vrep.simxGetObjectOrientation(self.sim_client, self.cam_handle, -1, 78 | # vrep.simx_opmode_blocking) 79 | # cam_trans = np.eye(4, 4) 80 | # cam_trans[0:3, 3] = np.asarray(cam_position) 81 | # cam_orientation = [-cam_orientation[0], -cam_orientation[1], -cam_orientation[2]] 82 | # cam_rotm = np.eye(4, 4) 83 | # cam_rotm[0:3, 0:3] = np.linalg.inv(utils.euler2rotm(cam_orientation)) 84 | # self.cam_pose = np.dot(cam_trans, cam_rotm) # Compute rigid transformation representating camera pose 85 | # self.cam_intrinsics = np.asarray([[618.62, 0, 320], [0, 618.62, 240], [0, 0, 1]]) 86 | # self.cam_depth_scale = 1 87 | 88 | # Get background image 89 | # self.bg_color_img, self.bg_depth_img = self.get_camera_data() 90 | # self.bg_depth_img = self.bg_depth_img * self.cam_depth_scale 91 | 92 | def generate_database(self): 93 | err_trials = [] 94 | self._folder_config() 95 | init_time = time.time() 96 | for i in range(0, self.DATABASE_SIZE): 97 | session_start = time.time() 98 | # np.random.seed() 99 | curr_num_obj = np.random.random_integers(self.NUM_OBJ_MIN, self.NUM_OBJ_MAX) 100 | print('Scene no %06d - Number of objects in the current scene --->' % i, curr_num_obj) 101 | ret = self._add_objects(curr_num_obj) 102 | 103 | if not ret[0] == -1: 104 | self.save_scene(i) 105 | else: 106 | print("ERROR: Current scene couldn't save!") 107 | err_trials.append(i) 108 | 109 | np.save(self.scene_info_dir + "scene_info_%06d.npy" % i, np.asarray(ret[1])) 110 | self.restart_sim() 111 | session_end = time.time() - session_start 112 | print('Elapsed time for this current scene --> {: .02f} seconds'.format(session_end), 113 | 'Total elapsed time by now --> {: .02f} seconds'.format(time.time() - init_time)) 114 | # TODO proper logging 115 | np.savetxt(os.path.join(self.scene_info_dir, 'error_log.txt'), err_trials, fmt='%06d', delimiter=' ') 116 | 117 | vrep.simxStopSimulation(self.sim_client, vrep.simx_opmode_blocking) 118 | time.sleep(0.25) 119 | vrep.simxFinish(self.sim_client) 120 | 121 | def save_scene(self, iteration): 122 | curr_rgb, curr_depth, seg_mask = self.get_camera_data() 123 | 124 | # save rgb and depth images 125 | color_image = cv2.cvtColor(curr_rgb, cv2.COLOR_RGB2BGR) 126 | cv2.imwrite(os.path.join(self.color_dir, 'color_image_%06d.png' % iteration), color_image) 127 | 128 | depth_image = np.round(curr_depth * 10000).astype(np.uint16) # Save depth in 1e-4 meters 129 | np.save(os.path.join(self.depth_dir_numpy, 
'depth_%06d.npy' % iteration), depth_image) 130 | cv2.imwrite(os.path.join(self.depth_dir_png, 'depth_image_%06d.png' % iteration), depth_image) 131 | 132 | # save segmentation masks 133 | # np.save(os.path.join(self.segmask_dir_numpy, 'segmask_%06d.npy' % iteration), seg_mask) 134 | # plt.imsave(os.path.join(self.segmask_dir_png, 'segmask_image_%06d.png' % iteration), seg_mask) 135 | cv2.imwrite(os.path.join(self.segmask_dir_png, 'segmask_image_%06d.png' % iteration), seg_mask) 136 | 137 | 138 | def restart_sim(self): 139 | 140 | vrep.simxStopSimulation(self.sim_client, vrep.simx_opmode_blocking) 141 | time.sleep(0.25) 142 | vrep.simxStartSimulation(self.sim_client, vrep.simx_opmode_blocking) 143 | time.sleep(0.25) 144 | 145 | def _get_segmentation_mask(self, mask_cam_handle): 146 | 147 | ret, res, data = vrep.simxGetVisionSensorImage(self.sim_client, mask_cam_handle, 0, 148 | vrep.simx_opmode_oneshot_wait) 149 | 150 | seg_mask_temp = np.reshape(data, (res[1], res[0], 3)) 151 | seg_mask_temp = seg_mask_temp[:, :, :1] 152 | seg_mask_temp = np.reshape(seg_mask_temp, (res[1], res[0])) 153 | seg_mask = seg_mask_temp.copy() 154 | 155 | # set the background pixels to 0 156 | elements, counts = np.unique(seg_mask, return_counts=True) 157 | background_index = np.argmax(counts) 158 | new_mask = np.where(seg_mask == elements[background_index], 0, seg_mask) 159 | 160 | objects = np.delete(elements, background_index) 161 | objects = np.sort(objects) 162 | 163 | for i in range(0, objects.size): 164 | # if not objects[i] == 0: 165 | new_mask[np.where(seg_mask == objects[i])] = i + 1 166 | 167 | new_mask = np.flip(new_mask, axis=0) 168 | 169 | return new_mask 170 | 171 | def get_camera_data(self): 172 | 173 | print("Collecting images and ground truth masks...") 174 | # Get color image from simulation 175 | sim_ret, resolution, raw_image = vrep.simxGetVisionSensorImage(self.sim_client, self.cam_handle_ortho, 0, 176 | vrep.simx_opmode_blocking) 177 | color_img = np.asarray(raw_image) 178 | color_img.shape = (resolution[1], resolution[0], 3) 179 | color_img = color_img.astype(np.float) / 255 180 | color_img[color_img < 0] += 1 181 | color_img *= 255 182 | # color_img = np.fliplr(color_img) 183 | color_img = np.flip(color_img, axis=0) 184 | color_img = color_img.astype(np.uint8) 185 | 186 | # Get depth image from simulation 187 | sim_ret, resolution, depth_buffer = vrep.simxGetVisionSensorDepthBuffer(self.sim_client, self.cam_handle_ortho, 188 | vrep.simx_opmode_blocking) 189 | depth_img = np.asarray(depth_buffer) 190 | depth_img.shape = (resolution[1], resolution[0]) 191 | # depth_img = np.fliplr(depth_img) 192 | depth_img = np.flip(depth_img, axis=0) 193 | zNear = 0.01 #0.01 194 | zFar = 0.5 #10 195 | depth_img = depth_img * (zFar - zNear) + zNear 196 | 197 | # Get ground truth segmentation masks 198 | seg_mask = self._get_segmentation_mask(self.cam_handle_gt) 199 | 200 | return color_img, depth_img, seg_mask 201 | 202 | def _add_objects(self, num_obj_heap): 203 | 204 | # 18 INRIA objects x 4 = 72 (convex_0 to convex_71) 205 | isOscillating = 1 206 | objects = np.random.choice(range(0, 72), num_obj_heap, replace=False) 207 | shapes = [] 208 | objects_info = [] 209 | 210 | for object_idx in objects: 211 | 212 | drop_x = (self.drop_limits[0][1] - self.drop_limits[0][0]) * np.random.random_sample() + \ 213 | self.drop_limits[0][0] 214 | drop_y = (self.drop_limits[1][1] - self.drop_limits[1][0]) * np.random.random_sample() + \ 215 | self.drop_limits[1][0] 216 | 217 | object_position = [drop_x, drop_y, 
self.DROP_HEIGHT] 218 | object_orientation = [2 * np.pi * np.random.random_sample(), 2 * np.pi * np.random.random_sample(), 219 | 2 * np.pi * np.random.random_sample()] 220 | 221 | shape_name = 'convex_{:d}'.format(object_idx) 222 | shapes.append(shape_name) 223 | objects_info.append([shape_name, object_position, object_orientation]) 224 | 225 | ret_resp, ret_ints, ret_floats, ret_strings, ret_buffer = vrep.simxCallScriptFunction(self.sim_client, 'remoteApiCommandServer', 226 | vrep.sim_scripttype_childscript, 227 | 'moveShape', 228 | [0], 229 | object_position + object_orientation, 230 | [shape_name], 231 | bytearray(), 232 | vrep.simx_opmode_blocking) 233 | # break if error 234 | if ret_resp == 8: 235 | print('Failed to add new objects to simulation.') 236 | time.sleep(0.5) 237 | return [-1, -1] 238 | 239 | # wait a tad before dropping the next object 240 | time.sleep(0.04) 241 | 242 | # wait 100ms after all objects were released 243 | time.sleep(0.1) 244 | 245 | # wait until the oscillating objects stopped 246 | # (the remote method (i.e. checkMotion) that is attached to the vrep script (simulation_random_datacollect.ttt) 247 | # checks the angular velocities of the dropped objects) 248 | while isOscillating: 249 | ret_r, ret_i, ret_f, ret_s, ret_b = vrep.simxCallScriptFunction(self.sim_client, 'remoteApiCommandServer', 250 | vrep.sim_scripttype_childscript, 251 | 'checkMotion', 252 | [len(shapes)], 253 | [0.0], 254 | shapes, 255 | bytearray(), 256 | vrep.simx_opmode_blocking) 257 | time.sleep(0.05) 258 | isOscillating = ret_i[0] 259 | 260 | return [0, objects_info] 261 | 262 | def _folder_config(self): 263 | """Path configuration to save collected data""" 264 | 265 | # tilde expansion if necessary 266 | if self.config['database']['path'].find('~') == 0: 267 | database_dir = os.path.expanduser(self.config['database']['path']) 268 | else: 269 | database_dir = self.config['database']['path'] 270 | 271 | if not os.path.exists(database_dir): 272 | os.mkdir(database_dir) 273 | else: 274 | print("WARNING: Folder to save database is already exist, " 275 | "if it contains old scene files with same name, they will be overwritten!") 276 | 277 | self.depth_dir_numpy = os.path.join(database_dir, 'depth_ims/NUMPY/') 278 | self.depth_dir_png = os.path.join(database_dir, 'depth_ims/PNG/') 279 | self.color_dir = os.path.join(database_dir, 'color_ims/') 280 | self.segmask_dir_numpy = os.path.join(database_dir, 'segmentation_masks/NUMPY/') 281 | self.segmask_dir_png = os.path.join(database_dir, 'segmentation_masks/PNG/') 282 | self.scene_info_dir = os.path.join(database_dir, 'scene_info/') 283 | 284 | sub_folder_list = [self.depth_dir_numpy, self.depth_dir_png, self.color_dir, self.segmask_dir_numpy, 285 | self.segmask_dir_png, self.scene_info_dir] 286 | for folder in sub_folder_list: 287 | if not os.path.exists(folder): 288 | os.makedirs(folder) 289 | 290 | if __name__ == "__main__": 291 | # Read configuration file 292 | config_file = os.getcwd() + '/database_config_inria.yaml' 293 | with open(config_file) as f: 294 | configuration = yaml.load(f, Loader=yaml.FullLoader) 295 | 296 | dg = DatabaseGenerator(configuration) 297 | dg.generate_database() 298 | -------------------------------------------------------------------------------- /src/mask_rg/coco_eval.py: -------------------------------------------------------------------------------- 1 | import json 2 | import tempfile 3 | 4 | import numpy as np 5 | import copy 6 | import time 7 | import torch 8 | import torch._six 9 | 10 | from 
pycocotools.cocoeval import COCOeval 11 | from pycocotools.coco import COCO 12 | import pycocotools.mask as mask_util 13 | 14 | from collections import defaultdict 15 | 16 | import utils 17 | 18 | 19 | class CocoEvaluator(object): 20 | def __init__(self, coco_gt, iou_types): 21 | assert isinstance(iou_types, (list, tuple)) 22 | coco_gt = copy.deepcopy(coco_gt) 23 | self.coco_gt = coco_gt 24 | 25 | self.iou_types = iou_types 26 | self.coco_eval = {} 27 | for iou_type in iou_types: 28 | self.coco_eval[iou_type] = COCOeval(coco_gt, iouType=iou_type) 29 | 30 | 31 | 32 | self.img_ids = [] 33 | self.eval_imgs = {k: [] for k in iou_types} 34 | 35 | def update(self, predictions): 36 | img_ids = list(np.unique(list(predictions.keys()))) 37 | self.img_ids.extend(img_ids) 38 | 39 | for iou_type in self.iou_types: 40 | results = self.prepare(predictions, iou_type) 41 | coco_dt = loadRes(self.coco_gt, results) if results else COCO() 42 | coco_eval = self.coco_eval[iou_type] 43 | 44 | coco_eval.cocoDt = coco_dt 45 | coco_eval.params.imgIds = list(img_ids) 46 | 47 | coco_eval.params.iouThrs= [0.95] 48 | 49 | img_ids, eval_imgs = evaluate(coco_eval) 50 | 51 | self.eval_imgs[iou_type].append(eval_imgs) 52 | 53 | def synchronize_between_processes(self): 54 | for iou_type in self.iou_types: 55 | self.eval_imgs[iou_type] = np.concatenate(self.eval_imgs[iou_type], 2) 56 | create_common_coco_eval(self.coco_eval[iou_type], self.img_ids, self.eval_imgs[iou_type]) 57 | 58 | def accumulate(self): 59 | for coco_eval in self.coco_eval.values(): 60 | coco_eval.accumulate() 61 | 62 | def summarize(self): 63 | for iou_type, coco_eval in self.coco_eval.items(): 64 | print("IoU metric: {}".format(iou_type)) 65 | coco_eval.summarize() 66 | 67 | def prepare(self, predictions, iou_type): 68 | if iou_type == "bbox": 69 | return self.prepare_for_coco_detection(predictions) 70 | elif iou_type == "segm": 71 | return self.prepare_for_coco_segmentation(predictions) 72 | elif iou_type == "keypoints": 73 | return self.prepare_for_coco_keypoint(predictions) 74 | else: 75 | raise ValueError("Unknown iou type {}".format(iou_type)) 76 | 77 | def prepare_for_coco_detection(self, predictions): 78 | coco_results = [] 79 | for original_id, prediction in predictions.items(): 80 | if len(prediction) == 0: 81 | continue 82 | 83 | boxes = prediction["boxes"] 84 | boxes = convert_to_xywh(boxes).tolist() 85 | scores = prediction["scores"].tolist() 86 | labels = prediction["labels"].tolist() 87 | 88 | coco_results.extend( 89 | [ 90 | { 91 | "image_id": original_id, 92 | "category_id": labels[k], 93 | "bbox": box, 94 | "score": scores[k], 95 | } 96 | for k, box in enumerate(boxes) 97 | ] 98 | ) 99 | return coco_results 100 | 101 | def prepare_for_coco_segmentation(self, predictions): 102 | coco_results = [] 103 | for original_id, prediction in predictions.items(): 104 | if len(prediction) == 0: 105 | continue 106 | 107 | scores = prediction["scores"] 108 | labels = prediction["labels"] 109 | masks = prediction["masks"] 110 | 111 | masks = masks > 0.5 112 | 113 | scores = prediction["scores"].tolist() 114 | labels = prediction["labels"].tolist() 115 | 116 | rles = [ 117 | mask_util.encode(np.array(mask.cpu()[0, :, :, np.newaxis], dtype=np.uint8, order="F"))[0] 118 | for mask in masks 119 | ] 120 | 121 | 122 | for rle in rles: 123 | rle["counts"] = rle["counts"].decode("utf-8") 124 | 125 | coco_results.extend( 126 | [ 127 | { 128 | "image_id": original_id, 129 | "category_id": labels[k], 130 | "segmentation": rle, 131 | "score": scores[k], 132 | } 
133 | for k, rle in enumerate(rles) 134 | ] 135 | ) 136 | return coco_results 137 | 138 | def prepare_for_coco_keypoint(self, predictions): 139 | coco_results = [] 140 | for original_id, prediction in predictions.items(): 141 | if len(prediction) == 0: 142 | continue 143 | 144 | boxes = prediction["boxes"] 145 | boxes = convert_to_xywh(boxes).tolist() 146 | scores = prediction["scores"].tolist() 147 | labels = prediction["labels"].tolist() 148 | keypoints = prediction["keypoints"] 149 | keypoints = keypoints.flatten(start_dim=1).tolist() 150 | 151 | coco_results.extend( 152 | [ 153 | { 154 | "image_id": original_id, 155 | "category_id": labels[k], 156 | 'keypoints': keypoint, 157 | "score": scores[k], 158 | } 159 | for k, keypoint in enumerate(keypoints) 160 | ] 161 | ) 162 | return coco_results 163 | 164 | 165 | def convert_to_xywh(boxes): 166 | xmin, ymin, xmax, ymax = boxes.unbind(1) 167 | return torch.stack((xmin, ymin, xmax - xmin, ymax - ymin), dim=1) 168 | 169 | 170 | def merge(img_ids, eval_imgs): 171 | all_img_ids = utils.all_gather(img_ids) 172 | all_eval_imgs = utils.all_gather(eval_imgs) 173 | 174 | merged_img_ids = [] 175 | for p in all_img_ids: 176 | merged_img_ids.extend(p) 177 | 178 | merged_eval_imgs = [] 179 | for p in all_eval_imgs: 180 | merged_eval_imgs.append(p) 181 | 182 | merged_img_ids = np.array(merged_img_ids) 183 | merged_eval_imgs = np.concatenate(merged_eval_imgs, 2) 184 | 185 | # keep only unique (and in sorted order) images 186 | merged_img_ids, idx = np.unique(merged_img_ids, return_index=True) 187 | merged_eval_imgs = merged_eval_imgs[..., idx] 188 | 189 | return merged_img_ids, merged_eval_imgs 190 | 191 | 192 | def create_common_coco_eval(coco_eval, img_ids, eval_imgs): 193 | img_ids, eval_imgs = merge(img_ids, eval_imgs) 194 | img_ids = list(img_ids) 195 | eval_imgs = list(eval_imgs.flatten()) 196 | 197 | coco_eval.evalImgs = eval_imgs 198 | coco_eval.params.imgIds = img_ids 199 | coco_eval._paramsEval = copy.deepcopy(coco_eval.params) 200 | 201 | 202 | ################################################################# 203 | # From pycocotools, just removed the prints and fixed 204 | # a Python3 bug about unicode not defined 205 | ################################################################# 206 | 207 | # Ideally, pycocotools wouldn't have hard-coded prints 208 | # so that we could avoid copy-pasting those two functions 209 | 210 | def createIndex(self): 211 | # create index 212 | # print('creating index...') 213 | anns, cats, imgs = {}, {}, {} 214 | imgToAnns, catToImgs = defaultdict(list), defaultdict(list) 215 | if 'annotations' in self.dataset: 216 | for ann in self.dataset['annotations']: 217 | imgToAnns[ann['image_id']].append(ann) 218 | anns[ann['id']] = ann 219 | 220 | if 'images' in self.dataset: 221 | for img in self.dataset['images']: 222 | imgs[img['id']] = img 223 | 224 | if 'categories' in self.dataset: 225 | for cat in self.dataset['categories']: 226 | cats[cat['id']] = cat 227 | 228 | if 'annotations' in self.dataset and 'categories' in self.dataset: 229 | for ann in self.dataset['annotations']: 230 | catToImgs[ann['category_id']].append(ann['image_id']) 231 | 232 | # print('index created!') 233 | 234 | # create class members 235 | self.anns = anns 236 | self.imgToAnns = imgToAnns 237 | self.catToImgs = catToImgs 238 | self.imgs = imgs 239 | self.cats = cats 240 | 241 | 242 | maskUtils = mask_util 243 | 244 | 245 | def loadRes(self, resFile): 246 | """ 247 | Load result file and return a result api object. 
248 | :param resFile (str) : file name of result file 249 | :return: res (obj) : result api object 250 | """ 251 | res = COCO() 252 | res.dataset['images'] = [img for img in self.dataset['images']] 253 | 254 | # print('Loading and preparing results...') 255 | # tic = time.time() 256 | if isinstance(resFile, torch._six.string_classes): 257 | anns = json.load(open(resFile)) 258 | elif type(resFile) == np.ndarray: 259 | anns = self.loadNumpyAnnotations(resFile) 260 | else: 261 | anns = resFile 262 | assert type(anns) == list, 'results in not an array of objects' 263 | annsImgIds = [ann['image_id'] for ann in anns] 264 | assert set(annsImgIds) == (set(annsImgIds) & set(self.getImgIds())), \ 265 | 'Results do not correspond to current coco set' 266 | if 'caption' in anns[0]: 267 | imgIds = set([img['id'] for img in res.dataset['images']]) & set([ann['image_id'] for ann in anns]) 268 | res.dataset['images'] = [img for img in res.dataset['images'] if img['id'] in imgIds] 269 | for id, ann in enumerate(anns): 270 | ann['id'] = id + 1 271 | elif 'bbox' in anns[0] and not anns[0]['bbox'] == []: 272 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 273 | for id, ann in enumerate(anns): 274 | bb = ann['bbox'] 275 | x1, x2, y1, y2 = [bb[0], bb[0] + bb[2], bb[1], bb[1] + bb[3]] 276 | if 'segmentation' not in ann: 277 | ann['segmentation'] = [[x1, y1, x1, y2, x2, y2, x2, y1]] 278 | ann['area'] = bb[2] * bb[3] 279 | ann['id'] = id + 1 280 | ann['iscrowd'] = 0 281 | elif 'segmentation' in anns[0]: 282 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 283 | for id, ann in enumerate(anns): 284 | # now only support compressed RLE format as segmentation results 285 | ann['area'] = maskUtils.area(ann['segmentation']) 286 | if 'bbox' not in ann: 287 | ann['bbox'] = maskUtils.toBbox(ann['segmentation']) 288 | ann['id'] = id + 1 289 | ann['iscrowd'] = 0 290 | elif 'keypoints' in anns[0]: 291 | res.dataset['categories'] = copy.deepcopy(self.dataset['categories']) 292 | for id, ann in enumerate(anns): 293 | s = ann['keypoints'] 294 | x = s[0::3] 295 | y = s[1::3] 296 | x1, x2, y1, y2 = np.min(x), np.max(x), np.min(y), np.max(y) 297 | ann['area'] = (x2 - x1) * (y2 - y1) 298 | ann['id'] = id + 1 299 | ann['bbox'] = [x1, y1, x2 - x1, y2 - y1] 300 | # print('DONE (t={:0.2f}s)'.format(time.time()- tic)) 301 | 302 | res.dataset['annotations'] = anns 303 | createIndex(res) 304 | return res 305 | 306 | 307 | def evaluate(self): 308 | ''' 309 | Run per image evaluation on given images and store results (a list of dict) in self.evalImgs 310 | :return: None 311 | ''' 312 | # tic = time.time() 313 | # print('Running per image evaluation...') 314 | p = self.params 315 | # add backward compatibility if useSegm is specified in params 316 | if p.useSegm is not None: 317 | p.iouType = 'segm' if p.useSegm == 1 else 'bbox' 318 | print('useSegm (deprecated) is not None. 
Running {} evaluation'.format(p.iouType)) 319 | # print('Evaluate annotation type *{}*'.format(p.iouType)) 320 | p.imgIds = list(np.unique(p.imgIds)) 321 | if p.useCats: 322 | p.catIds = list(np.unique(p.catIds)) 323 | p.maxDets = sorted(p.maxDets) 324 | self.params = p 325 | 326 | self._prepare() 327 | # loop through images, area range, max detection number 328 | catIds = p.catIds if p.useCats else [-1] 329 | 330 | if p.iouType == 'segm' or p.iouType == 'bbox': 331 | computeIoU = self.computeIoU 332 | elif p.iouType == 'keypoints': 333 | computeIoU = self.computeOks 334 | self.ious = { 335 | (imgId, catId): computeIoU(imgId, catId) 336 | for imgId in p.imgIds 337 | for catId in catIds} 338 | 339 | evaluateImg = self.evaluateImg 340 | maxDet = p.maxDets[-1] 341 | evalImgs = [ 342 | evaluateImg(imgId, catId, areaRng, maxDet) 343 | for catId in catIds 344 | for areaRng in p.areaRng 345 | for imgId in p.imgIds 346 | ] 347 | # this is NOT in the pycocotools code, but could be done outside 348 | evalImgs = np.asarray(evalImgs).reshape(len(catIds), len(p.areaRng), len(p.imgIds)) 349 | self._paramsEval = copy.deepcopy(self.params) 350 | # toc = time.time() 351 | # print('DONE (t={:0.2f}s).'.format(toc-tic)) 352 | return p.imgIds, evalImgs 353 | 354 | ################################################################# 355 | # end of straight copy from pycocotools, just removing the prints 356 | ################################################################# 357 | -------------------------------------------------------------------------------- /src/mask_rg/coco_utils.py: -------------------------------------------------------------------------------- 1 | import copy 2 | import os 3 | from PIL import Image 4 | 5 | import torch 6 | import torch.utils.data 7 | import torchvision 8 | 9 | from pycocotools import mask as coco_mask 10 | from pycocotools.coco import COCO 11 | 12 | import src.mask_rg.transforms as T 13 | 14 | 15 | class FilterAndRemapCocoCategories(object): 16 | def __init__(self, categories, remap=True): 17 | self.categories = categories 18 | self.remap = remap 19 | 20 | def __call__(self, image, target): 21 | anno = target["annotations"] 22 | anno = [obj for obj in anno if obj["category_id"] in self.categories] 23 | if not self.remap: 24 | target["annotations"] = anno 25 | return image, target 26 | anno = copy.deepcopy(anno) 27 | for obj in anno: 28 | obj["category_id"] = self.categories.index(obj["category_id"]) 29 | target["annotations"] = anno 30 | return image, target 31 | 32 | 33 | def convert_coco_poly_to_mask(segmentations, height, width): 34 | masks = [] 35 | for polygons in segmentations: 36 | rles = coco_mask.frPyObjects(polygons, height, width) 37 | mask = coco_mask.decode(rles) 38 | if len(mask.shape) < 3: 39 | mask = mask[..., None] 40 | mask = torch.as_tensor(mask, dtype=torch.uint8) 41 | mask = mask.any(dim=2) 42 | masks.append(mask) 43 | if masks: 44 | masks = torch.stack(masks, dim=0) 45 | else: 46 | masks = torch.zeros((0, height, width), dtype=torch.uint8) 47 | return masks 48 | 49 | 50 | class ConvertCocoPolysToMask(object): 51 | def __call__(self, image, target): 52 | w, h = image.size 53 | 54 | image_id = target["image_id"] 55 | image_id = torch.tensor([image_id]) 56 | 57 | anno = target["annotations"] 58 | 59 | anno = [obj for obj in anno if obj['iscrowd'] == 0] 60 | 61 | boxes = [obj["bbox"] for obj in anno] 62 | # guard against no boxes via resizing 63 | boxes = torch.as_tensor(boxes, dtype=torch.float32).reshape(-1, 4) 64 | boxes[:, 2:] += boxes[:, :2] 65 | 
boxes[:, 0::2].clamp_(min=0, max=w) 66 | boxes[:, 1::2].clamp_(min=0, max=h) 67 | 68 | classes = [obj["category_id"] for obj in anno] 69 | classes = torch.tensor(classes, dtype=torch.int64) 70 | 71 | segmentations = [obj["segmentation"] for obj in anno] 72 | masks = convert_coco_poly_to_mask(segmentations, h, w) 73 | 74 | keypoints = None 75 | if anno and "keypoints" in anno[0]: 76 | keypoints = [obj["keypoints"] for obj in anno] 77 | keypoints = torch.as_tensor(keypoints, dtype=torch.float32) 78 | num_keypoints = keypoints.shape[0] 79 | if num_keypoints: 80 | keypoints = keypoints.view(num_keypoints, -1, 3) 81 | 82 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 83 | boxes = boxes[keep] 84 | classes = classes[keep] 85 | masks = masks[keep] 86 | if keypoints is not None: 87 | keypoints = keypoints[keep] 88 | 89 | target = {} 90 | target["boxes"] = boxes 91 | target["labels"] = classes 92 | target["masks"] = masks 93 | target["image_id"] = image_id 94 | if keypoints is not None: 95 | target["keypoints"] = keypoints 96 | 97 | # for conversion to coco api 98 | area = torch.tensor([obj["area"] for obj in anno]) 99 | iscrowd = torch.tensor([obj["iscrowd"] for obj in anno]) 100 | target["area"] = area 101 | target["iscrowd"] = iscrowd 102 | 103 | return image, target 104 | 105 | 106 | def _coco_remove_images_without_annotations(dataset, cat_list=None): 107 | def _has_only_empty_bbox(anno): 108 | return all(any(o <= 1 for o in obj["bbox"][2:]) for obj in anno) 109 | 110 | def _count_visible_keypoints(anno): 111 | return sum(sum(1 for v in ann["keypoints"][2::3] if v > 0) for ann in anno) 112 | 113 | min_keypoints_per_image = 10 114 | 115 | def _has_valid_annotation(anno): 116 | # if it's empty, there is no annotation 117 | if len(anno) == 0: 118 | return False 119 | # if all boxes have close to zero area, there is no annotation 120 | if _has_only_empty_bbox(anno): 121 | return False 122 | # keypoints task have a slight different critera for considering 123 | # if an annotation is valid 124 | if "keypoints" not in anno[0]: 125 | return True 126 | # for keypoint detection tasks, only consider valid images those 127 | # containing at least min_keypoints_per_image 128 | if _count_visible_keypoints(anno) >= min_keypoints_per_image: 129 | return True 130 | return False 131 | 132 | assert isinstance(dataset, torchvision.datasets.CocoDetection) 133 | ids = [] 134 | for ds_idx, img_id in enumerate(dataset.ids): 135 | ann_ids = dataset.coco.getAnnIds(imgIds=img_id, iscrowd=None) 136 | anno = dataset.coco.loadAnns(ann_ids) 137 | if cat_list: 138 | anno = [obj for obj in anno if obj["category_id"] in cat_list] 139 | if _has_valid_annotation(anno): 140 | ids.append(ds_idx) 141 | 142 | dataset = torch.utils.data.Subset(dataset, ids) 143 | return dataset 144 | 145 | 146 | def convert_to_coco_api(ds): 147 | coco_ds = COCO() 148 | # annotation IDs need to start at 1, not 0, see torchvision issue #1530 149 | ann_id = 1 150 | dataset = {'images': [], 'categories': [], 'annotations': []} 151 | categories = set() 152 | for img_idx in range(len(ds)): 153 | # find better way to get target 154 | # targets = ds.get_annotations(img_idx) 155 | img, targets = ds[img_idx] 156 | image_id = targets["image_id"].item() 157 | img_dict = {} 158 | img_dict['id'] = image_id 159 | img_dict['height'] = img.shape[-2] 160 | img_dict['width'] = img.shape[-1] 161 | dataset['images'].append(img_dict) 162 | bboxes = targets["boxes"] 163 | bboxes[:, 2:] -= bboxes[:, :2] 164 | bboxes = bboxes.tolist() 165 | labels = 
targets['labels'].tolist() 166 | areas = targets['area'].tolist() 167 | iscrowd = targets['iscrowd'].tolist() 168 | if 'masks' in targets: 169 | masks = targets['masks'] 170 | # make masks Fortran contiguous for coco_mask 171 | masks = masks.permute(0, 2, 1).contiguous().permute(0, 2, 1) 172 | if 'keypoints' in targets: 173 | keypoints = targets['keypoints'] 174 | keypoints = keypoints.reshape(keypoints.shape[0], -1).tolist() 175 | num_objs = len(bboxes) 176 | for i in range(num_objs): 177 | ann = {} 178 | ann['image_id'] = image_id 179 | ann['bbox'] = bboxes[i] 180 | ann['category_id'] = labels[i] 181 | categories.add(labels[i]) 182 | ann['area'] = areas[i] 183 | ann['iscrowd'] = iscrowd[i] 184 | ann['id'] = ann_id 185 | if 'masks' in targets: 186 | ann["segmentation"] = coco_mask.encode(masks[i].numpy()) 187 | if 'keypoints' in targets: 188 | ann['keypoints'] = keypoints[i] 189 | ann['num_keypoints'] = sum(k != 0 for k in keypoints[i][2::3]) 190 | dataset['annotations'].append(ann) 191 | ann_id += 1 192 | dataset['categories'] = [{'id': i} for i in sorted(categories)] 193 | coco_ds.dataset = dataset 194 | coco_ds.createIndex() 195 | return coco_ds 196 | 197 | 198 | def get_coco_api_from_dataset(dataset): 199 | for _ in range(10): 200 | if isinstance(dataset, torchvision.datasets.CocoDetection): 201 | break 202 | if isinstance(dataset, torch.utils.data.Subset): 203 | dataset = dataset 204 | # dataset = dataset.dataset.Subset 205 | if isinstance(dataset, torchvision.datasets.CocoDetection): 206 | return dataset.coco 207 | return convert_to_coco_api(dataset) 208 | 209 | 210 | class CocoDetection(torchvision.datasets.CocoDetection): 211 | def __init__(self, img_folder, ann_file, transforms): 212 | super(CocoDetection, self).__init__(img_folder, ann_file) 213 | self._transforms = transforms 214 | 215 | def __getitem__(self, idx): 216 | img, target = super(CocoDetection, self).__getitem__(idx) 217 | image_id = self.ids[idx] 218 | target = dict(image_id=image_id, annotations=target) 219 | if self._transforms is not None: 220 | img, target = self._transforms(img, target) 221 | return img, target 222 | 223 | 224 | def get_coco(root, image_set, transforms, mode='instances'): 225 | anno_file_template = "{}_{}2017.json" 226 | PATHS = { 227 | "train": ("train2017", os.path.join("annotations", anno_file_template.format(mode, "train"))), 228 | "val": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))), 229 | # "train": ("val2017", os.path.join("annotations", anno_file_template.format(mode, "val"))) 230 | } 231 | 232 | t = [ConvertCocoPolysToMask()] 233 | 234 | if transforms is not None: 235 | t.append(transforms) 236 | transforms = T.Compose(t) 237 | 238 | img_folder, ann_file = PATHS[image_set] 239 | img_folder = os.path.join(root, img_folder) 240 | ann_file = os.path.join(root, ann_file) 241 | 242 | dataset = CocoDetection(img_folder, ann_file, transforms=transforms) 243 | 244 | if image_set == "train": 245 | dataset = _coco_remove_images_without_annotations(dataset) 246 | 247 | # dataset = torch.utils.data.Subset(dataset, [i for i in range(500)]) 248 | 249 | return dataset 250 | 251 | 252 | def get_coco_kp(root, image_set, transforms): 253 | return get_coco(root, image_set, transforms, mode="person_keypoints") 254 | -------------------------------------------------------------------------------- /src/mask_rg/dataset.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import torch 4 | from PIL 
import Image 5 | import cv2 6 | import torchvision.transforms as T 7 | import matplotlib.pyplot as plt 8 | import matplotlib.patches as patches 9 | import utils 10 | 11 | 12 | class PushAndGraspDataset(object): 13 | 14 | def __init__(self, config): 15 | 16 | # read configuration 17 | self.root = config['dataset']['path'] 18 | self.imgs_path = os.path.join(config['dataset']['path'], config['dataset']['images']) 19 | self.masks_path = os.path.join(config['dataset']['path'], config['dataset']['masks']) 20 | self.transforms = config['dataset']['data_augmentation'] 21 | self.is_depth = config['dataset']['is_depth'] 22 | 23 | self.train_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['train_indices']), 24 | allow_pickle=True) 25 | self.test_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['test_indices']), 26 | allow_pickle=True) 27 | 28 | 29 | # get a list of images and GTs 30 | self.imgs = list(sorted(os.listdir(self.imgs_path))) 31 | self.masks = list(sorted(os.listdir(self.masks_path))) 32 | 33 | def __getitem__(self, idx): 34 | 35 | # set getter path 36 | img_path = os.path.join(self.imgs_path, self.imgs[idx]) 37 | mask_path = os.path.join(self.masks_path, self.masks[idx]) 38 | 39 | # read an image 40 | if self.is_depth: 41 | 42 | img = np.load(img_path).astype(dtype=np.int16) 43 | # TODO take the normalization somewhere out 44 | # depth range --> 0 to 5000 1e-4meters 45 | # max depth = 5000 --> background (0.5m) 46 | # min depth = 3459 --> the minimum depth value in the all dataset (0.npy to 012250.npy) 47 | # img = np.round((img - 2960) / 8).astype(np.uint8) 48 | 49 | # all vals are in between 0 to 250 50 | img = np.round(img/20).astype(np.uint8) 51 | img = np.repeat(img.reshape(1024, 1024, 1), 3, axis=2) 52 | else: 53 | # TODO this conversion might create problems for depth when converting back, check! 54 | img = Image.open(img_path).convert("RGB") 55 | 56 | # read a GT mask and reformat 57 | mask = cv2.imread(mask_path) 58 | mask = np.asarray(mask) 59 | mask = mask[:, :, :1] 60 | mask = np.squeeze(mask, axis=2) 61 | # plt.imshow(mask) 62 | # plt.show() 63 | 64 | # list of objects - All data had been cleaned and obj_ids are in ordered 65 | obj_ids = np.unique(mask) 66 | # Remove background (i.e. 0) 67 | obj_ids = obj_ids[1:] 68 | 69 | # combine the masks TODO read height and weight from img 70 | num_objs = len(obj_ids) 71 | masks = np.zeros((num_objs, 1024, 1024), dtype=np.uint8) 72 | for i in range(num_objs): 73 | masks[i][np.where(mask == i+1)] = 1 74 | 75 | # get bounding box coordinates for each mask 76 | boxes = [] 77 | for i in range(num_objs): 78 | # plt.imshow(masks[i]) 79 | # plt.show() 80 | pos = np.where(masks[i]) 81 | xmin = np.min(pos[1]) 82 | xmax = np.max(pos[1]) 83 | ymin = np.min(pos[0]) 84 | ymax = np.max(pos[0]) 85 | boxes.append([xmin, ymin, xmax, ymax]) 86 | 87 | # fig, ax = plt.subplots(1) 88 | # ax.imshow(img) 89 | # In the documentation it say bottom left corner, here i gave upper left corner for the correct box!! 
90 | # rect = patches.Rectangle((xmin, ymin), xmax-xmin, ymax-ymin, linewidth=3, edgecolor='r', facecolor='none') 91 | # ax.add_patch(rect) 92 | # plt.show() 93 | 94 | # convert everything into a torch.Tensor 95 | boxes = torch.as_tensor(boxes, dtype=torch.float32) 96 | # there is only one class 97 | labels = torch.ones((num_objs,), dtype=torch.int64) 98 | masks = torch.as_tensor(masks, dtype=torch.uint8) 99 | 100 | image_id = torch.tensor([idx]) 101 | area = (boxes[:, 3] - boxes[:, 1]) * (boxes[:, 2] - boxes[:, 0]) 102 | 103 | # If there is no object on the scene, this will be used to ignore that instance during coco eval 104 | iscrowd = torch.zeros((num_objs,), dtype=torch.int64) 105 | 106 | # this check is to avoid the 'loss is NaN' error during training, which can be caused by an invalid box 107 | keep = (boxes[:, 3] > boxes[:, 1]) & (boxes[:, 2] > boxes[:, 0]) 108 | boxes = boxes[keep] 109 | masks = masks[keep] 110 | 111 | target = {} 112 | target["boxes"] = boxes 113 | target["labels"] = labels 114 | target["masks"] = masks 115 | target["image_id"] = image_id 116 | target["area"] = area 117 | target["iscrowd"] = iscrowd 118 | 119 | # TODO data augmentation 120 | # if self.transforms is not None: 121 | # img, target = self.transforms(img, target) 122 | 123 | # convert image to torch tensor 124 | # if self.is_depth: 125 | # img = torch.from_numpy(img) 126 | # asd=23 127 | # else: 128 | tt = T.ToTensor() 129 | img = tt(img) 130 | 131 | return img, target 132 | 133 | # def augment_data(train): 134 | # transforms = [T.ToTensor()] 135 | # if train: 136 | # transforms.append(T.RandomHorizontalFlip(0.5)) 137 | # return T.Compose(transforms) 138 | 139 | def __len__(self): 140 | return len(self.imgs) -------------------------------------------------------------------------------- /src/mask_rg/engine.py: -------------------------------------------------------------------------------- 1 | import math 2 | import sys 3 | import time 4 | import torch 5 | 6 | import torchvision.models.detection.mask_rcnn 7 | 8 | from src.mask_rg.coco_utils import get_coco_api_from_dataset 9 | from src.mask_rg.coco_eval import CocoEvaluator 10 | from src.mask_rg import utils 11 | 12 | 13 | def train_one_epoch(model, optimizer, data_loader, device, epoch, print_freq): 14 | model.train() 15 | metric_logger = utils.MetricLogger(delimiter=" ") 16 | metric_logger.add_meter('lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}')) 17 | header = 'Epoch: [{}]'.format(epoch) 18 | 19 | lr_scheduler = None 20 | if epoch == 0: 21 | warmup_factor = 1. 
/ 1000 22 | warmup_iters = min(1000, len(data_loader) - 1) 23 | 24 | lr_scheduler = utils.warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor) 25 | 26 | for images, targets in metric_logger.log_every(data_loader, print_freq, header): 27 | images = list(image.to(device) for image in images) 28 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 29 | 30 | loss_dict = model(images, targets) 31 | 32 | losses = sum(loss for loss in loss_dict.values()) 33 | 34 | # reduce losses over all GPUs for logging purposes 35 | loss_dict_reduced = utils.reduce_dict(loss_dict) 36 | losses_reduced = sum(loss for loss in loss_dict_reduced.values()) 37 | 38 | loss_value = losses_reduced.item() 39 | 40 | if not math.isfinite(loss_value): 41 | print("Loss is {}, stopping training".format(loss_value)) 42 | print(loss_dict_reduced) 43 | sys.exit(1) 44 | 45 | optimizer.zero_grad() 46 | losses.backward() 47 | optimizer.step() 48 | 49 | if lr_scheduler is not None: 50 | lr_scheduler.step() 51 | 52 | metric_logger.update(loss=losses_reduced, **loss_dict_reduced) 53 | metric_logger.update(lr=optimizer.param_groups[0]["lr"]) 54 | 55 | return metric_logger 56 | 57 | 58 | def _get_iou_types(model): 59 | model_without_ddp = model 60 | if isinstance(model, torch.nn.parallel.DistributedDataParallel): 61 | model_without_ddp = model.module 62 | iou_types = ["bbox"] 63 | if isinstance(model_without_ddp, torchvision.models.detection.MaskRCNN): 64 | iou_types.append("segm") 65 | if isinstance(model_without_ddp, torchvision.models.detection.KeypointRCNN): 66 | iou_types.append("keypoints") 67 | return iou_types 68 | 69 | 70 | @torch.no_grad() 71 | def evaluate(model, data_loader, device): 72 | # n_threads = torch.get_num_threads() 73 | # FIXME remove this and make paste_masks_in_image run on the GPU 74 | # torch.set_num_threads(n_threads) 75 | cpu_device = torch.device('cuda') 76 | model.eval() 77 | header = 'Test:' 78 | 79 | coco = get_coco_api_from_dataset(data_loader.dataset) 80 | metric_logger = utils.MetricLogger(delimiter=" ") 81 | iou_types = _get_iou_types(model) 82 | 83 | coco_evaluator = CocoEvaluator(coco, iou_types) 84 | 85 | for images, targets in metric_logger.log_every(data_loader, 100, header): 86 | images = list(img.to(device) for img in images) 87 | targets = [{k: v.to(device) for k, v in t.items()} for t in targets] 88 | 89 | torch.cuda.synchronize() 90 | model_time = time.time() 91 | outputs = model(images) 92 | 93 | outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs] 94 | model_time = time.time() - model_time 95 | 96 | res = {target["image_id"].item(): output for target, output in zip(targets, outputs)} 97 | evaluator_time = time.time() 98 | coco_evaluator.update(res) 99 | evaluator_time = time.time() - evaluator_time 100 | metric_logger.update(model_time=model_time, evaluator_time=evaluator_time) 101 | 102 | # gather the stats from all processes 103 | metric_logger.synchronize_between_processes() 104 | print("Averaged stats:", metric_logger) 105 | coco_evaluator.synchronize_between_processes() 106 | 107 | # accumulate predictions from all images 108 | coco_evaluator.accumulate() 109 | coco_evaluator.summarize() 110 | # torch.set_num_threads(n_threads) 111 | return coco_evaluator 112 | -------------------------------------------------------------------------------- /src/mask_rg/main.py: -------------------------------------------------------------------------------- 1 | from model import MaskRGNetwork 2 | from dataset import PushAndGraspDataset 3 | import yaml 4 | 
import numpy as np 5 | import os 6 | from torch.utils import data as td 7 | 8 | 9 | def main(): 10 | 11 | with open('model_config.yaml') as f: 12 | configuration = yaml.load(f, Loader=yaml.FullLoader) 13 | 14 | # create the model 15 | model = MaskRGNetwork(configuration) 16 | 17 | # create dataset objects and set the model 18 | dataset = PushAndGraspDataset(configuration) 19 | # the train/test split indices are loaded inside PushAndGraspDataset from the index files given in the config, 20 | # and set_data() selects the corresponding subset internally, so the full dataset object is passed below 21 | # Training: 22 | # model.set_data(dataset) 23 | # model.train_model() 24 | 25 | # Evaluation: 26 | # this loads the saved weights from the file in the config file 27 | model.load_weights() 28 | # select the test split for the evaluation 29 | model.set_data(dataset, is_test=True) 30 | 31 | # evaluate 32 | res = model.evaluate_model() 33 | np.save('results.npy', res) 34 | with open('results.txt', 'w') as output: 35 | output.write(str(res)) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() -------------------------------------------------------------------------------- /src/mask_rg/mask_rg.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.model import MaskRGNetwork 2 | import yaml 3 | import numpy as np 4 | import torchvision.transforms as T 5 | import torch 6 | from src.mask_rg.rewards import RewardGenerator 7 | 8 | 9 | CONFIG_PATH = './model_config.yaml' 10 | 11 | class MaskRG(object): 12 | 13 | def __init__(self, confidence_threshold=0.75, mask_threshold=0.75): 14 | # Make sure this configuration file is in your execution path 15 | with open(CONFIG_PATH) as f: 16 | configuration = yaml.load(f, Loader=yaml.FullLoader) 17 | with torch.no_grad(): 18 | self.model = MaskRGNetwork(configuration) 19 | self.model.load_weights() 20 | self.rg = RewardGenerator(confidence_threshold, mask_threshold) 21 | self.prediction = None 22 | self.gt = None 23 | 24 | def set_reward_generator(self, depth_image, gt_segmentation): 25 | 26 | depth_image = np.round(depth_image / 20).astype(np.uint8) 27 | depth_image = np.repeat(depth_image.reshape(1024, 1024, 1), 3, axis=2) 28 | with torch.no_grad(): 29 | tt = T.ToTensor() 30 | depth_tensor = tt(depth_image) 31 | depth_tensor = depth_tensor.cuda() 32 | # print(depth_tensor.requires_grad) 33 | self.prediction = self.model.eval_single_img([depth_tensor]) 34 | 35 | self.gt = gt_segmentation 36 | self.rg.set_element(self.prediction, self.gt) 37 | 38 | def get_current_rewards(self): 39 | return self.rg.get_reward() 40 | 41 | def print_segmentation(self, pred_ids): 42 | return self.rg.print_seg_diff(pred_ids) 43 | 44 | def print_masks(self): 45 | return self.rg.print_masks() -------------------------------------------------------------------------------- /src/mask_rg/model.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import os 3 | import torch 4 | import torch.nn as nn 5 | from torch.utils import data as td 6 | import torchvision 7 | from torchvision.models.detection.faster_rcnn import FastRCNNPredictor 8 | from torchvision.models.detection.mask_rcnn import MaskRCNNPredictor 9 | from torchvision.models.detection import MaskRCNN 10 | from torchvision.models.detection.rpn import AnchorGenerator 11 | from torchvision.transforms import ToTensor 12 | from src.mask_rg.engine import train_one_epoch, evaluate 13 | from src.mask_rg import utils 14 | import time 15 | import datetime 16 | import yaml 17 | from 
collections import OrderedDict 18 | import cv2 19 | from PIL import Image 20 | import matplotlib.pyplot as plt 21 | import matplotlib.patches as patches 22 | 23 | class MaskRGNetwork(nn.Module): 24 | def __init__(self, config): 25 | super(MaskRGNetwork, self).__init__() 26 | 27 | self.training = False # check if this necessary 28 | 29 | # read configuration 30 | if config['model']['file'] == 'new': 31 | use_old_weights = False 32 | self.m_pretrained = False 33 | elif config['model']['file'] == 'pretrained': 34 | use_old_weights = False 35 | self.m_pretrained = True 36 | else: 37 | use_old_weights = True 38 | self.weigths_path = os.path.join(config['model']['path'], config['model']['file']) 39 | 40 | self.num_epochs = config['model']['settings']['epochs'] 41 | self.lr = config['model']['settings']['learning_rate'] 42 | self.batch_size = config['model']['settings']['batch_size'] 43 | self.backbone = config['model']['settings']['backbone'] 44 | self.bb_pretrained = config['model']['settings']['backbone_pretrained'] 45 | is_cuda = config['model']['settings']['cuda_available'] 46 | 47 | # uncomment for training!!! 48 | # self.train_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['train_indices'])) 49 | # self.test_indices = np.load(os.path.join(config['dataset']['path'], config['dataset']['test_indices'])) 50 | 51 | self.saving_path = config['saving']['path'] 52 | self.saving_prefix = config['saving']['model_name'] 53 | 54 | if is_cuda: 55 | if torch.cuda.is_available(): 56 | self.device = 'cuda' 57 | print("CUDA device found!") 58 | else: 59 | print("WARNING: CUDA is not available!!! CPU will be used instead!") 60 | self.device = 'cpu' 61 | else: 62 | print("CPU will be used to train/evaluate the network!") 63 | self.device = 'cpu' 64 | 65 | # self.backbone = torchvision.models.resnet34(pretrained=True, progress=True) 66 | # self.anchors = AnchorGenerator(sizes=ANCHOR_SIZES) 67 | # self.backbone.out_channels = [512, 512, 512] 68 | 69 | if self.backbone == 'resnet50': 70 | if not use_old_weights: 71 | self.mask_r_cnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=self.m_pretrained, progress=True, 72 | pretrained_backbone=self.bb_pretrained) 73 | else: 74 | self.mask_r_cnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=False, 75 | progress=True, 76 | pretrained_backbone=self.bb_pretrained) 77 | 78 | # get number of input features for the classifier 79 | self.input_features = self.mask_r_cnn.roi_heads.box_predictor.cls_score.in_features 80 | 81 | # replace the pre-trained head with a new one --> category-agnostic so number of classes=2 82 | self.mask_r_cnn.roi_heads.box_predictor = FastRCNNPredictor(self.input_features, 2) 83 | 84 | # now get the number of input features for the mask classifier 85 | self.input_features_mask = self.mask_r_cnn.roi_heads.mask_predictor.conv5_mask.in_channels 86 | 87 | hidden_layer = 256 88 | # and replace the mask predictor with a new one 89 | self.mask_r_cnn.roi_heads.mask_predictor = MaskRCNNPredictor(self.input_features_mask, hidden_layer, 2) 90 | 91 | self.mask_r_cnn.to(self.device) 92 | # self.mask_r_cnn = MaskRCNN(backbone=self.backbone, num_classes=2, rpn_anchor_generator=self.anchors) 93 | 94 | # construct an optimizer 95 | self.params = [p for p in self.mask_r_cnn.parameters() if p.requires_grad] 96 | self.optimizer = torch.optim.SGD(self.params, lr=self.lr, momentum=0.9, weight_decay=0.00005) 97 | 98 | if use_old_weights: 99 | self.load_weights() 100 | 101 | 102 | def train_model(self): 103 | 
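# (added note) train_model() assumes set_data() has already been called so that self.data_loader exists;
# train_one_epoch() below handles the per-batch forward/backward passes, the optimizer steps and the warm-up
# learning-rate schedule, and _save_model() writes a snapshot after every epoch.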
104 | for epoch in range(self.num_epochs): 105 | # train for one epoch, printing every 10 iterations 106 | train_one_epoch(self.mask_r_cnn, self.optimizer, self.data_loader, self.device, epoch, print_freq=5) 107 | # update the learning rate 108 | # self.lr_scheduler.step() 109 | self.optimizer.step() 110 | self._save_model('_epoch_no_' + str(epoch) + '_') 111 | 112 | 113 | def evaluate_model(self): 114 | # evaluate on the test dataset 115 | res = evaluate(self.mask_r_cnn, self.data_loader, device=self.device) 116 | return res 117 | 118 | def eval_single_img(self, img): 119 | # self.device = torch.device("cpu") 120 | 121 | self.mask_r_cnn.eval() 122 | with torch.no_grad(): 123 | preds = self.mask_r_cnn(img) 124 | 125 | return preds 126 | 127 | def load_weights(self): 128 | # this should be called after the initialisation of the model 129 | self.mask_r_cnn.load_state_dict(torch.load(self.weigths_path)) 130 | 131 | def set_data(self, data, is_test=False): 132 | 133 | data_subset = td.Subset(data, data.train_indices) if not is_test else td.Subset(data, data.test_indices) 134 | 135 | # init a data loader either for training or testing 136 | # multiprocessing for data loading (num_workers) is not recommended for CUDA, so automatic memory pining 137 | # (pin_memory=True) is used instead. 138 | self.data_loader = td.DataLoader(data_subset, batch_size=self.batch_size, shuffle=False, 139 | num_workers=0, pin_memory=True, 140 | collate_fn=utils.collate_fn) 141 | 142 | def _save_model(self, string=None): 143 | t = time.time() 144 | timestamp = datetime.datetime.fromtimestamp(t) 145 | file_name = self.saving_prefix + string + timestamp.strftime('%Y-%m-%d.%H:%M:%S') + '.pth' 146 | torch.save(self.mask_r_cnn.state_dict(), os.path.join(self.saving_path, file_name)) 147 | 148 | @staticmethod 149 | def print_boxes(image, input_tensor, score_threshold=0.75): 150 | boxes = input_tensor[0]['boxes'] 151 | scores = input_tensor[0]['scores'] 152 | 153 | fig, ax = plt.subplots(1) 154 | ax.imshow(image) 155 | num_box=0 156 | for box in boxes: 157 | box_index = np.where(boxes == box) 158 | box_index = int(box_index[0][0]) 159 | if scores[box_index] > score_threshold: 160 | # In the documentation it say bottom left corner, here i gave upper left corner for the correct box!! 
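# (added clarification) matplotlib's Rectangle is anchored at its (x, y) corner in data coordinates; because imshow()
# places the image origin at the top-left with the y-axis pointing down, passing (box[0], box[1]) = (x_min, y_min)
# anchors the rectangle at the visually upper-left corner of the detection, so the drawn box lines up with the image.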
161 | rect = patches.Rectangle((box[0], box[1]), box[2]-box[0], box[3]-box[1], linewidth=3, edgecolor='r', 162 | facecolor='none') 163 | ax.add_patch(rect) 164 | num_box += 1 165 | print('num boxes that have a score higher than .75 --> ', num_box) 166 | plt.show() 167 | 168 | @staticmethod 169 | def print_masks(image, input_tensor, score_threshold=0.75): 170 | masks = input_tensor[0]['masks'] 171 | scores = input_tensor[0]['scores'] 172 | num_pred = masks.shape[0] 173 | num_masks = 0 174 | all = np.zeros((1024, 1024), dtype=np.uint8) 175 | for mask in range(0, num_pred): 176 | if scores[mask] > score_threshold: 177 | #TODO if cuda, add a control here 178 | 179 | mask_arr = np.asarray(masks[mask].cpu().detach()).reshape((1024, 1024)) 180 | mask_arr = np.where(mask_arr > score_threshold, 1, 0) 181 | all[np.where(mask_arr > 0)] = num_masks 182 | 183 | num_masks += 1 184 | plt.imshow(all) 185 | plt.show() 186 | print('num masks that have a score higher than .75 --> ', num_masks) 187 | 188 | 189 | def forward(self, input_img=None): 190 | pass 191 | # print("evaluation") 192 | # tt = T.ToTensor 193 | # input_tensor = tt(input_img) 194 | # tensor_list = [input_tensor] 195 | # self.mask_r_cnn.eval() 196 | # predictions = self.mask_r_cnn(tensor_list) 197 | # return predictions 198 | 199 | -------------------------------------------------------------------------------- /src/mask_rg/rewards.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import matplotlib.pyplot as plt 3 | 4 | NON_DETECTION_PUNISHMENT = -0.02 5 | # EXTRA_DETECTION_PUNISHMENT = 0.0 #-0.01 6 | 7 | color_space = np.asarray([#[78, 121, 167], # blue 8 | #[89, 161, 79], # green 9 | # [156, 117, 95], # brown 10 | # [242, 142, 43], # orange 11 | #[237, 201, 72], # yellow 12 | #[186, 176, 172], # gray 13 | # [255, 87, 89], # red 14 | #[176, 122, 161], # purple 15 | #[118, 183, 178], # cyan 16 | #[255, 157, 167], # pink 17 | [154, 205, 50], # yellow green 18 | [85, 107, 47], # dark olive green 19 | [107, 142, 35], # olive drab 20 | [124, 252, 0], # lawn green 21 | [127, 255, 0], # chart reuse 22 | [173, 255, 47], # green yellow 23 | [0, 100, 0], # dark green 24 | [0, 128, 0], # green 25 | [34, 139, 34], # forest green 26 | [0, 255, 0], # lime 27 | [50, 205, 50], # lime green 28 | [144, 238, 144], # light green 29 | [152, 251, 152], # pale green 30 | [143, 188, 143], # dark sea green 31 | [0, 250, 154], # medium spring green 32 | [0, 255, 127], # spring green 33 | [46, 139, 87], # sea green 34 | [0,255,255], # cyan 35 | [224,255,255], # light cyan 36 | [0,206,209], # dark turquoise 37 | [64,224,208], # turquoise 38 | [72,209,204], # medium turquoise 39 | [175,238,238], # pale turquoise 40 | [127,255,212], # aqua marine 41 | [176,224,230], # powder blue 42 | [95,158,160], # cadet blue 43 | [70,130,180], # steel blue 44 | [100,149,237], # corn flower blue 45 | [0,191,255], # deep sky blue 46 | [30,144,255], # dodger blue 47 | [173,216,230], # light blue 48 | [135,206,235], # sky blue 49 | [135,206,250], # light sky blue 50 | [25,25,112], # midnight blue 51 | [0,0,128], # navy 52 | [0,0,139], # dark blue 53 | [0,0,205], # medium blue 54 | [0,0,255], # blue 55 | [65,105,225], # royal blue 56 | [138,43,226], # blue violet 57 | [75,0,130], # indigo 58 | [72,61,139], # dark slate blue 59 | ], dtype=np.uint8) 60 | 61 | color_space_red = np.asarray([[128, 0, 0], # maroon 62 | [139, 0, 0], # dark red 63 | [165, 42, 42], # brown 64 | [178, 34, 34], # firebrick 65 | [220, 20, 60], # crimson 66 
| [255, 0, 0], # red 67 | [255, 99, 71], # tomato 68 | [255, 127, 80], # coral 69 | [205, 92, 92], # indian red 70 | [240, 128, 128], # light coral 71 | [233, 150, 122], # dark salmon 72 | [250, 128, 114], # salmon 73 | [255, 160, 122], # light salmon 74 | [255, 69, 0], # orange red 75 | [255, 140, 0], # dark orange 76 | [255, 165, 0], # orange 77 | [160, 82, 45], # sienna 78 | [219, 112, 147] # pale viaolet red 79 | ], dtype=np.uint8) 80 | 81 | 82 | class BColors: 83 | HEADER = '\033[95m' 84 | OKBLUE = '\033[94m' 85 | OKGREEN = '\033[92m' 86 | WARNING = '\033[93m' 87 | FAIL = '\033[91m' 88 | ENDC = '\033[0m' 89 | BOLD = '\033[1m' 90 | UNDERLINE = '\033[4m' 91 | 92 | 93 | class RewardGenerator(object): 94 | 95 | def __init__(self, confidence_threshold=0.75, mask_threshold=0.75, device='cuda'): 96 | 97 | self.mask_threshold = mask_threshold 98 | self.confidence_threshold = confidence_threshold 99 | 100 | # self.set_element(pred_tensor, gt, confidence_threshold, mask_threshold, device) 101 | self.obj_ids = None 102 | self.num_objs = None 103 | self.height = 1024 104 | self.width = 1024 105 | self.gts = None 106 | self.scores = None 107 | self.masks = None 108 | self.boxes = None 109 | self.num_pred = None 110 | self.valid_masks = [] 111 | self.valid_boxes = [] 112 | self.num_valid_pred = 0 113 | 114 | def set_element(self, pred_tensor, gt): 115 | # list of objects - All data had been cleaned and obj_ids are in ordered 116 | obj_ids = np.unique(gt) 117 | # Remove background (i.e. 0) 118 | self.obj_ids = obj_ids[1:] 119 | 120 | # combine the masks 121 | self.num_objs = len(self.obj_ids) 122 | self.height = gt.shape[0] 123 | self.width = gt.shape[1] 124 | self.gts = np.zeros((self.num_objs, self.height, self.width), dtype=np.uint8) 125 | for i in range(self.num_objs): 126 | self.gts[i][np.where(gt == i + 1)] = 1 127 | 128 | self.scores = pred_tensor[0]['scores'] 129 | self.masks = pred_tensor[0]['masks'] 130 | self.boxes = pred_tensor[0]['boxes'] 131 | self.num_pred = self.masks.shape[0] 132 | self.valid_masks = [] 133 | self.valid_boxes = [] 134 | self.num_valid_pred = 0 135 | 136 | for pred_no in range(0, self.num_pred): 137 | if self.scores[pred_no] > self.confidence_threshold: 138 | self.valid_masks.append(self.masks[pred_no]) 139 | self.valid_boxes.append(self.boxes[pred_no]) 140 | self.num_valid_pred += 1 141 | 142 | def f1_reward(self): 143 | pass 144 | 145 | def object_wise(self): 146 | compare = 0 147 | return compare 148 | 149 | @staticmethod 150 | def _jaccard(gt, pred): 151 | intersection = gt * pred 152 | union = (gt + pred) / 2 153 | area_i = np.count_nonzero(intersection) 154 | area_u = np.count_nonzero(union) 155 | if area_u > 0: 156 | iou = area_i / area_u 157 | else: 158 | return [0, 0, 0] 159 | return [iou, area_i, area_u] 160 | 161 | def get_reward(self): 162 | res = self._detect_mask_id() 163 | # self.print_masks() 164 | # if self.num_valid_pred - self.num_objs > 0: 165 | # extra_pun = (self.num_valid_pred - self.num_objs) * EXTRA_DETECTION_PUNISHMENT 166 | # else: 167 | # extra_pun = 0 168 | if self.num_objs > 0: # and len(res) <= self.num_objs: 169 | sum_iou = np.sum(res, axis=0) 170 | sum_iou = sum_iou[1] 171 | reward_iou = sum_iou / self.num_objs 172 | 173 | true_detections = np.count_nonzero(res, axis=0) 174 | true_detections = true_detections[1] 175 | num_non_detected = self.num_objs - true_detections 176 | reward = reward_iou + num_non_detected * NON_DETECTION_PUNISHMENT #+ extra_pun 177 | 178 | print(BColors.HEADER + '--------------------------- MASK-RG RETURNS 
----------------------------' + BColors.ENDC) 179 | print('Number of objects in GT --> ', self.num_objs) 180 | print('Number of possible objects (confidence threshold {:.2f}) --> {:d}'.format(self.confidence_threshold, 181 | self.num_valid_pred)) 182 | print('Number of correct detections (masking threshold {:.2f}) --> {:d}'.format(self.mask_threshold, 183 | true_detections)) 184 | print('NEGATIVE SCORE: --> {:.2f} (cnst {:.2f} x non-detected '.format(num_non_detected * NON_DETECTION_PUNISHMENT, 185 | NON_DETECTION_PUNISHMENT) + BColors.FAIL + 186 | ' {:d}'.format(num_non_detected) + BColors.ENDC + ')') 187 | 188 | print('POSITIVE SCORE: --> {:.6f} (Matching IoU)'.format(reward_iou)) 189 | # print('Non detection ({:.3f}) + extra pun ({:.3f}) = ' 190 | # '{:.3f}'.format(num_non_detected * NON_DETECTION_PUNISHMENT, 191 | # extra_pun, num_non_detected * NON_DETECTION_PUNISHMENT + extra_pun)) 192 | print(BColors.HEADER + 'TOTAL RETURN --> {:.6f}'.format(reward)) 193 | print('------------------------------------------------------------------------' + BColors.ENDC) 194 | # count 255-254 and diff 195 | else: 196 | # predictions are more than actual objects in the scene 197 | print("CHECK!") 198 | return -1 199 | pass 200 | # return res, reward, [self.num_objs, num_non_detected, num_non_detected / self.num_objs] 201 | return res, reward, [self.num_objs, num_non_detected, self.num_valid_pred] 202 | 203 | def _detect_mask_id(self): 204 | 205 | results = [] 206 | pred_order = np.zeros((self.num_objs, 2), dtype=np.float32) 207 | 208 | for idx_gt, gt in enumerate(self.gts): 209 | for inx_pred, mask in enumerate(self.valid_masks): 210 | pred_arr = np.asarray(mask.cpu().detach()).reshape((self.height, self.width)) 211 | pred_arr = np.where(pred_arr > self.mask_threshold, 1, 0) 212 | res = self._jaccard(gt, pred_arr) 213 | if res[0] > 0: 214 | results.append([inx_pred, res[0]]) 215 | 216 | if not results == []: 217 | res_arr = np.asarray(results) 218 | max_index = np.argmax(res_arr, axis=0) 219 | max_index = max_index[1] 220 | if res_arr[max_index][1] > self.mask_threshold: 221 | pred_order[idx_gt] = res_arr[max_index] 222 | else: 223 | pred_order[idx_gt] = [255, 0] 224 | else: 225 | pred_order[idx_gt] = [254, 0] 226 | results = [] 227 | return pred_order 228 | 229 | def print_masks(self): 230 | # masks = input_tensor[0]['masks'] 231 | # scores = input_tensor[0]['scores'] 232 | # num_pred = masks.shape[0] 233 | num_masks = 0 234 | all = np.zeros((self.height, self.width), dtype=np.uint8) 235 | for mask in range(self.num_pred): 236 | if self.scores[mask] > self.confidence_threshold: 237 | # TODO if cuda, add a control here 238 | mask_arr = np.asarray(self.masks[mask].cpu().detach()).reshape((self.height, self.width)) 239 | mask_arr = np.where(mask_arr > self.mask_threshold, 1, 0) 240 | all[np.where(mask_arr > 0)] = num_masks 241 | num_masks += 1 242 | # plt.imshow(all) 243 | # plt.show() 244 | # print('num masks that have a score higher than %.02f --> ' % self.confidence_threshold, num_masks) 245 | return all 246 | 247 | def print_seg_diff(self, order): 248 | img_err_masks = np.zeros([self.height, self.width, 3], dtype=np.uint8) 249 | for idx, curr_gt in enumerate(self.gts): 250 | if not (int(order[idx][0]) == 254 or int(order[idx][0]) == 255): 251 | curr_pred = np.asarray(self.masks[int(order[idx][0])].cpu().detach()).reshape((self.height, self.width)) 252 | curr_pred = np.where(curr_pred > self.mask_threshold, 1, 0) 253 | 254 | compare = curr_pred != curr_gt 255 | img_err_masks[compare] = 
color_space[np.abs(41-idx)] 256 | # plt.imshow(img_err_masks) 257 | # plt.imshow(curr_gt) 258 | # plt.imshow(curr_pred) 259 | else: 260 | img_err_masks[np.asarray(curr_gt, dtype=np.bool)] = color_space_red[np.abs(17-idx)] 261 | # plt.imshow(curr_gt) 262 | # plt.imshow(img_err_masks) 263 | return img_err_masks 264 | -------------------------------------------------------------------------------- /src/mask_rg/transforms.py: -------------------------------------------------------------------------------- 1 | import random 2 | import torch 3 | 4 | from torchvision.transforms import functional as F 5 | 6 | 7 | def _flip_coco_person_keypoints(kps, width): 8 | flip_inds = [0, 2, 1, 4, 3, 6, 5, 8, 7, 10, 9, 12, 11, 14, 13, 16, 15] 9 | flipped_data = kps[:, flip_inds] 10 | flipped_data[..., 0] = width - flipped_data[..., 0] 11 | # Maintain COCO convention that if visibility == 0, then x, y = 0 12 | inds = flipped_data[..., 2] == 0 13 | flipped_data[inds] = 0 14 | return flipped_data 15 | 16 | 17 | class Compose(object): 18 | def __init__(self, transforms): 19 | self.transforms = transforms 20 | 21 | def __call__(self, image, target): 22 | for t in self.transforms: 23 | image, target = t(image, target) 24 | return image, target 25 | 26 | 27 | class RandomHorizontalFlip(object): 28 | def __init__(self, prob): 29 | self.prob = prob 30 | 31 | def __call__(self, image, target): 32 | if random.random() < self.prob: 33 | height, width = image.shape[-2:] 34 | image = image.flip(-1) 35 | bbox = target["boxes"] 36 | bbox[:, [0, 2]] = width - bbox[:, [2, 0]] 37 | target["boxes"] = bbox 38 | if "masks" in target: 39 | target["masks"] = target["masks"].flip(-1) 40 | if "keypoints" in target: 41 | keypoints = target["keypoints"] 42 | keypoints = _flip_coco_person_keypoints(keypoints, width) 43 | target["keypoints"] = keypoints 44 | return image, target 45 | 46 | 47 | class ToTensor(object): 48 | def __call__(self, image, target): 49 | image = F.to_tensor(image) 50 | return image, target 51 | -------------------------------------------------------------------------------- /src/mask_rg/utils.py: -------------------------------------------------------------------------------- 1 | from collections import defaultdict, deque 2 | import datetime 3 | import pickle 4 | import time 5 | 6 | import torch 7 | import torch.distributed as dist 8 | 9 | import errno 10 | import os 11 | 12 | 13 | class SmoothedValue(object): 14 | """Track a series of values and provide access to smoothed values over a 15 | window or the global series average. 16 | """ 17 | 18 | def __init__(self, window_size=20, fmt=None): 19 | if fmt is None: 20 | fmt = "{median:.4f} ({global_avg:.4f})" 21 | self.deque = deque(maxlen=window_size) 22 | self.total = 0.0 23 | self.count = 0 24 | self.fmt = fmt 25 | 26 | def update(self, value, n=1): 27 | self.deque.append(value) 28 | self.count += n 29 | self.total += value * n 30 | 31 | def synchronize_between_processes(self): 32 | """ 33 | Warning: does not synchronize the deque! 
34 | """ 35 | if not is_dist_avail_and_initialized(): 36 | return 37 | t = torch.tensor([self.count, self.total], dtype=torch.float64, device='cuda') 38 | dist.barrier() 39 | dist.all_reduce(t) 40 | t = t.tolist() 41 | self.count = int(t[0]) 42 | self.total = t[1] 43 | 44 | @property 45 | def median(self): 46 | d = torch.tensor(list(self.deque)) 47 | return d.median().item() 48 | 49 | @property 50 | def avg(self): 51 | d = torch.tensor(list(self.deque), dtype=torch.float32) 52 | return d.mean().item() 53 | 54 | @property 55 | def global_avg(self): 56 | return self.total / self.count 57 | 58 | @property 59 | def max(self): 60 | return max(self.deque) 61 | 62 | @property 63 | def value(self): 64 | return self.deque[-1] 65 | 66 | def __str__(self): 67 | return self.fmt.format( 68 | median=self.median, 69 | avg=self.avg, 70 | global_avg=self.global_avg, 71 | max=self.max, 72 | value=self.value) 73 | 74 | 75 | def all_gather(data): 76 | """ 77 | Run all_gather on arbitrary picklable data (not necessarily tensors) 78 | Args: 79 | data: any picklable object 80 | Returns: 81 | list[data]: list of data gathered from each rank 82 | """ 83 | world_size = get_world_size() 84 | if world_size == 1: 85 | return [data] 86 | 87 | # serialized to a Tensor 88 | buffer = pickle.dumps(data) 89 | storage = torch.ByteStorage.from_buffer(buffer) 90 | tensor = torch.ByteTensor(storage).to("cuda") 91 | 92 | # obtain Tensor size of each rank 93 | local_size = torch.tensor([tensor.numel()], device="cuda") 94 | size_list = [torch.tensor([0], device="cuda") for _ in range(world_size)] 95 | dist.all_gather(size_list, local_size) 96 | size_list = [int(size.item()) for size in size_list] 97 | max_size = max(size_list) 98 | 99 | # receiving Tensor from all ranks 100 | # we pad the tensor because torch all_gather does not support 101 | # gathering tensors of different shapes 102 | tensor_list = [] 103 | for _ in size_list: 104 | tensor_list.append(torch.empty((max_size,), dtype=torch.uint8, device="cuda")) 105 | if local_size != max_size: 106 | padding = torch.empty(size=(max_size - local_size,), dtype=torch.uint8, device="cuda") 107 | tensor = torch.cat((tensor, padding), dim=0) 108 | dist.all_gather(tensor_list, tensor) 109 | 110 | data_list = [] 111 | for size, tensor in zip(size_list, tensor_list): 112 | buffer = tensor.cpu().numpy().tobytes()[:size] 113 | data_list.append(pickle.loads(buffer)) 114 | 115 | return data_list 116 | 117 | 118 | def reduce_dict(input_dict, average=True): 119 | """ 120 | Args: 121 | input_dict (dict): all the values will be reduced 122 | average (bool): whether to do average or sum 123 | Reduce the values in the dictionary from all processes so that all processes 124 | have the averaged results. Returns a dict with the same fields as 125 | input_dict, after reduction. 
126 | """ 127 | world_size = get_world_size() 128 | if world_size < 2: 129 | return input_dict 130 | with torch.no_grad(): 131 | names = [] 132 | values = [] 133 | # sort the keys so that they are consistent across processes 134 | for k in sorted(input_dict.keys()): 135 | names.append(k) 136 | values.append(input_dict[k]) 137 | values = torch.stack(values, dim=0) 138 | dist.all_reduce(values) 139 | if average: 140 | values /= world_size 141 | reduced_dict = {k: v for k, v in zip(names, values)} 142 | return reduced_dict 143 | 144 | 145 | class MetricLogger(object): 146 | def __init__(self, delimiter="\t"): 147 | self.meters = defaultdict(SmoothedValue) 148 | self.delimiter = delimiter 149 | 150 | def update(self, **kwargs): 151 | for k, v in kwargs.items(): 152 | if isinstance(v, torch.Tensor): 153 | v = v.item() 154 | assert isinstance(v, (float, int)) 155 | self.meters[k].update(v) 156 | 157 | def __getattr__(self, attr): 158 | if attr in self.meters: 159 | return self.meters[attr] 160 | if attr in self.__dict__: 161 | return self.__dict__[attr] 162 | raise AttributeError("'{}' object has no attribute '{}'".format( 163 | type(self).__name__, attr)) 164 | 165 | def __str__(self): 166 | loss_str = [] 167 | for name, meter in self.meters.items(): 168 | loss_str.append( 169 | "{}: {}".format(name, str(meter)) 170 | ) 171 | return self.delimiter.join(loss_str) 172 | 173 | def synchronize_between_processes(self): 174 | for meter in self.meters.values(): 175 | meter.synchronize_between_processes() 176 | 177 | def add_meter(self, name, meter): 178 | self.meters[name] = meter 179 | 180 | def log_every(self, iterable, print_freq, header=None): 181 | i = 0 182 | if not header: 183 | header = '' 184 | start_time = time.time() 185 | end = time.time() 186 | iter_time = SmoothedValue(fmt='{avg:.4f}') 187 | data_time = SmoothedValue(fmt='{avg:.4f}') 188 | space_fmt = ':' + str(len(str(len(iterable)))) + 'd' 189 | if torch.cuda.is_available(): 190 | log_msg = self.delimiter.join([ 191 | header, 192 | '[{0' + space_fmt + '}/{1}]', 193 | 'eta: {eta}', 194 | '{meters}', 195 | 'time: {time}', 196 | 'data: {data}', 197 | 'max mem: {memory:.0f}' 198 | ]) 199 | else: 200 | log_msg = self.delimiter.join([ 201 | header, 202 | '[{0' + space_fmt + '}/{1}]', 203 | 'eta: {eta}', 204 | '{meters}', 205 | 'time: {time}', 206 | 'data: {data}' 207 | ]) 208 | MB = 1024.0 * 1024.0 209 | for obj in iterable: 210 | data_time.update(time.time() - end) 211 | yield obj 212 | iter_time.update(time.time() - end) 213 | if i % print_freq == 0 or i == len(iterable) - 1: 214 | eta_seconds = iter_time.global_avg * (len(iterable) - i) 215 | eta_string = str(datetime.timedelta(seconds=int(eta_seconds))) 216 | if torch.cuda.is_available(): 217 | print(log_msg.format( 218 | i, len(iterable), eta=eta_string, 219 | meters=str(self), 220 | time=str(iter_time), data=str(data_time), 221 | memory=torch.cuda.max_memory_allocated() / MB)) 222 | else: 223 | print(log_msg.format( 224 | i, len(iterable), eta=eta_string, 225 | meters=str(self), 226 | time=str(iter_time), data=str(data_time))) 227 | i += 1 228 | end = time.time() 229 | total_time = time.time() - start_time 230 | total_time_str = str(datetime.timedelta(seconds=int(total_time))) 231 | print('{} Total time: {} ({:.4f} s / it)'.format( 232 | header, total_time_str, total_time / len(iterable))) 233 | 234 | 235 | def collate_fn(batch): 236 | return tuple(zip(*batch)) 237 | 238 | 239 | def warmup_lr_scheduler(optimizer, warmup_iters, warmup_factor): 240 | 241 | def f(x): 242 | if x >= 
warmup_iters: 243 | return 1 244 | alpha = float(x) / warmup_iters 245 | return warmup_factor * (1 - alpha) + alpha 246 | 247 | return torch.optim.lr_scheduler.LambdaLR(optimizer, f) 248 | 249 | 250 | def mkdir(path): 251 | try: 252 | os.makedirs(path) 253 | except OSError as e: 254 | if e.errno != errno.EEXIST: 255 | raise 256 | 257 | 258 | def setup_for_distributed(is_master): 259 | """ 260 | This function disables printing when not in master process 261 | """ 262 | import builtins as __builtin__ 263 | builtin_print = __builtin__.print 264 | 265 | def print(*args, **kwargs): 266 | force = kwargs.pop('force', False) 267 | if is_master or force: 268 | builtin_print(*args, **kwargs) 269 | 270 | __builtin__.print = print 271 | 272 | 273 | def is_dist_avail_and_initialized(): 274 | if not dist.is_available(): 275 | return False 276 | if not dist.is_initialized(): 277 | return False 278 | return True 279 | 280 | 281 | def get_world_size(): 282 | if not is_dist_avail_and_initialized(): 283 | return 1 284 | return dist.get_world_size() 285 | 286 | 287 | def get_rank(): 288 | if not is_dist_avail_and_initialized(): 289 | return 0 290 | return dist.get_rank() 291 | 292 | 293 | def is_main_process(): 294 | return get_rank() == 0 295 | 296 | 297 | def save_on_master(*args, **kwargs): 298 | if is_main_process(): 299 | torch.save(*args, **kwargs) 300 | 301 | 302 | def init_distributed_mode(args): 303 | if 'RANK' in os.environ and 'WORLD_SIZE' in os.environ: 304 | args.rank = int(os.environ["RANK"]) 305 | args.world_size = int(os.environ['WORLD_SIZE']) 306 | args.gpu = int(os.environ['LOCAL_RANK']) 307 | elif 'SLURM_PROCID' in os.environ: 308 | args.rank = int(os.environ['SLURM_PROCID']) 309 | args.gpu = args.rank % torch.cuda.device_count() 310 | else: 311 | print('Not using distributed mode') 312 | args.distributed = False 313 | return 314 | 315 | args.distributed = True 316 | 317 | torch.cuda.set_device(args.gpu) 318 | args.dist_backend = 'nccl' 319 | print('| distributed init (rank {}): {}'.format( 320 | args.rank, args.dist_url), flush=True) 321 | torch.distributed.init_process_group(backend=args.dist_backend, init_method=args.dist_url, 322 | world_size=args.world_size, rank=args.rank) 323 | torch.distributed.barrier() 324 | setup_for_distributed(args.rank == 0) 325 | -------------------------------------------------------------------------------- /src/push-DQN/logger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import datetime 3 | import os 4 | import numpy as np 5 | import cv2 6 | import torch 7 | # import h5py 8 | import shutil 9 | import matplotlib.pyplot as plt 10 | 11 | class Logger(): 12 | 13 | def __init__(self, continue_logging, logging_directory): 14 | 15 | # Create directory to save data 16 | timestamp = time.time() 17 | timestamp_value = datetime.datetime.fromtimestamp(timestamp) 18 | self.continue_logging = continue_logging 19 | if self.continue_logging: 20 | self.base_directory = logging_directory 21 | print('Pre-loading data logging session: %s' % (self.base_directory)) 22 | else: 23 | self.base_directory = os.path.join(logging_directory, timestamp_value.strftime('%Y-%m-%d.%H:%M:%S')) 24 | print('Creating data logging session: %s' % (self.base_directory)) 25 | self.info_directory = os.path.join(self.base_directory, 'info') 26 | self.color_images_directory = os.path.join(self.base_directory, 'data', 'color-images') 27 | self.depth_images_directory = os.path.join(self.base_directory, 'data', 'depth-images') 28 | 
self.color_heightmaps_directory = os.path.join(self.base_directory, 'data', 'color-heightmaps') 29 | self.depth_heightmaps_directory = os.path.join(self.base_directory, 'data', 'depth-heightmaps') 30 | self.models_directory = os.path.join(self.base_directory, 'models') 31 | self.visualizations_directory = os.path.join(self.base_directory, 'visualizations') 32 | self.recordings_directory = os.path.join(self.base_directory, 'recordings') 33 | self.transitions_directory = os.path.join(self.base_directory, 'transitions') 34 | 35 | self.mask_rg_directory = os.path.join(self.base_directory, 'mask_rg') 36 | self.mask_images_directory = os.path.join(self.base_directory, 'mask_rg', 'prediction-images') 37 | self.mask_predictions_directory = os.path.join(self.base_directory, 'mask_rg', 'predictions') 38 | 39 | if not os.path.exists(self.info_directory): 40 | os.makedirs(self.info_directory) 41 | if not os.path.exists(self.color_images_directory): 42 | os.makedirs(self.color_images_directory) 43 | if not os.path.exists(self.depth_images_directory): 44 | os.makedirs(self.depth_images_directory) 45 | if not os.path.exists(self.color_heightmaps_directory): 46 | os.makedirs(self.color_heightmaps_directory) 47 | if not os.path.exists(self.depth_heightmaps_directory): 48 | os.makedirs(self.depth_heightmaps_directory) 49 | if not os.path.exists(self.models_directory): 50 | os.makedirs(self.models_directory) 51 | if not os.path.exists(self.visualizations_directory): 52 | os.makedirs(self.visualizations_directory) 53 | if not os.path.exists(self.recordings_directory): 54 | os.makedirs(self.recordings_directory) 55 | if not os.path.exists(self.transitions_directory): 56 | os.makedirs(os.path.join(self.transitions_directory, 'data')) 57 | if not os.path.exists(self.mask_rg_directory): 58 | os.makedirs(self.mask_rg_directory) 59 | if not os.path.exists(self.mask_images_directory): 60 | os.makedirs(self.mask_images_directory) 61 | if not os.path.exists(self.mask_predictions_directory): 62 | os.makedirs(self.mask_predictions_directory) 63 | 64 | 65 | def save_camera_info(self, intrinsics, pose, depth_scale): 66 | np.savetxt(os.path.join(self.info_directory, 'camera-intrinsics.txt'), intrinsics, delimiter=' ') 67 | np.savetxt(os.path.join(self.info_directory, 'camera-pose.txt'), pose, delimiter=' ') 68 | np.savetxt(os.path.join(self.info_directory, 'camera-depth-scale.txt'), [depth_scale], delimiter=' ') 69 | 70 | def save_heightmap_info(self, boundaries, resolution): 71 | np.savetxt(os.path.join(self.info_directory, 'heightmap-boundaries.txt'), boundaries, delimiter=' ') 72 | np.savetxt(os.path.join(self.info_directory, 'heightmap-resolution.txt'), [resolution], delimiter=' ') 73 | 74 | def save_images(self, iteration, color_image, depth_image, mode): 75 | color_image = cv2.cvtColor(color_image, cv2.COLOR_RGB2BGR) 76 | cv2.imwrite(os.path.join(self.color_images_directory, '%06d.%s.color.png' % (iteration, mode)), color_image) 77 | depth_image = np.round(depth_image * 10000).astype(np.uint16) # Save depth in 1e-4 meters 78 | cv2.imwrite(os.path.join(self.depth_images_directory, '%06d.%s.depth.png' % (iteration, mode)), depth_image) 79 | 80 | def save_heightmaps(self, iteration, color_heightmap, depth_heightmap, mode): 81 | color_heightmap = cv2.cvtColor(color_heightmap, cv2.COLOR_RGB2BGR) 82 | cv2.imwrite(os.path.join(self.color_heightmaps_directory, '%06d.%s.color.png' % (iteration, mode)), color_heightmap) 83 | depth_heightmap = np.round(depth_heightmap * 100000).astype(np.uint16) # Save depth in 1e-5 meters 84 | 
cv2.imwrite(os.path.join(self.depth_heightmaps_directory, '%06d.%s.depth.png' % (iteration, mode)), depth_heightmap) 85 | 86 | def write_to_log(self, log_name, log): 87 | np.savetxt(os.path.join(self.transitions_directory, '%s.log.txt' % log_name), log, delimiter=' ') 88 | 89 | def save_model(self, iteration, model, name): 90 | torch.save(model.cpu().state_dict(), os.path.join(self.models_directory, 'snapshot-%06d.%s.pth' % (iteration, name))) 91 | 92 | def save_backup_model(self, model, name): 93 | torch.save(model.state_dict(), os.path.join(self.models_directory, 'snapshot-backup.%s.pth' % (name))) 94 | 95 | def save_visualizations(self, iteration, affordance_vis, name): 96 | cv2.imwrite(os.path.join(self.visualizations_directory, '%06d.%s.png' % (iteration,name)), affordance_vis) 97 | 98 | # def save_state_features(self, iteration, state_feat): 99 | # h5f = h5py.File(os.path.join(self.visualizations_directory, '%06d.state.h5' % (iteration)), 'w') 100 | # h5f.create_dataset('state', data=state_feat.cpu().data.numpy()) 101 | # h5f.close() 102 | 103 | # Record RGB-D video while executing primitive 104 | # recording_directory = logger.make_new_recording_directory(iteration) 105 | # camera.start_recording(recording_directory) 106 | # camera.stop_recording() 107 | def make_new_recording_directory(self, iteration): 108 | recording_directory = os.path.join(self.recordings_directory, '%06d' % (iteration)) 109 | if not os.path.exists(recording_directory): 110 | os.makedirs(recording_directory) 111 | return recording_directory 112 | 113 | def save_transition(self, iteration, transition): 114 | depth_heightmap = np.round(transition.state * 100000).astype(np.uint16) # Save depth in 1e-5 meters 115 | cv2.imwrite(os.path.join(self.transitions_directory, 'data', '%06d.0.depth.png' % (iteration)), depth_heightmap) 116 | next_depth_heightmap = np.round(transition.next_state * 100000).astype(np.uint16) # Save depth in 1e-5 meters 117 | cv2.imwrite(os.path.join(self.transitions_directory, 'data', '%06d.1.depth.png' % (iteration)), next_depth_heightmap) 118 | # np.savetxt(os.path.join(self.transitions_directory, '%06d.action.txt' % (iteration)), [1 if (transition.action == 'grasp') else 0], delimiter=' ') 119 | # np.savetxt(os.path.join(self.transitions_directory, '%06d.reward.txt' % (iteration)), [reward_value], delimiter=' ') 120 | 121 | def save_session_success_fail(self, succ_fail): 122 | # TODO check if file exists, if not generate 123 | with open(os.path.join(self.base_directory, 'session_success_fail_list.txt'), 'a') as file: 124 | file.write(succ_fail + ' \n') 125 | 126 | def save_config_file(self, config_path): 127 | shutil.copy(config_path, self.base_directory) 128 | 129 | def save_mask_rg_returns(self, seg_vals): 130 | # np.savetxt(os.path.join(self.mask_rg_directory, '%s.log.txt' % 'segmentation'), seg_vals, delimiter=' ') 131 | with open(os.path.join(self.mask_rg_directory, 'segmentation.txt'), 'a') as file: 132 | file.write(str(seg_vals) + ' \n') 133 | 134 | def save_mask_diff_image(self, maskdiff, seg, rgb, depth, iteration, session_iteration, err_rate): 135 | plt.imsave(os.path.join(self.mask_images_directory, "{:06d}.{:02d}.mask_pred_diff.png".format(iteration,session_iteration)), maskdiff) 136 | plt.imsave(os.path.join(self.mask_images_directory, 137 | "{:06d}.{:02d}.ground_truth.png".format(iteration, session_iteration)), seg) 138 | plt.imsave(os.path.join(self.mask_images_directory, 139 | "{:06d}.{:02d}.color.png".format(iteration, session_iteration)), rgb) 140 | 
plt.imsave(os.path.join(self.mask_images_directory, 141 | "{:06d}.{:02d}.depth.png".format(iteration, session_iteration)), depth) 142 | 143 | err_rate.append([iteration, session_iteration]) 144 | with open(os.path.join(self.mask_rg_directory, 'err_rate_per_iteration.txt'), 'a') as file: 145 | file.write(str(err_rate) + ' \n') 146 | # np.save(os.path.join(self.mask_predictions_directory, '{:06d}.{:02d}.mask_predictions.npy'.format(iteration,session_iteration)), predictions) 147 | 148 | 149 | def save_pseudo_random_notice(self): 150 | with open(os.path.join(self.base_directory, 'PSEUDO_RANDOM_MODEL.txt'), 'a') as file: 151 | file.write('This log generated by a psuedo random model! \n (Parameters in the setup section in push-DQN_config.yaml is not used for this log and should be ignored) \n') -------------------------------------------------------------------------------- /src/push-DQN/main.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.mask_rg import MaskRG 2 | import time 3 | import os 4 | import random 5 | import threading 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import scipy as sc 9 | import cv2 10 | from collections import namedtuple 11 | import torch 12 | from torch.autograd import Variable 13 | from robot import Robot 14 | from trainer import Trainer 15 | from logger import Logger 16 | import utils 17 | import scipy.misc 18 | import yaml 19 | 20 | CONF_PATH = './push-DQN_config.yaml' 21 | 22 | class BColors: 23 | HEADER = '\033[95m' 24 | OKBLUE = '\033[94m' 25 | OKGREEN = '\033[92m' 26 | WARNING = '\033[93m' 27 | FAIL = '\033[91m' 28 | ENDC = '\033[0m' 29 | BOLD = '\033[1m' 30 | UNDERLINE = '\033[4m' 31 | 32 | def main(): 33 | 34 | with open(CONF_PATH) as f: 35 | config = yaml.load(f, Loader=yaml.FullLoader) 36 | 37 | # plt.ion() 38 | mask_rg = MaskRG(config['detection_thresholds']['confidence_threshold'], config['detection_thresholds']['mask_threshold']) 39 | session_success_threshold = config['detection_thresholds']['session_success_threshold'] 40 | 41 | snapshot_file = os.path.join(config['model']['path'], config['model']['file']) if config['model']['file'] != 'new' else None 42 | #TODO clean this from trainer method 43 | load_snapshot = True if snapshot_file is not None else False 44 | 45 | logging_directory = config['logging']['path'] if config['logging']['path'] != 'Default' else os.path.abspath('./logs') 46 | continue_logging = config['logging']['continue_logging'] 47 | save_visualizations = config['logging']['save_visualizations'] 48 | 49 | workspace_limits = np.asarray(config['environment']['workspace_limits']) 50 | heightmap_resolution = float(config['environment']['heightmap_resolution']) 51 | 52 | min_num_obj = config['environment']['min_num_objects'] 53 | max_num_obj = config['environment']['max_num_objects'] 54 | session_limit = config['environment']['session_limit'] 55 | 56 | random_seed = 1234 # (to set the random seed for simulation and neural net initialization) 57 | force_cpu = False # (to force code to run in CPU mode) 58 | 59 | # TODO implementation! 
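# (added note) experience replay is not wired into this training loop yet; the commented-out config read below is a placeholder for it.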
60 | # experience_replay = config['setup']['experience_replay'] 61 | 62 | future_reward_discount = config['training']['future_reward_discount'] 63 | 64 | is_testing = config['setup']['is_testing'] 65 | explore_prob = config['setup']['exploration_probability'] 66 | explore_rate_decay = config['setup']['epsilon_greedy_policy']['exploration_rate_decay'] 67 | min_rate_decay = config['setup']['epsilon_greedy_policy']['min_exp_rate_decay'] 68 | 69 | is_random_model = config['setup']['is_random_model'] 70 | 71 | # Set random seed 72 | np.random.seed(random_seed) 73 | 74 | # Initialize pick-and-place system (camera and robot) 75 | robot = Robot(min_num_obj, max_num_obj, workspace_limits) 76 | 77 | # Initialize trainer 78 | trainer = Trainer(future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) 79 | 80 | # Initialize data logger 81 | logger = Logger(continue_logging, logging_directory) 82 | logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose 83 | logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters 84 | 85 | # save the current configuration file in the current logging directory 86 | logger.save_config_file(CONF_PATH) 87 | 88 | # Find last executed iteration of pre-loaded log, and load execution info and RL variables 89 | if continue_logging: 90 | trainer.preload(logger.transitions_directory) 91 | 92 | # Initialize variables for heuristic bootstrapping and exploration probability 93 | # no_change_count = [2, 2] if not is_testing else [0, 0] 94 | 95 | explore_prob = explore_prob if not is_testing else 0.0 96 | # This overrides all above!!! 97 | if is_random_model: 98 | explore_prob = 1.0 99 | explore_rate_decay = False 100 | 101 | nonlocal_variables = {'executing_action': False, 102 | 'primitive_action': None, 103 | 'best_pix_ind': None, 104 | 'push_success': False, 105 | 'grasp_success': False, 106 | 'session_success': False, 107 | 'session_first_loop': True} 108 | session_counter = 0 109 | # session no (index)--> [number of iteration for that session, fail(0)/success(1)] 110 | success_fail_record = [] 111 | last_iter = 0 112 | # Parallel thread to process network output and execute actions 113 | # ------------------------------------------------------------- 114 | def process_actions(): 115 | while True: 116 | if nonlocal_variables['executing_action']: 117 | 118 | print('Primitive confidence scores for pushing: {:f}'.format(np.max(push_predictions))) 119 | nonlocal_variables['primitive_action'] = 'push' 120 | 121 | # Exploration vs. Exploitation - Best push vs random push / if is_testing --> explore_prob=0! 
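# (added worked example) the epsilon-greedy rule below draws u ~ Uniform(0, 1); if u < explore_prob a random
# rotation/pixel is pushed, otherwise the argmax of the Q-map is executed. With the decay applied later in this
# script, explore_prob = max(0.5 * 0.998**iteration, min_rate_decay), i.e. roughly 0.50 at iteration 0, 0.41 after
# 100 iterations and 0.07 after 1000 iterations (assuming min_rate_decay is smaller than these values).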
122 | explore_actions = np.random.uniform() < explore_prob 123 | if not explore_actions: 124 | print('Strategy: ' + BColors.WARNING + 'EXPLOIT' + BColors.ENDC + ' (exploration probability: ' 125 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 126 | # Exploitation: Return the index of the x, y coordinates of the best prediction in the Q-values 127 | nonlocal_variables['best_pix_ind'] = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 128 | predicted_value = np.max(push_predictions) 129 | 130 | else: 131 | print('Strategy:' + BColors.WARNING + 'EXPLORE' + BColors.ENDC + ' (exploration probability:' 132 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 133 | 134 | nonlocal_variables['best_pix_ind'] = (np.random.random_integers(0, 15), 135 | np.random.random_integers(0, 224-1), 136 | np.random.random_integers(0, 224-1)) 137 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 138 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 139 | predicted_value = push_predictions[nonlocal_variables['best_pix_ind']] 140 | 141 | trainer.is_exploit_log.append([0 if explore_actions else 1]) 142 | logger.write_to_log('is-exploit', trainer.is_exploit_log) 143 | # Save predicted confidence value 144 | trainer.predicted_value_log.append([predicted_value]) 145 | logger.write_to_log('predicted-value', trainer.predicted_value_log) 146 | 147 | # Compute 3D position of pixel 148 | print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], 149 | nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) 150 | 151 | best_rotation_angle = np.deg2rad(nonlocal_variables['best_pix_ind'][0] * (360.0/trainer.model.num_rotations)) 152 | best_pix_x = nonlocal_variables['best_pix_ind'][2] 153 | best_pix_y = nonlocal_variables['best_pix_ind'][1] 154 | primitive_position = [best_pix_x * heightmap_resolution + workspace_limits[0][0], 155 | best_pix_y * heightmap_resolution + workspace_limits[1][0], 156 | valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]] 157 | print("primitive position --> ", primitive_position) 158 | # If pushing, adjust start position, and make sure z value is safe and not too low 159 | if nonlocal_variables['primitive_action'] == 'push': 160 | finger_width = 0.02 161 | safe_kernel_width = int(np.round((finger_width/2)/heightmap_resolution)) 162 | local_region = valid_depth_heightmap[max(best_pix_y - safe_kernel_width, 0) 163 | :min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]), 164 | max(best_pix_x - safe_kernel_width, 0) 165 | :min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])] 166 | if local_region.size == 0: 167 | safe_z_position = workspace_limits[2][0] 168 | elif (np.max(local_region) - workspace_limits[2][0]) > finger_width: 169 | safe_z_position = np.max(local_region) - finger_width/4 170 | else: 171 | safe_z_position = np.max(local_region) 172 | 173 | primitive_position[2] = safe_z_position 174 | 175 | # Save executed primitive --> 0 for exploit, 1 for explore 176 | if not explore_actions: 177 | trainer.executed_action_log.append([0, nonlocal_variables['best_pix_ind'][0], 178 | nonlocal_variables['best_pix_ind'][1], 179 | nonlocal_variables['best_pix_ind'][2]]) 180 | else: 181 | trainer.executed_action_log.append([1, nonlocal_variables['best_pix_ind'][0], 182 | nonlocal_variables['best_pix_ind'][1], 183 | nonlocal_variables['best_pix_ind'][2]]) 184 | 185 | 
logger.write_to_log('executed-action', trainer.executed_action_log) 186 | 187 | # Visualize executed primitive, and affordances 188 | if save_visualizations: 189 | push_pred_vis = trainer.get_prediction_vis(push_predictions, color_heightmap, nonlocal_variables['best_pix_ind']) 190 | logger.save_visualizations(trainer.iteration, push_pred_vis, 'push') 191 | cv2.imwrite('visualization.push.png', push_pred_vis) 192 | 193 | # Initialize variables that influence reward 194 | nonlocal_variables['push_success'] = False 195 | 196 | # Execute primitive 197 | if nonlocal_variables['primitive_action'] == 'push': 198 | nonlocal_variables['push_success'] = robot.push(primitive_position, best_rotation_angle, workspace_limits) 199 | print('Push executed: %r' % (nonlocal_variables['push_success'])) 200 | print(BColors.OKBLUE + '-------------------------------------------------------------------------' 201 | + BColors.ENDC) 202 | nonlocal_variables['executing_action'] = False 203 | 204 | 205 | time.sleep(0.01) 206 | action_thread = threading.Thread(target=process_actions) 207 | action_thread.daemon = True 208 | action_thread.start() 209 | exit_called = False 210 | # ------------------------------------------------------------- 211 | # ------------------------------------------------------------- 212 | 213 | # Start main training/testing loop 214 | while True: 215 | print(BColors.WARNING + '\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration) + 216 | BColors.ENDC + ' (Session iteration: {:d})'.format(session_counter)) 217 | iteration_time_0 = time.time() 218 | session_counter += 1 219 | # Make sure simulation is still stable (if not, reset simulation) 220 | robot.check_sim() 221 | 222 | # Get latest RGB-D image 223 | color_img, depth_img_raw = robot.get_camera_data() 224 | # Detph scale is 1 for simulation!! 225 | depth_img = depth_img_raw * robot.cam_depth_scale # Apply depth scale from calibration 226 | 227 | # Get heightmap from RGB-D image (by re-projecting 3D point cloud) 228 | color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, 229 | robot.cam_pose, workspace_limits, heightmap_resolution) 230 | valid_depth_heightmap = depth_heightmap.copy() 231 | valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 232 | 233 | # CHECK IF IT IS THE FIRST LOOP 234 | if nonlocal_variables['session_first_loop']: 235 | 236 | print(BColors.WARNING + 'This is the zeroth iteration! ' 237 | 'Mask-RG Values below will be used in the first iteration!' 
+ BColors.ENDC) 238 | # Get ground truth segmentation masks 239 | color_m_rg, depth_m_rg, [segmentation_mask, num_objects] = robot.get_data_mask_rg() 240 | # print(num_objects) 241 | plt.imsave('ground_truth.png', segmentation_mask) 242 | plt.imsave('color_img.png', color_m_rg) 243 | plt.imsave('depth_img.png', depth_m_rg) 244 | 245 | # set the mask rcnn reward generator with the current depth and gt 246 | mask_rg.set_reward_generator(depth_m_rg, segmentation_mask) 247 | 248 | # get the rewards predictions 249 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 250 | printout = mask_rg.print_segmentation(pred_ids) 251 | plt.imsave('mask_pred_diff.png', printout) 252 | 253 | # all_masks = mask_rg.print_masks() 254 | # plt.imsave('mask_pred_all.png', all_masks) 255 | 256 | prev_seg_reward = seg_reward.copy() 257 | prev_depth_for_chg_det = depth_m_rg.copy() 258 | depth_m_rg_act = depth_m_rg.copy() # for change detection patch 259 | 260 | # if there is a success without a push just by chance, flag for the termination of the session 261 | if seg_reward > session_success_threshold: 262 | success_without_push = True 263 | nonlocal_variables['session_success'] = True 264 | else: 265 | success_without_push = False 266 | 267 | # END OF FIRST LOOP --> DEACTIVATE 268 | nonlocal_variables['session_first_loop'] = False 269 | 270 | # Save RGB-D images and RGB-D heightmaps 271 | logger.save_images(trainer.iteration, color_img, depth_img, '0') 272 | logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') 273 | 274 | # Save mask prediction difference image and error rate per iteration 275 | if 'color_m_rg_act' in locals(): 276 | logger.save_mask_diff_image(printout, segmentation_mask_action, color_m_rg_act, 277 | depth_m_rg_act, trainer.iteration, session_counter-1, err_rate) 278 | 279 | if not exit_called: 280 | # Run forward pass with network to get affordances 281 | push_predictions = trainer.forward(color_heightmap, valid_depth_heightmap, is_volatile=True) 282 | 283 | # Execute best primitive action on robot in another thread 284 | nonlocal_variables['executing_action'] = True 285 | 286 | # Run training iteration in current thread (aka training thread) 287 | # at the first loop prev_color_img will not be in locals! as well as other vars with 'prev_' 288 | if 'prev_color_img' in locals(): 289 | 290 | # The thresh hold that will terminate the current session with success, this line is after current 291 | # push action and getting the current scene reward score from mask_rg 292 | if seg_reward > session_success_threshold: 293 | # Push success!!! 
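# (added note) seg_reward here is the Mask-RG score computed after the previous push; once it exceeds
# session_success_threshold the session is flagged as successful and is closed in the SUCCESS block further below.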
294 | nonlocal_variables['session_success'] = True 295 | 296 | # Detect changes 297 | depth_diff = abs(depth_heightmap - prev_depth_heightmap) 298 | # depth_diff = abs(depth_m_rg_act - prev_depth_for_chg_det) 299 | depth_diff[np.isnan(depth_diff)] = 0 300 | depth_diff[depth_diff < 0.01] = 0 301 | 302 | depth_diff[depth_diff > 0] = 1 303 | change_threshold = 100 304 | change_value = np.sum(depth_diff) 305 | change_detected = change_value > change_threshold 306 | print('Change detected: %r (value: %d)' % (change_detected, change_value)) 307 | 308 | if not success_without_push: 309 | label_value, prev_reward_value, seg_vals = trainer.get_reward_value(color_heightmap, valid_depth_heightmap, 310 | prev_seg_reward, seg_reward, change_detected) 311 | 312 | trainer.label_value_log.append([label_value]) 313 | logger.write_to_log('label-value', trainer.label_value_log) 314 | trainer.reward_value_log.append([prev_reward_value]) 315 | logger.write_to_log('reward-value', trainer.reward_value_log) 316 | 317 | logger.save_mask_rg_returns([seg_vals, num_objects_action.size-1]) 318 | 319 | # Backpropagate 320 | # forward pass only for the best scene between 16 ('prev best pix ind') 321 | loss_val = trainer.backprop(prev_color_heightmap, prev_valid_depth_heightmap, prev_primitive_action, prev_best_pix_ind, 322 | label_value) 323 | 324 | trainer.loss_log.append([loss_val]) 325 | logger.write_to_log('loss-value', trainer.loss_log) 326 | 327 | # TODO constants!! 328 | if explore_rate_decay and not is_testing: 329 | explore_prob = max(0.5 * np.power(0.998, trainer.iteration), min_rate_decay) 330 | 331 | # Save model snapshot 332 | if not is_testing: 333 | # TODO filename string can be changed with something like datetime 334 | logger.save_backup_model(trainer.model, 'reinforcement') 335 | if trainer.iteration % config['logging']['snapshot_saving_rate'] == 0: 336 | logger.save_model(trainer.iteration, trainer.model, 'reinforcement') 337 | if trainer.use_cuda: 338 | trainer.model = trainer.model.cuda() 339 | 340 | # Sync both action thread and training thread 341 | while nonlocal_variables['executing_action']: 342 | time.sleep(0.01) 343 | 344 | # SUCCESS 345 | if nonlocal_variables['session_success']: 346 | if not success_without_push: 347 | success_fail_record.append([session_counter, 1]) 348 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 349 | + str(session_counter - 1) + 350 | ' pushing actions --> SUCCESS! (tr_it --> {:d})'.format(trainer.iteration)) 351 | 352 | print(BColors.WARNING + 'SESSION SUCCESS!' + BColors.ENDC) 353 | else: 354 | success_fail_record.append([session_counter, 2]) 355 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 356 | + str(session_counter - 1) + 357 | ' TRAINING IGNORED --> (tr_it --> {:d})'.format( 358 | trainer.iteration)) 359 | print(BColors.WARNING + 'SESSION CLOSED WITHOUT TRAINING AS THE INITIAL SEGMENTATION WAS GOOD ENOUGH!' 360 | + BColors.ENDC) 361 | 362 | last_iter = trainer.iteration 363 | nonlocal_variables['session_success'] = False 364 | nonlocal_variables['session_first_loop'] = True 365 | seg_reward = np.asarray(0) 366 | session_counter = 0 367 | robot.restart_sim() 368 | 369 | 370 | # FAIL 371 | # it depends how to count -> 0th iteration of a session does not do any training, just gets two GTs to be able 372 | # to calculate rewards at the 1st iteration. 
"session_counter > 30" means after 31 sessions including the 0th 373 | # session or 30 training sessions 374 | if session_counter > session_limit: 375 | 376 | success_fail_record.append([session_counter, 0]) 377 | logger.save_session_success_fail('Session no ' + str(len(success_fail_record)) + ' --> after ' 378 | + str(session_counter) + 379 | ' pushing actions --> FAIL! (tr_it --> {:d})'.format(trainer.iteration)) 380 | last_iter = trainer.iteration 381 | nonlocal_variables['session_first_loop'] = True 382 | robot.restart_sim() 383 | seg_reward = np.asarray(0) 384 | 385 | print(BColors.WARNING + 'SESSION TIMEOUT! (after 30 iterations without success)' + BColors.ENDC) 386 | session_counter = 0 387 | 388 | if exit_called: 389 | break 390 | 391 | # Save information for next training step 392 | prev_color_img = color_img.copy() 393 | prev_depth_img = depth_img.copy() 394 | prev_color_heightmap = color_heightmap.copy() 395 | prev_depth_heightmap = depth_heightmap.copy() 396 | prev_valid_depth_heightmap = valid_depth_heightmap.copy() 397 | prev_push_success = nonlocal_variables['push_success'] 398 | prev_grasp_success = False 399 | prev_primitive_action = 'push' 400 | prev_push_predictions = push_predictions.copy() 401 | prev_best_pix_ind = nonlocal_variables['best_pix_ind'] 402 | 403 | 404 | if not nonlocal_variables['session_first_loop']: 405 | # Action is done 406 | prev_seg_reward = seg_reward.copy() 407 | prev_depth_for_chg_det = depth_m_rg_act.copy() 408 | # print('Previous segmentation reward --> {:.5f}'.format(prev_seg_reward)) 409 | # Get RGB-D image 410 | color_img_action, depth_img_raw_action = robot.get_camera_data() 411 | # Get ground truth segmentation masks 412 | color_m_rg_act, depth_m_rg_act, [segmentation_mask_action, num_objects_action] = robot.get_data_mask_rg() 413 | plt.imsave('ground_truth.png', segmentation_mask_action) 414 | plt.imsave('color_img.png', color_m_rg_act) 415 | plt.imsave('depth_img.png', depth_m_rg_act) 416 | mask_rg.set_reward_generator(depth_m_rg_act, segmentation_mask_action) 417 | 418 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 419 | printout = mask_rg.print_segmentation(pred_ids) 420 | plt.imsave('mask_pred_diff.png', printout) 421 | print('Number of the objects left in the scene after pushing action -->', num_objects_action.size - 1) 422 | # all_masks = mask_rg.print_masks() 423 | # plt.imsave('mask_pred_all.png', all_masks) 424 | 425 | trainer.iteration += 1 426 | iteration_time_1 = time.time() 427 | print('Time elapsed: %f' % (iteration_time_1-iteration_time_0)) 428 | print('---------------------------------------------------------------') 429 | print('---------------------------------------------------------------') 430 | 431 | 432 | if __name__ == '__main__': 433 | main() 434 | -------------------------------------------------------------------------------- /src/push-DQN/main_test.py: -------------------------------------------------------------------------------- 1 | from src.mask_rg.mask_rg import MaskRG 2 | import time 3 | import os 4 | import random 5 | import threading 6 | import matplotlib.pyplot as plt 7 | import numpy as np 8 | import scipy as sc 9 | import cv2 10 | from collections import namedtuple 11 | import torch 12 | from torch.autograd import Variable 13 | from robot import Robot 14 | from trainer import Trainer 15 | from logger import Logger 16 | import utils 17 | import scipy.misc 18 | import yaml 19 | 20 | 21 | CONF_PATH = './push-DQN_config.yaml' 22 | 23 | class BColors: 24 | HEADER = '\033[95m' 25 | OKBLUE 
= '\033[94m' 26 | OKGREEN = '\033[92m' 27 | WARNING = '\033[93m' 28 | FAIL = '\033[91m' 29 | ENDC = '\033[0m' 30 | BOLD = '\033[1m' 31 | UNDERLINE = '\033[4m' 32 | 33 | def main(): 34 | 35 | with open(CONF_PATH) as f: 36 | config = yaml.load(f, Loader=yaml.FullLoader) 37 | 38 | # plt.ion() 39 | mask_rg = MaskRG(config['detection_thresholds']['confidence_threshold'], config['detection_thresholds']['mask_threshold']) 40 | session_success_threshold = config['detection_thresholds']['session_success_threshold'] 41 | 42 | snapshot_file = os.path.join(config['model']['path'], config['model']['file']) if config['model']['file'] != 'new' else None 43 | #TODO clean this from trainer method 44 | load_snapshot = True if snapshot_file is not None else False 45 | 46 | logging_directory = config['logging']['path'] if config['logging']['path'] != 'Default' else os.path.abspath('./logs') 47 | continue_logging = config['logging']['continue_logging'] 48 | save_visualizations = config['logging']['save_visualizations'] 49 | 50 | workspace_limits = np.asarray(config['environment']['workspace_limits']) 51 | heightmap_resolution = float(config['environment']['heightmap_resolution']) 52 | 53 | min_num_obj = config['environment']['min_num_objects'] 54 | max_num_obj = config['environment']['max_num_objects'] 55 | session_limit = config['environment']['session_limit'] 56 | 57 | random_seed = 1234 # (to set the random seed for simulation and neural net initialization) 58 | force_cpu = False # (to force code to run in CPU mode) 59 | 60 | # TODO implementation! 61 | # experience_replay = config['setup']['experience_replay'] 62 | 63 | future_reward_discount = config['training']['future_reward_discount'] 64 | 65 | is_testing = config['setup']['is_testing'] 66 | explore_prob = config['setup']['exploration_probability'] 67 | explore_rate_decay = config['setup']['epsilon_greedy_policy']['exploration_rate_decay'] 68 | min_rate_decay = config['setup']['epsilon_greedy_policy']['min_exp_rate_decay'] 69 | 70 | is_random_model = config['setup']['is_random_model'] 71 | 72 | # Set random seed 73 | np.random.seed(random_seed) 74 | 75 | # Initialize pick-and-place system (camera and robot) 76 | robot = Robot(min_num_obj, max_num_obj, workspace_limits) 77 | 78 | # Initialize trainer 79 | trainer = Trainer(future_reward_discount, is_testing, load_snapshot, snapshot_file, force_cpu) 80 | 81 | # Initialize data logger 82 | logger = Logger(continue_logging, logging_directory) 83 | logger.save_camera_info(robot.cam_intrinsics, robot.cam_pose, robot.cam_depth_scale) # Save camera intrinsics and pose 84 | logger.save_heightmap_info(workspace_limits, heightmap_resolution) # Save heightmap parameters 85 | 86 | # save the current configuration file in the current logging directory 87 | logger.save_config_file(CONF_PATH) 88 | 89 | # Find last executed iteration of pre-loaded log, and load execution info and RL variables 90 | if continue_logging: 91 | trainer.preload(logger.transitions_directory) 92 | 93 | # Initialize variables for heuristic bootstrapping and exploration probability 94 | # no_change_count = [2, 2] if not is_testing else [0, 0] 95 | 96 | explore_prob = explore_prob if not is_testing else 0.0 97 | # This overrides all above!!! 
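# Precedence of the exploration settings: is_testing (above) forces pure
# exploitation (explore_prob = 0.0), while is_random_model (below) forces pure
# exploration (explore_prob = 1.0) regardless of the value in the config file.
# Minimal sketch of how explore_prob is consumed later, once per action, inside
# process_actions() -- the 0.5 is an illustrative value only:
#   explore_prob = 0.5                                   # example value
#   explore_actions = np.random.uniform() < explore_prob
#   # False -> EXPLOIT: argmax over the 16 x 224 x 224 Q-map push_predictions
#   # True  -> EXPLORE: random (rotation, y, x) index into that same Q-map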
98 | if is_random_model: 99 | explore_prob = 1.0 100 | explore_rate_decay = False 101 | 102 | nonlocal_variables = {'executing_action': False, 103 | 'primitive_action': None, 104 | 'best_pix_ind': None, 105 | 'push_success': False, 106 | 'grasp_success': False, 107 | 'session_success': False, 108 | 'session_first_loop': True} 109 | session_counter = 0 110 | # session no (index)--> [number of iteration for that session, fail(0)/success(1)] 111 | success_fail_record = [] 112 | last_iter = 0 113 | # plt.ion() 114 | 115 | # Parallel thread to process network output and execute actions 116 | # ------------------------------------------------------------- 117 | def process_actions(): 118 | while True: 119 | if nonlocal_variables['executing_action']: 120 | 121 | print('Primitive confidence scores for pushing: {:f}'.format(np.max(push_predictions))) 122 | nonlocal_variables['primitive_action'] = 'push' 123 | 124 | # Exploration vs. Exploitation - Best push vs random push / if is_testing --> explore_prob=0! 125 | explore_actions = np.random.uniform() < explore_prob 126 | if not explore_actions: 127 | print('Strategy: ' + BColors.WARNING + 'EXPLOIT' + BColors.ENDC + ' (exploration probability: ' 128 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 129 | # Exploitation: Return the index of the x, y coordinates of the best prediction in the Q-values 130 | nonlocal_variables['best_pix_ind'] = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 131 | predicted_value = np.max(push_predictions) 132 | 133 | else: 134 | print('Strategy:' + BColors.WARNING + 'EXPLORE' + BColors.ENDC + ' (exploration probability:' 135 | + BColors.OKBLUE + '{:f})'.format(explore_prob) + BColors.ENDC) 136 | 137 | nonlocal_variables['best_pix_ind'] = (np.random.random_integers(0, 15), 138 | np.random.random_integers(0, 224-1), 139 | np.random.random_integers(0, 224-1)) 140 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 141 | # nonlocal_variables['best_pix_ind'] = np.random.random_integers(0, 224-1) 142 | predicted_value = push_predictions[nonlocal_variables['best_pix_ind']] 143 | 144 | trainer.is_exploit_log.append([0 if explore_actions else 1]) 145 | logger.write_to_log('is-exploit', trainer.is_exploit_log) 146 | # Save predicted confidence value 147 | trainer.predicted_value_log.append([predicted_value]) 148 | logger.write_to_log('predicted-value', trainer.predicted_value_log) 149 | 150 | # Compute 3D position of pixel 151 | print('Action: %s at (%d, %d, %d)' % (nonlocal_variables['primitive_action'], nonlocal_variables['best_pix_ind'][0], 152 | nonlocal_variables['best_pix_ind'][1], nonlocal_variables['best_pix_ind'][2])) 153 | 154 | best_rotation_angle = np.deg2rad(nonlocal_variables['best_pix_ind'][0] * (360.0/trainer.model.num_rotations)) 155 | best_pix_x = nonlocal_variables['best_pix_ind'][2] 156 | best_pix_y = nonlocal_variables['best_pix_ind'][1] 157 | primitive_position = [best_pix_x * heightmap_resolution + workspace_limits[0][0], 158 | best_pix_y * heightmap_resolution + workspace_limits[1][0], 159 | valid_depth_heightmap[best_pix_y][best_pix_x] + workspace_limits[2][0]] 160 | print("primitive position --> ", primitive_position) 161 | # If pushing, adjust start position, and make sure z value is safe and not too low 162 | if nonlocal_variables['primitive_action'] == 'push': 163 | finger_width = 0.02 164 | safe_kernel_width = int(np.round((finger_width/2)/heightmap_resolution)) 165 | local_region = valid_depth_heightmap[max(best_pix_y - 
safe_kernel_width, 0) 166 | :min(best_pix_y + safe_kernel_width + 1, valid_depth_heightmap.shape[0]), 167 | max(best_pix_x - safe_kernel_width, 0) 168 | :min(best_pix_x + safe_kernel_width + 1, valid_depth_heightmap.shape[1])] 169 | if local_region.size == 0: 170 | safe_z_position = workspace_limits[2][0] 171 | 172 | elif (np.max(local_region) - workspace_limits[2][0]) > finger_width: 173 | safe_z_position = np.max(local_region) - finger_width/4 174 | else: 175 | safe_z_position = np.max(local_region) 176 | 177 | primitive_position[2] = safe_z_position 178 | 179 | # Save executed primitive --> 0 for exploit, 1 for explore 180 | if not explore_actions: 181 | trainer.executed_action_log.append([0, nonlocal_variables['best_pix_ind'][0], 182 | nonlocal_variables['best_pix_ind'][1], 183 | nonlocal_variables['best_pix_ind'][2]]) 184 | else: 185 | trainer.executed_action_log.append([1, nonlocal_variables['best_pix_ind'][0], 186 | nonlocal_variables['best_pix_ind'][1], 187 | nonlocal_variables['best_pix_ind'][2]]) 188 | 189 | logger.write_to_log('executed-action', trainer.executed_action_log) 190 | 191 | # Visualize executed primitive, and affordances 192 | if save_visualizations: 193 | push_pred_vis = trainer.get_prediction_vis(push_predictions, color_heightmap, nonlocal_variables['best_pix_ind']) 194 | logger.save_visualizations(trainer.iteration, push_pred_vis, 'push') 195 | cv2.imwrite('visualization.push.png', push_pred_vis) 196 | 197 | # Initialize variables that influence reward 198 | nonlocal_variables['push_success'] = False 199 | 200 | # robot.move_to_target([-0.5, 0.0, 0.036 + safe_z_position]) 201 | 202 | # Execute primitive 203 | if nonlocal_variables['primitive_action'] == 'push': 204 | nonlocal_variables['push_success'] = robot.push(primitive_position, best_rotation_angle, workspace_limits) 205 | print('Push executed: %r' % (nonlocal_variables['push_success'])) 206 | print(BColors.OKBLUE + '-------------------------------------------------------------------------' 207 | + BColors.ENDC) 208 | nonlocal_variables['executing_action'] = False 209 | 210 | 211 | time.sleep(0.01) 212 | action_thread = threading.Thread(target=process_actions) 213 | action_thread.daemon = True 214 | action_thread.start() 215 | exit_called = False 216 | # ------------------------------------------------------------- 217 | # ------------------------------------------------------------- 218 | 219 | # Start main training/testing loop 220 | while True: 221 | print(BColors.WARNING + '\n%s iteration: %d' % ('Testing' if is_testing else 'Training', trainer.iteration) + 222 | BColors.ENDC + ' (Session iteration: {:d})'.format(session_counter)) 223 | iteration_time_0 = time.time() 224 | session_counter += 1 225 | # Make sure simulation is still stable (if not, reset simulation) 226 | robot.check_sim() 227 | 228 | # Get latest RGB-D image 229 | color_img, depth_img_raw = robot.get_camera_data() 230 | # Detph scale is 1 for simulation!! 
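# Sanity note on the heightmap computed just below: utils.get_heightmap()
# re-projects the RGB-D image into robot coordinates and grids it at
# heightmap_resolution, so each side of the heightmap spans
#   (workspace_limits[i][1] - workspace_limits[i][0]) / heightmap_resolution
# pixels. With the values shipped in push-DQN_config.yaml this works out to
# 224 x 224, which is what the random action sampler (indices 0..223) and the
# 224 x 224 action area in trainer.backprop() assume. Worked check (sketch):
#   heightmap_px = int(round((workspace_limits[0][1] - workspace_limits[0][0]) / heightmap_resolution))  # expected: 224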
231 | depth_img = depth_img_raw * robot.cam_depth_scale # Apply depth scale from calibration 232 | 233 | # Get heightmap from RGB-D image (by re-projecting 3D point cloud) 234 | color_heightmap, depth_heightmap = utils.get_heightmap(color_img, depth_img, robot.cam_intrinsics, 235 | robot.cam_pose, workspace_limits, heightmap_resolution) 236 | valid_depth_heightmap = depth_heightmap.copy() 237 | valid_depth_heightmap[np.isnan(valid_depth_heightmap)] = 0 238 | 239 | # CHECK IF IT IS THE FIRST LOOP 240 | if nonlocal_variables['session_first_loop']: 241 | 242 | print(BColors.WARNING + 'This is the zeroth iteration! ' 243 | 'Mask-RG Values below will be used in the first iteration!' + BColors.ENDC) 244 | # Get ground truth segmentation masks 245 | color_m_rg, depth_m_rg, [segmentation_mask, num_objects] = robot.get_data_mask_rg() 246 | # print(num_objects) 247 | plt.imsave('ground_truth.png', segmentation_mask) 248 | plt.imsave('color_img.png', color_m_rg) 249 | plt.imsave('depth_img.png', depth_m_rg) 250 | 251 | # set the mask rcnn reward generator with the current depth and gt 252 | mask_rg.set_reward_generator(depth_m_rg, segmentation_mask) 253 | 254 | # get the rewards predictions 255 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 256 | printout = mask_rg.print_segmentation(pred_ids) 257 | plt.imsave('mask_pred_diff.png', printout) 258 | 259 | prev_seg_reward = seg_reward.copy() 260 | prev_depth_for_chg_det = depth_m_rg.copy() 261 | depth_m_rg_act = depth_m_rg.copy() # for change detection patch 262 | 263 | # if there is a success without a push just by chance, flag for the termination of the session 264 | if seg_reward > session_success_threshold: 265 | success_without_push = True 266 | nonlocal_variables['session_success'] = True 267 | else: 268 | success_without_push = False 269 | 270 | # END OF FIRST LOOP --> DEACTIVATE 271 | nonlocal_variables['session_first_loop'] = False 272 | 273 | # Save RGB-D images and RGB-D heightmaps 274 | logger.save_images(trainer.iteration, color_img, depth_img, '0') 275 | logger.save_heightmaps(trainer.iteration, color_heightmap, valid_depth_heightmap, '0') 276 | 277 | # Save mask prediction difference image and error rate per iteration 278 | if 'color_m_rg_act' in locals(): 279 | logger.save_mask_diff_image(printout, segmentation_mask_action, color_m_rg_act, 280 | depth_m_rg_act, trainer.iteration, session_counter-1, err_rate) 281 | 282 | if not exit_called: 283 | # Run forward pass with network to get affordances 284 | push_predictions = trainer.forward(color_heightmap, valid_depth_heightmap, is_volatile=True) 285 | 286 | # Execute best primitive action on robot in another thread 287 | nonlocal_variables['executing_action'] = True 288 | 289 | # Run training iteration in current thread (aka training thread) 290 | # at the first loop prev_color_img will not be in locals! as well as other vars with 'prev_' 291 | if 'prev_color_img' in locals(): 292 | # The thresh hold that will terminate the current session with success, this line is after current 293 | # push action and getting the current scene reward score from mask_rg 294 | if seg_reward > session_success_threshold: 295 | # Push success!!! 
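# seg_reward here is the scene-level segmentation score that Mask-RG returned
# for the state reached by the last push (computed at the bottom of the
# previous loop iteration); once it crosses session_success_threshold the whole
# session is flagged as successful and closed in the SUCCESS branch further
# below (logging, simulation restart, session_first_loop reset).
# Illustration with made-up numbers: with a threshold of 0.9, a seg_reward of
# 0.93 enters this branch, while 0.87 keeps the session running and only
# feeds into the push reward computed by trainer.get_reward_value().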
296 | nonlocal_variables['session_success'] = True 297 | 298 | # Detect changes 299 | depth_diff = abs(depth_heightmap - prev_depth_heightmap) 300 | # depth_diff = abs(depth_m_rg_act - prev_depth_for_chg_det) 301 | depth_diff[np.isnan(depth_diff)] = 0 302 | depth_diff[depth_diff < 0.01] = 0 303 | 304 | depth_diff[depth_diff > 0] = 1 305 | change_threshold = 100 306 | change_value = np.sum(depth_diff) 307 | change_detected = change_value > change_threshold 308 | print('Change detected: %r (value: %d)' % (change_detected, change_value)) 309 | 310 | if not success_without_push: 311 | label_value, prev_reward_value, seg_vals = trainer.get_reward_value(color_heightmap, valid_depth_heightmap, 312 | prev_seg_reward, seg_reward, change_detected) 313 | 314 | trainer.label_value_log.append([label_value]) 315 | logger.write_to_log('label-value', trainer.label_value_log) 316 | trainer.reward_value_log.append([prev_reward_value]) 317 | logger.write_to_log('reward-value', trainer.reward_value_log) 318 | 319 | logger.save_mask_rg_returns([seg_vals, num_objects_action.size-1]) 320 | 321 | # Sync both action thread and training thread 322 | while nonlocal_variables['executing_action']: 323 | time.sleep(0.01) 324 | 325 | # SUCCESS 326 | if nonlocal_variables['session_success']: 327 | if not success_without_push: 328 | success_fail_record.append([session_counter, 1]) 329 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 330 | + str(session_counter - 1) + 331 | ' pushing actions --> SUCCESS! (tr_it --> {:d})'.format(trainer.iteration)) 332 | 333 | print(BColors.WARNING + 'SESSION SUCCESS!' + BColors.ENDC) 334 | else: 335 | success_fail_record.append([session_counter, 2]) 336 | logger.save_session_success_fail(' Session no ' + str(len(success_fail_record)) + ' --> after ' 337 | + str(session_counter - 1) + 338 | ' TRAINING IGNORED --> (tr_it --> {:d})'.format( 339 | trainer.iteration)) 340 | print(BColors.WARNING + 'SESSION CLOSED WITHOUT TRAINING AS THE INITIAL SEGMENTATION WAS GOOD ENOUGH!' 341 | + BColors.ENDC) 342 | 343 | last_iter = trainer.iteration 344 | nonlocal_variables['session_success'] = False 345 | nonlocal_variables['session_first_loop'] = True 346 | seg_reward = np.asarray(0) 347 | session_counter = 0 348 | robot.restart_sim() 349 | 350 | # FAIL 351 | # it depends how to count --> 0th iteration of a session does not do any training, just gets two GTs to be able 352 | # to calculate rewards at the 1st iteration. "session_counter > 30" means after 31 sessions including the 0th 353 | # session or 30 training sessions 354 | if session_counter > session_limit: 355 | 356 | success_fail_record.append([session_counter, 0]) 357 | logger.save_session_success_fail('Session no ' + str(len(success_fail_record)) + ' --> after ' 358 | + str(session_counter) + 359 | ' pushing actions --> FAIL! (tr_it --> {:d})'.format(trainer.iteration)) 360 | last_iter = trainer.iteration 361 | nonlocal_variables['session_first_loop'] = True 362 | robot.restart_sim() 363 | # robot.add_objects(18) 364 | seg_reward = np.asarray(0) 365 | 366 | print(BColors.WARNING + 'SESSION TIMEOUT! 
(after 30 iterations without success)' + BColors.ENDC) 367 | session_counter = 0 368 | 369 | if exit_called: 370 | break 371 | 372 | # Save information for next training step 373 | prev_color_img = color_img.copy() 374 | prev_depth_img = depth_img.copy() 375 | prev_color_heightmap = color_heightmap.copy() 376 | prev_depth_heightmap = depth_heightmap.copy() 377 | prev_valid_depth_heightmap = valid_depth_heightmap.copy() 378 | prev_push_success = nonlocal_variables['push_success'] 379 | prev_grasp_success = False 380 | prev_primitive_action = 'push' 381 | prev_push_predictions = push_predictions.copy() 382 | prev_best_pix_ind = nonlocal_variables['best_pix_ind'] 383 | 384 | 385 | if not nonlocal_variables['session_first_loop']: 386 | # Action is done 387 | prev_seg_reward = seg_reward.copy() 388 | prev_depth_for_chg_det = depth_m_rg_act.copy() 389 | # print('Previous segmentation reward --> {:.5f}'.format(prev_seg_reward)) 390 | # Get RGB-D image 391 | color_img_action, depth_img_raw_action = robot.get_camera_data() 392 | # Get ground truth segmentation masks 393 | color_m_rg_act, depth_m_rg_act, [segmentation_mask_action, num_objects_action] = robot.get_data_mask_rg() 394 | plt.imsave('ground_truth.png', segmentation_mask_action) 395 | plt.imsave('color_img.png', color_m_rg_act) 396 | plt.imsave('depth_img.png', depth_m_rg_act) 397 | mask_rg.set_reward_generator(depth_m_rg_act, segmentation_mask_action) 398 | 399 | pred_ids, seg_reward, err_rate = mask_rg.get_current_rewards() 400 | printout = mask_rg.print_segmentation(pred_ids) 401 | plt.imsave('mask_pred_diff.png', printout) 402 | print('Number of the objects left in the scene after pushing action -->', num_objects_action.size - 1) 403 | # plot_obj.set_data(printout) 404 | # plt.draw() 405 | all_masks = mask_rg.print_masks() 406 | plt.imsave('mask_pred_all.png', all_masks) 407 | 408 | trainer.iteration += 1 409 | iteration_time_1 = time.time() 410 | print('Time elapsed: %f' % (iteration_time_1-iteration_time_0)) 411 | print('---------------------------------------------------------------') 412 | print('---------------------------------------------------------------') 413 | 414 | 415 | if __name__ == '__main__': 416 | main() 417 | -------------------------------------------------------------------------------- /src/push-DQN/models.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from collections import OrderedDict 4 | import numpy as np 5 | from scipy import ndimage 6 | import torch 7 | import torch.nn as nn 8 | import torch.nn.functional as F 9 | from torch.autograd import Variable 10 | import torchvision 11 | import matplotlib.pyplot as plt 12 | import time 13 | 14 | 15 | class reactive_net(nn.Module): 16 | 17 | def __init__(self, use_cuda): # , snapshot=None 18 | super(reactive_net, self).__init__() 19 | self.use_cuda = use_cuda 20 | 21 | # Initialize network trunks with DenseNet pre-trained on ImageNet 22 | self.push_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 23 | self.push_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 24 | self.grasp_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 25 | self.grasp_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 26 | 27 | self.num_rotations = 16 28 | 29 | # Construct network branches for pushing and grasping 30 | self.pushnet = nn.Sequential(OrderedDict([ 31 | ('push-norm0', nn.BatchNorm2d(2048)), 32 | ('push-relu0', 
nn.ReLU(inplace=True)), 33 | ('push-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 34 | ('push-norm1', nn.BatchNorm2d(64)), 35 | ('push-relu1', nn.ReLU(inplace=True)), 36 | ('push-conv1', nn.Conv2d(64, 3, kernel_size=1, stride=1, bias=False)) 37 | # ('push-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 38 | ])) 39 | self.graspnet = nn.Sequential(OrderedDict([ 40 | ('grasp-norm0', nn.BatchNorm2d(2048)), 41 | ('grasp-relu0', nn.ReLU(inplace=True)), 42 | ('grasp-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 43 | ('grasp-norm1', nn.BatchNorm2d(64)), 44 | ('grasp-relu1', nn.ReLU(inplace=True)), 45 | ('grasp-conv1', nn.Conv2d(64, 3, kernel_size=1, stride=1, bias=False)) 46 | # ('grasp-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 47 | ])) 48 | 49 | # Initialize network weights 50 | for m in self.named_modules(): 51 | if 'push-' in m[0] or 'grasp-' in m[0]: 52 | if isinstance(m[1], nn.Conv2d): 53 | nn.init.kaiming_normal(m[1].weight.data) 54 | elif isinstance(m[1], nn.BatchNorm2d): 55 | m[1].weight.data.fill_(1) 56 | m[1].bias.data.zero_() 57 | 58 | # Initialize output variable (for backprop) 59 | self.interm_feat = [] 60 | self.output_prob = [] 61 | 62 | 63 | def forward(self, input_color_data, input_depth_data, is_volatile=False, specific_rotation=-1): 64 | 65 | if is_volatile: 66 | output_prob = [] 67 | interm_feat = [] 68 | 69 | # Apply rotations to images 70 | for rotate_idx in range(self.num_rotations): 71 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 72 | 73 | # Compute sample grid for rotation BEFORE neural network 74 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 75 | affine_mat_before.shape = (2,3,1) 76 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 77 | if self.use_cuda: 78 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 79 | else: 80 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 81 | 82 | # Rotate images clockwise 83 | if self.use_cuda: 84 | rotate_color = F.grid_sample(Variable(input_color_data, volatile=True).cuda(), flow_grid_before, mode='nearest') 85 | rotate_depth = F.grid_sample(Variable(input_depth_data, volatile=True).cuda(), flow_grid_before, mode='nearest') 86 | else: 87 | rotate_color = F.grid_sample(Variable(input_color_data, volatile=True), flow_grid_before, mode='nearest') 88 | rotate_depth = F.grid_sample(Variable(input_depth_data, volatile=True), flow_grid_before, mode='nearest') 89 | 90 | # Compute intermediate features 91 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 92 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 93 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 94 | interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 95 | interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 96 | interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 97 | interm_feat.append([interm_push_feat, interm_grasp_feat]) 98 | 99 | # Compute sample grid for rotation AFTER branches 100 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 101 | affine_mat_after.shape = (2,3,1) 102 | affine_mat_after = 
torch.from_numpy(affine_mat_after).permute(2,0,1).float() 103 | if self.use_cuda: 104 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 105 | else: 106 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 107 | 108 | # Forward pass through branches, undo rotation on output predictions, upsample results 109 | output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 110 | nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 111 | 112 | return output_prob, interm_feat 113 | 114 | else: 115 | self.output_prob = [] 116 | self.interm_feat = [] 117 | 118 | # Apply rotations to intermediate features 119 | # for rotate_idx in range(self.num_rotations): 120 | rotate_idx = specific_rotation 121 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 122 | 123 | # Compute sample grid for rotation BEFORE branches 124 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 125 | affine_mat_before.shape = (2,3,1) 126 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 127 | if self.use_cuda: 128 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 129 | else: 130 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 131 | 132 | # Rotate images clockwise 133 | if self.use_cuda: 134 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 135 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 136 | else: 137 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False), flow_grid_before, mode='nearest') 138 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False), flow_grid_before, mode='nearest') 139 | 140 | # Compute intermediate features 141 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 142 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 143 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 144 | interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 145 | interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 146 | interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 147 | self.interm_feat.append([interm_push_feat, interm_grasp_feat]) 148 | 149 | # Compute sample grid for rotation AFTER branches 150 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 151 | affine_mat_after.shape = (2,3,1) 152 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2,0,1).float() 153 | if self.use_cuda: 154 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 155 | else: 156 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 157 | 158 | # Forward pass through branches, undo rotation on output predictions, upsample results 159 | 
self.output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 160 | nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 161 | 162 | return self.output_prob, self.interm_feat 163 | 164 | 165 | class reinforcement_net(nn.Module): 166 | 167 | def __init__(self, use_cuda): # , snapshot=None 168 | super(reinforcement_net, self).__init__() 169 | self.use_cuda = use_cuda 170 | 171 | # Initialize network trunks with DenseNet pre-trained on ImageNet 172 | self.push_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 173 | self.push_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 174 | # self.grasp_color_trunk = torchvision.models.densenet.densenet121(pretrained=True) 175 | # self.grasp_depth_trunk = torchvision.models.densenet.densenet121(pretrained=True) 176 | 177 | self.num_rotations = 16 178 | 179 | # Construct network branches for pushing and grasping 180 | self.pushnet = nn.Sequential(OrderedDict([ 181 | ('push-norm0', nn.BatchNorm2d(2048)), 182 | ('push-relu0', nn.ReLU(inplace=True)), 183 | ('push-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 184 | ('push-norm1', nn.BatchNorm2d(64)), 185 | ('push-relu1', nn.ReLU(inplace=True)), 186 | ('push-conv1', nn.Conv2d(64, 1, kernel_size=1, stride=1, bias=False)) 187 | # ('push-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 188 | ])) 189 | # self.graspnet = nn.Sequential(OrderedDict([ 190 | # ('grasp-norm0', nn.BatchNorm2d(2048)), 191 | # ('grasp-relu0', nn.ReLU(inplace=True)), 192 | # ('grasp-conv0', nn.Conv2d(2048, 64, kernel_size=1, stride=1, bias=False)), 193 | # ('grasp-norm1', nn.BatchNorm2d(64)), 194 | # ('grasp-relu1', nn.ReLU(inplace=True)), 195 | # ('grasp-conv1', nn.Conv2d(64, 1, kernel_size=1, stride=1, bias=False)) 196 | # # ('grasp-upsample2', nn.Upsample(scale_factor=4, mode='bilinear')) 197 | # ])) 198 | 199 | # Initialize network weights 200 | for m in self.named_modules(): 201 | if 'push-' in m[0]: #or 'grasp-' in m[0]: 202 | if isinstance(m[1], nn.Conv2d): 203 | nn.init.kaiming_normal(m[1].weight.data) 204 | elif isinstance(m[1], nn.BatchNorm2d): 205 | m[1].weight.data.fill_(1) 206 | m[1].bias.data.zero_() 207 | 208 | # Initialize output variable (for backprop) 209 | self.interm_feat = [] 210 | self.output_prob = [] 211 | 212 | 213 | def forward(self, input_color_data, input_depth_data, is_volatile=False, specific_rotation=-1): 214 | 215 | if is_volatile: 216 | with torch.no_grad(): 217 | output_prob = [] 218 | interm_feat = [] 219 | 220 | # Apply rotations to images 221 | for rotate_idx in range(self.num_rotations): 222 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 223 | 224 | # Compute sample grid for rotation BEFORE neural network 225 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 226 | affine_mat_before.shape = (2, 3, 1) 227 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2, 0, 1).float() 228 | if self.use_cuda: 229 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 230 | else: 231 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 232 | 233 | # Rotate images clockwise 234 | if self.use_cuda: 235 | with torch.no_grad(): 236 | 
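# Rotation scheme for the 16 push directions: the colour/depth heightmaps are
# rotated by -theta with an affine sampling grid before the DenseNet trunks,
# and the resulting Q-map is rotated back by +theta (flow_grid_after) so every
# prediction is aligned with the un-rotated image frame. Sketch of the 2x3
# matrix that F.affine_grid expects, for an illustrative angle of 45 degrees:
#   theta = np.radians(45)
#   rot = np.asarray([[np.cos(-theta), np.sin(-theta), 0],
#                     [-np.sin(-theta), np.cos(-theta), 0]])  # same form as affine_mat_before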
rotate_color = F.grid_sample(Variable(input_color_data).cuda(), flow_grid_before, mode='nearest') 237 | rotate_depth = F.grid_sample(Variable(input_depth_data).cuda(), flow_grid_before, mode='nearest') 238 | else: 239 | with torch.no_grad(): 240 | rotate_color = F.grid_sample(Variable(input_color_data), flow_grid_before, mode='nearest') 241 | rotate_depth = F.grid_sample(Variable(input_depth_data), flow_grid_before, mode='nearest') 242 | 243 | # Compute intermediate features 244 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 245 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 246 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 247 | # interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 248 | # interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 249 | # interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 250 | # interm_feat.append([interm_push_feat, interm_grasp_feat]) 251 | 252 | # Compute sample grid for rotation AFTER branches 253 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0], 254 | [-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 255 | affine_mat_after.shape = (2, 3, 1) 256 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2, 0, 1).float() 257 | if self.use_cuda: 258 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), 259 | interm_push_feat.data.size()) 260 | else: 261 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), 262 | interm_push_feat.data.size()) 263 | 264 | # Forward pass through branches, undo rotation on output predictions, upsample results 265 | # TODO remove 0 replaces grasp net 266 | output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')), 0]) 267 | #nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 268 | 269 | return output_prob #, interm_feat 270 | 271 | else: 272 | self.output_prob = [] 273 | self.interm_feat = [] 274 | 275 | # Apply rotations to intermediate features 276 | # for rotate_idx in range(self.num_rotations): 277 | rotate_idx = specific_rotation 278 | rotate_theta = np.radians(rotate_idx*(360/self.num_rotations)) 279 | 280 | # Compute sample grid for rotation BEFORE branches 281 | affine_mat_before = np.asarray([[np.cos(-rotate_theta), np.sin(-rotate_theta), 0],[-np.sin(-rotate_theta), np.cos(-rotate_theta), 0]]) 282 | affine_mat_before.shape = (2,3,1) 283 | affine_mat_before = torch.from_numpy(affine_mat_before).permute(2,0,1).float() 284 | if self.use_cuda: 285 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False).cuda(), input_color_data.size()) 286 | else: 287 | flow_grid_before = F.affine_grid(Variable(affine_mat_before, requires_grad=False), input_color_data.size()) 288 | 289 | # Rotate images clockwise 290 | if self.use_cuda: 291 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 292 | rotate_depth = F.grid_sample(Variable(input_depth_data, requires_grad=False).cuda(), flow_grid_before, mode='nearest') 293 | else: 294 | rotate_color = F.grid_sample(Variable(input_color_data, requires_grad=False), flow_grid_before, mode='nearest') 295 | rotate_depth = F.grid_sample(Variable(input_depth_data, 
requires_grad=False), flow_grid_before, mode='nearest') 296 | 297 | # Compute intermediate features 298 | interm_push_color_feat = self.push_color_trunk.features(rotate_color) 299 | interm_push_depth_feat = self.push_depth_trunk.features(rotate_depth) 300 | interm_push_feat = torch.cat((interm_push_color_feat, interm_push_depth_feat), dim=1) 301 | # interm_grasp_color_feat = self.grasp_color_trunk.features(rotate_color) 302 | # interm_grasp_depth_feat = self.grasp_depth_trunk.features(rotate_depth) 303 | # interm_grasp_feat = torch.cat((interm_grasp_color_feat, interm_grasp_depth_feat), dim=1) 304 | # self.interm_feat.append([interm_push_feat, interm_grasp_feat]) 305 | 306 | # Compute sample grid for rotation AFTER branches 307 | affine_mat_after = np.asarray([[np.cos(rotate_theta), np.sin(rotate_theta), 0],[-np.sin(rotate_theta), np.cos(rotate_theta), 0]]) 308 | affine_mat_after.shape = (2, 3, 1) 309 | affine_mat_after = torch.from_numpy(affine_mat_after).permute(2,0,1).float() 310 | if self.use_cuda: 311 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False).cuda(), interm_push_feat.data.size()) 312 | else: 313 | flow_grid_after = F.affine_grid(Variable(affine_mat_after, requires_grad=False), interm_push_feat.data.size()) 314 | 315 | # Forward pass through branches, undo rotation on output predictions, upsample results 316 | # TODO remove 0 replaces grasp net 317 | self.output_prob.append([nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.pushnet(interm_push_feat), flow_grid_after, mode='nearest')),0]) 318 | #nn.Upsample(scale_factor=16, mode='bilinear').forward(F.grid_sample(self.graspnet(interm_grasp_feat), flow_grid_after, mode='nearest'))]) 319 | 320 | return self.output_prob #, self.interm_feat 321 | -------------------------------------------------------------------------------- /src/push-DQN/trainer.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | from utils import CrossEntropyLoss2d 10 | from models import reactive_net, reinforcement_net 11 | from scipy import ndimage 12 | import matplotlib.pyplot as plt 13 | 14 | class BColors: 15 | HEADER = '\033[95m' 16 | OKBLUE = '\033[94m' 17 | OKGREEN = '\033[92m' 18 | WARNING = '\033[93m' 19 | FAIL = '\033[91m' 20 | ENDC = '\033[0m' 21 | BOLD = '\033[1m' 22 | UNDERLINE = '\033[4m' 23 | 24 | class Trainer(object): 25 | def __init__(self, future_reward_discount, 26 | is_testing, load_snapshot, snapshot_file, force_cpu): 27 | 28 | self.method = 'reinforcement' 29 | 30 | # Check if CUDA can be used 31 | if torch.cuda.is_available() and not force_cpu: 32 | print("CUDA detected. Running with GPU acceleration.") 33 | self.use_cuda = True 34 | elif force_cpu: 35 | print("CUDA detected, but overriding with option '--cpu'. Running with only CPU.") 36 | self.use_cuda = False 37 | else: 38 | print("CUDA is *NOT* detected. 
Running with only CPU.") 39 | self.use_cuda = False 40 | 41 | # Fully convolutional classification network for supervised learning 42 | if self.method == 'reactive': 43 | self.model = reactive_net(self.use_cuda) 44 | 45 | # Initialize classification loss 46 | push_num_classes = 3 # 0 - push, 1 - no change push, 2 - no loss 47 | push_class_weights = torch.ones(push_num_classes) 48 | push_class_weights[push_num_classes - 1] = 0 49 | if self.use_cuda: 50 | self.push_criterion = CrossEntropyLoss2d(push_class_weights.cuda()).cuda() 51 | else: 52 | self.push_criterion = CrossEntropyLoss2d(push_class_weights) 53 | grasp_num_classes = 3 # 0 - grasp, 1 - failed grasp, 2 - no loss 54 | grasp_class_weights = torch.ones(grasp_num_classes) 55 | grasp_class_weights[grasp_num_classes - 1] = 0 56 | if self.use_cuda: 57 | self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights.cuda()).cuda() 58 | else: 59 | self.grasp_criterion = CrossEntropyLoss2d(grasp_class_weights) 60 | 61 | # Fully convolutional Q network for deep reinforcement learning 62 | elif self.method == 'reinforcement': 63 | self.model = reinforcement_net(self.use_cuda) 64 | self.future_reward_discount = future_reward_discount 65 | 66 | # Initialize Huber loss 67 | self.criterion = torch.nn.SmoothL1Loss(reduce=False) # Huber loss 68 | if self.use_cuda: 69 | self.criterion = self.criterion.cuda() 70 | 71 | # Load pre-trained model 72 | if load_snapshot: 73 | self.model.load_state_dict(torch.load(snapshot_file)) 74 | print('Pre-trained model snapshot loaded from: %s' % (snapshot_file)) 75 | 76 | # Convert model from CPU to GPU 77 | if self.use_cuda: 78 | self.model = self.model.cuda() 79 | 80 | # Set model to training mode 81 | self.model.train() 82 | 83 | # Initialize optimizer 84 | self.optimizer = torch.optim.SGD(self.model.parameters(), lr=1e-4, momentum=0.9, weight_decay=2e-5) 85 | self.iteration = 0 86 | 87 | # Initialize lists to save execution info and RL variables 88 | self.executed_action_log = [] 89 | self.label_value_log = [] 90 | self.reward_value_log = [] 91 | self.predicted_value_log = [] 92 | self.use_heuristic_log = [] 93 | self.is_exploit_log = [] 94 | self.clearance_log = [] 95 | self.loss_log = [] 96 | 97 | 98 | # Pre-load execution info and RL variables 99 | def preload(self, transitions_directory): 100 | self.executed_action_log = np.loadtxt(os.path.join(transitions_directory, 'executed-action.log.txt'), delimiter=' ') 101 | self.iteration = self.executed_action_log.shape[0] - 2 102 | self.executed_action_log = self.executed_action_log[0:self.iteration,:] 103 | self.executed_action_log = self.executed_action_log.tolist() 104 | self.label_value_log = np.loadtxt(os.path.join(transitions_directory, 'label-value.log.txt'), delimiter=' ') 105 | self.label_value_log = self.label_value_log[0:self.iteration] 106 | self.label_value_log.shape = (self.iteration,1) 107 | self.label_value_log = self.label_value_log.tolist() 108 | self.predicted_value_log = np.loadtxt(os.path.join(transitions_directory, 'predicted-value.log.txt'), delimiter=' ') 109 | self.predicted_value_log = self.predicted_value_log[0:self.iteration] 110 | self.predicted_value_log.shape = (self.iteration,1) 111 | self.predicted_value_log = self.predicted_value_log.tolist() 112 | self.reward_value_log = np.loadtxt(os.path.join(transitions_directory, 'reward-value.log.txt'), delimiter=' ') 113 | self.reward_value_log = self.reward_value_log[0:self.iteration] 114 | self.reward_value_log.shape = (self.iteration,1) 115 | self.reward_value_log = 
self.reward_value_log.tolist() 116 | self.use_heuristic_log = np.loadtxt(os.path.join(transitions_directory, 'use-heuristic.log.txt'), delimiter=' ') 117 | self.use_heuristic_log = self.use_heuristic_log[0:self.iteration] 118 | self.use_heuristic_log.shape = (self.iteration,1) 119 | self.use_heuristic_log = self.use_heuristic_log.tolist() 120 | self.is_exploit_log = np.loadtxt(os.path.join(transitions_directory, 'is-exploit.log.txt'), delimiter=' ') 121 | self.is_exploit_log = self.is_exploit_log[0:self.iteration] 122 | self.is_exploit_log.shape = (self.iteration,1) 123 | self.is_exploit_log = self.is_exploit_log.tolist() 124 | self.clearance_log = np.loadtxt(os.path.join(transitions_directory, 'clearance.log.txt'), delimiter=' ') 125 | self.clearance_log.shape = (self.clearance_log.shape[0],1) 126 | self.clearance_log = self.clearance_log.tolist() 127 | 128 | self.loss_log = np.loadtxt(os.path.join(transitions_directory, 'loss.log.txt'), delimiter=' ') 129 | self.loss_log.shape = (self.loss_log.shape[0],1) 130 | self.loss_log = self.loss_log.tolist() 131 | 132 | 133 | 134 | # Compute forward pass through model to compute affordances/Q 135 | def forward(self, color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=-1): 136 | 137 | # Apply 2x scale to input heightmaps 138 | color_heightmap_2x = ndimage.zoom(color_heightmap, zoom=[2,2,1], order=0) 139 | depth_heightmap_2x = ndimage.zoom(depth_heightmap, zoom=[2,2], order=0) 140 | assert(color_heightmap_2x.shape[0:2] == depth_heightmap_2x.shape[0:2]) 141 | 142 | # Add extra padding (to handle rotations inside network) 143 | diag_length = float(color_heightmap_2x.shape[0]) * np.sqrt(2) 144 | diag_length = np.ceil(diag_length/32)*32 145 | padding_width = int((diag_length - color_heightmap_2x.shape[0])/2) 146 | color_heightmap_2x_r = np.pad(color_heightmap_2x[:,:,0], padding_width, 'constant', constant_values=0) 147 | color_heightmap_2x_r.shape = (color_heightmap_2x_r.shape[0], color_heightmap_2x_r.shape[1], 1) 148 | color_heightmap_2x_g = np.pad(color_heightmap_2x[:,:,1], padding_width, 'constant', constant_values=0) 149 | color_heightmap_2x_g.shape = (color_heightmap_2x_g.shape[0], color_heightmap_2x_g.shape[1], 1) 150 | color_heightmap_2x_b = np.pad(color_heightmap_2x[:,:,2], padding_width, 'constant', constant_values=0) 151 | color_heightmap_2x_b.shape = (color_heightmap_2x_b.shape[0], color_heightmap_2x_b.shape[1], 1) 152 | color_heightmap_2x = np.concatenate((color_heightmap_2x_r, color_heightmap_2x_g, color_heightmap_2x_b), axis=2) 153 | depth_heightmap_2x = np.pad(depth_heightmap_2x, padding_width, 'constant', constant_values=0) 154 | 155 | # Pre-process color image (scale and normalize) 156 | image_mean = [0.485, 0.456, 0.406] 157 | image_std = [0.229, 0.224, 0.225] 158 | input_color_image = color_heightmap_2x.astype(float)/255 159 | for c in range(3): 160 | input_color_image[:,:,c] = (input_color_image[:,:,c] - image_mean[c])/image_std[c] 161 | 162 | # Pre-process depth image (normalize) 163 | image_mean = [0.01, 0.01, 0.01] 164 | image_std = [0.03, 0.03, 0.03] 165 | depth_heightmap_2x.shape = (depth_heightmap_2x.shape[0], depth_heightmap_2x.shape[1], 1) 166 | input_depth_image = np.concatenate((depth_heightmap_2x, depth_heightmap_2x, depth_heightmap_2x), axis=2) 167 | for c in range(3): 168 | input_depth_image[:,:,c] = (input_depth_image[:,:,c] - image_mean[c])/image_std[c] 169 | 170 | # Construct minibatch of size 1 (b,c,h,w) 171 | input_color_image.shape = (input_color_image.shape[0], input_color_image.shape[1], 
input_color_image.shape[2], 1) 172 | input_depth_image.shape = (input_depth_image.shape[0], input_depth_image.shape[1], input_depth_image.shape[2], 1) 173 | input_color_data = torch.from_numpy(input_color_image.astype(np.float32)).permute(3,2,0,1) 174 | input_depth_data = torch.from_numpy(input_depth_image.astype(np.float32)).permute(3,2,0,1) 175 | 176 | # Pass input data through model 177 | # output_prob, state_feat 178 | output_prob = self.model.forward(input_color_data, input_depth_data, is_volatile, specific_rotation) 179 | 180 | if self.method == 'reactive': 181 | pass 182 | # # Return affordances (and remove extra padding) 183 | # for rotate_idx in range(len(output_prob)): 184 | # if rotate_idx == 0: 185 | # push_predictions = F.softmax(output_prob[rotate_idx][0], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)] 186 | # grasp_predictions = F.softmax(output_prob[rotate_idx][1], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)] 187 | # else: 188 | # push_predictions = np.concatenate((push_predictions, F.softmax(output_prob[rotate_idx][0], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)]), axis=0) 189 | # grasp_predictions = np.concatenate((grasp_predictions, F.softmax(output_prob[rotate_idx][1], dim=1).cpu().data.numpy()[:,0,(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2),(padding_width/2):(color_heightmap_2x.shape[0]/2 - padding_width/2)]), axis=0) 190 | 191 | elif self.method == 'reinforcement': 192 | 193 | # Return Q values (and remove extra padding) 194 | for rotate_idx in range(len(output_prob)): 195 | # if first rotation 196 | if rotate_idx == 0: 197 | push_predictions = output_prob[rotate_idx][0].cpu().data.numpy()[:, 198 | 0, int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2), 199 | int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2)] 200 | else: 201 | push_predictions = np.concatenate((push_predictions, 202 | output_prob[rotate_idx][0].cpu().data.numpy()[:, 203 | 0, int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2), 204 | int(padding_width/2):int(color_heightmap_2x.shape[0]/2 - padding_width/2)]), 205 | axis=0) 206 | 207 | return push_predictions 208 | 209 | def get_reward_value(self, next_color_heightmap, next_depth_heightmap, prev_seg_score, curr_seg_score, change_detected): 210 | 211 | if self.method == 'reactive': 212 | pass 213 | # Compute label value 214 | # label_value = 0 215 | # if primitive_action == 'push': 216 | # if not change_detected: 217 | # label_value = 1 218 | # elif primitive_action == 'grasp': 219 | # if not grasp_success: 220 | # label_value = 1 221 | # 222 | # print('Label value: %d' % (label_value)) 223 | # return label_value, label_value 224 | 225 | elif self.method == 'reinforcement': 226 | 227 | # Compute current reward 228 | # current_reward = 0 229 | # if primitive_action == 'push': 230 | # if change_detected: 231 | # evaluate rewards intrinsically with Q values with a forward pass below 232 | # if (curr_seg_score - prev_seg_score) == 0: 233 | # curr_seg_rew = curr_seg_score 234 | # else: 235 | 236 | # #this should be in the actual training 237 | # if sess_success and sess_iteration < 10: 238 | # current_reward = 
10 239 | # elif sess_success: 240 | # current_reward = 5 241 | # else: 242 | # if (curr_seg_score - prev_seg_score) > 0.4: 243 | # current_reward = 1.0 244 | # elif (curr_seg_score - prev_seg_score) > 0.01: 245 | # current_reward = 0.3 246 | # elif (curr_seg_score - prev_seg_score) < -0.4: 247 | # current_reward = -0.5 248 | # elif (curr_seg_score - prev_seg_score) < -0.01: 249 | # current_reward = -0.1 250 | # else: #(-0.01 to 0.01) 251 | # current_reward = 0 252 | 253 | diff = curr_seg_score - prev_seg_score 254 | if change_detected: 255 | current_reward = (diff*10)**2 256 | # current_reward = np.abs(diff) * np.exp(np.abs(diff)) 257 | if diff < 0: 258 | current_reward *= -1 259 | else: 260 | current_reward = -0.5 261 | 262 | seg_vals = [curr_seg_score, prev_seg_score, diff, current_reward, change_detected] 263 | print('------------------------------- Push-to-See -------------------------------') 264 | print('Current segmentation score --> ', curr_seg_score, ' Previous segmentation score --> ', prev_seg_score) 265 | print('Segmentation difference after pushing --> ' + BColors.WARNING, curr_seg_score - prev_seg_score, BColors.ENDC) 266 | print('CURRENT REWARD --> ' + BColors.WARNING + '{:.5f}'.format(current_reward) + BColors.ENDC) 267 | next_push_predictions = self.forward(next_color_heightmap, next_depth_heightmap, is_volatile=True) 268 | 269 | future_reward = np.max(next_push_predictions) 270 | print('Future reward: %f' % (future_reward)) 271 | # if primitive_action == 'push' and not self.push_rewards: 272 | # # future reward discount --> 0.5 --> default val 273 | # expected_reward = self.future_reward_discount * future_reward 274 | # print('Expected reward: %f + %f x %f = %f' % (0.0, self.future_reward_discount, future_reward, expected_reward)) 275 | # else: 276 | expected_reward = current_reward + self.future_reward_discount * future_reward 277 | print('Expected reward: %f + (%f x %f) = %f' % (current_reward, self.future_reward_discount, future_reward, expected_reward)) 278 | print('---------------------------------------------------------------') 279 | return expected_reward, current_reward, seg_vals 280 | 281 | 282 | 283 | # Compute labels and backpropagate 284 | def backprop(self, color_heightmap, depth_heightmap, primitive_action, best_pix_ind, label_value): 285 | 286 | if self.method == 'reactive': 287 | 288 | # Compute fill value 289 | fill_value = 2 290 | 291 | # Compute labels 292 | label = np.zeros((1,320,320)) + fill_value 293 | action_area = np.zeros((224,224)) 294 | action_area[best_pix_ind[1]][best_pix_ind[2]] = 1 295 | # blur_kernel = np.ones((5,5),np.float32)/25 296 | # action_area = cv2.filter2D(action_area, -1, blur_kernel) 297 | tmp_label = np.zeros((224,224)) + fill_value 298 | tmp_label[action_area > 0] = label_value 299 | label[0,48:(320-48),48:(320-48)] = tmp_label 300 | 301 | # Compute loss and backward pass 302 | self.optimizer.zero_grad() 303 | loss_value = 0 304 | if primitive_action == 'push': 305 | # loss = self.push_criterion(self.model.output_prob[best_pix_ind[0]][0], Variable(torch.from_numpy(label).long().cuda())) 306 | 307 | # Do forward pass with specified rotation (to save gradients) 308 | # push_predictions, grasp_predictions, state_feat 309 | push_predictions, grasp_predictions = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=best_pix_ind[0]) 310 | 311 | if self.use_cuda: 312 | loss = self.push_criterion(self.model.output_prob[0][0], Variable(torch.from_numpy(label).long().cuda())) 313 | else: 314 | loss = 
self.push_criterion(self.model.output_prob[0][0], Variable(torch.from_numpy(label).long())) 315 | loss.backward() 316 | loss_value = loss.cpu().data.numpy() 317 | 318 | # elif primitive_action == 'grasp': 319 | # # loss = self.grasp_criterion(self.model.output_prob[best_pix_ind[0]][1], Variable(torch.from_numpy(label).long().cuda())) 320 | # # loss += self.grasp_criterion(self.model.output_prob[(best_pix_ind[0] + self.model.num_rotations/2) % self.model.num_rotations][1], Variable(torch.from_numpy(label).long().cuda())) 321 | # 322 | # # Do forward pass with specified rotation (to save gradients) 323 | # push_predictions, grasp_predictions, state_feat = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=best_pix_ind[0]) 324 | # 325 | # if self.use_cuda: 326 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long().cuda())) 327 | # else: 328 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long())) 329 | # loss.backward() 330 | # loss_value += loss.cpu().data.numpy() 331 | # 332 | # # Since grasping is symmetric, train with another forward pass of opposite rotation angle 333 | # opposite_rotate_idx = (best_pix_ind[0] + self.model.num_rotations/2) % self.model.num_rotations 334 | # 335 | # push_predictions, grasp_predictions, state_feat = self.forward(color_heightmap, depth_heightmap, is_volatile=False, specific_rotation=opposite_rotate_idx) 336 | # 337 | # if self.use_cuda: 338 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long().cuda())) 339 | # else: 340 | # loss = self.grasp_criterion(self.model.output_prob[0][1], Variable(torch.from_numpy(label).long())) 341 | # loss.backward() 342 | # loss_value += loss.cpu().data.numpy() 343 | # 344 | # loss_value = loss_value/2 345 | 346 | print('Training loss: %f' % (loss_value)) 347 | self.optimizer.step() 348 | 349 | elif self.method == 'reinforcement': 350 | 351 | # Compute labels 352 | # 320 for rotations?yes 353 | label = np.zeros((1, 320, 320)) 354 | action_area = np.zeros((224, 224)) 355 | action_area[best_pix_ind[1]][best_pix_ind[2]] = 1 356 | # blur_kernel = np.ones((5,5),np.float32)/25 357 | # action_area = cv2.filter2D(action_area, -1, blur_kernel) 358 | tmp_label = np.zeros((224, 224)) 359 | tmp_label[action_area > 0] = label_value 360 | label[0, 48:(320-48), 48:(320-48)] = tmp_label 361 | 362 | # Compute label mask 363 | label_weights = np.zeros(label.shape) 364 | tmp_label_weights = np.zeros((224, 224)) 365 | tmp_label_weights[action_area > 0] = 1 366 | label_weights[0, 48:(320-48), 48:(320-48)] = tmp_label_weights 367 | 368 | # Compute loss and backward pass 369 | self.optimizer.zero_grad() 370 | loss_value = 0 371 | 372 | # Do forward pass with specified rotation (to save gradients) 373 | # push_predictions, grasp_predictions, state_feat 374 | push_predictions = self.forward(color_heightmap, depth_heightmap, is_volatile=False, 375 | specific_rotation=best_pix_ind[0]) 376 | 377 | if self.use_cuda: 378 | loss = self.criterion(self.model.output_prob[0][0].view(1, 320, 320), 379 | Variable(torch.from_numpy(label).float().cuda())) * Variable(torch.from_numpy(label_weights).float().cuda(), 380 | requires_grad=False) 381 | else: 382 | loss = self.criterion(self.model.output_prob[0][0].view(1, 320, 320), 383 | Variable(torch.from_numpy(label).float())) * Variable(torch.from_numpy(label_weights).float(), 384 | requires_grad=False) 385 | loss = loss.sum() 386 | 
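# Note on the masking above: label is 320 x 320 (224 + 2 * 48 padding so the
# target survives the rotations inside the network) and label_weights is 1
# only at the executed action pixel, so the summed Huber loss is driven by a
# single location in the Q-map. Minimal sketch of the same idea with made-up
# shapes ('huber' stands in for the SmoothL1 criterion used here):
#   label = np.zeros((1, 8, 8)); label[0, 3, 4] = label_value
#   weights = np.zeros((1, 8, 8)); weights[0, 3, 4] = 1
#   loss = (huber(pred, label) * weights).sum()   # only pixel (3, 4) contributes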
loss.backward() 387 | loss_value = loss.cpu().data.numpy() 388 | l_value = loss_value 389 | print(BColors.OKBLUE + 'Training loss: %f' % (loss_value) + BColors.OKBLUE) 390 | self.optimizer.step() 391 | 392 | return l_value 393 | 394 | def get_prediction_vis(self, predictions, color_heightmap, best_pix_ind): 395 | 396 | canvas = None 397 | num_rotations = predictions.shape[0] 398 | for canvas_row in range(int(num_rotations/4)): 399 | tmp_row_canvas = None 400 | for canvas_col in range(4): 401 | rotate_idx = canvas_row*4+canvas_col 402 | prediction_vis = predictions[rotate_idx,:,:].copy() 403 | # prediction_vis[prediction_vis < 0] = 0 # assume probability 404 | # prediction_vis[prediction_vis > 1] = 1 # assume probability 405 | prediction_vis = np.clip(prediction_vis, 0, 1) 406 | prediction_vis.shape = (predictions.shape[1], predictions.shape[2]) 407 | prediction_vis = cv2.applyColorMap((prediction_vis*255).astype(np.uint8), cv2.COLORMAP_JET) 408 | if rotate_idx == best_pix_ind[0]: 409 | prediction_vis = cv2.circle(prediction_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0,0,255), 2) 410 | prediction_vis = ndimage.rotate(prediction_vis, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 411 | background_image = ndimage.rotate(color_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 412 | prediction_vis = (0.5*cv2.cvtColor(background_image, cv2.COLOR_RGB2BGR) + 0.5*prediction_vis).astype(np.uint8) 413 | if tmp_row_canvas is None: 414 | tmp_row_canvas = prediction_vis 415 | else: 416 | tmp_row_canvas = np.concatenate((tmp_row_canvas,prediction_vis), axis=1) 417 | if canvas is None: 418 | canvas = tmp_row_canvas 419 | else: 420 | canvas = np.concatenate((canvas,tmp_row_canvas), axis=0) 421 | 422 | return canvas 423 | 424 | 425 | def push_heuristic(self, depth_heightmap): 426 | 427 | num_rotations = 16 428 | 429 | for rotate_idx in range(num_rotations): 430 | rotated_heightmap = ndimage.rotate(depth_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 431 | valid_areas = np.zeros(rotated_heightmap.shape) 432 | valid_areas[ndimage.interpolation.shift(rotated_heightmap, [0, -25], order=0) - rotated_heightmap > 0.02] = 1 433 | # valid_areas = np.multiply(valid_areas, rotated_heightmap) 434 | blur_kernel = np.ones((25, 25), np.float32)/9 435 | valid_areas = cv2.filter2D(valid_areas, -1, blur_kernel) 436 | tmp_push_predictions = ndimage.rotate(valid_areas, -rotate_idx*(360.0/num_rotations), reshape=False, order=0) 437 | tmp_push_predictions.shape = (1, rotated_heightmap.shape[0], rotated_heightmap.shape[1]) 438 | 439 | if rotate_idx == 0: 440 | push_predictions = tmp_push_predictions 441 | else: 442 | push_predictions = np.concatenate((push_predictions, tmp_push_predictions), axis=0) 443 | 444 | best_pix_ind = np.unravel_index(np.argmax(push_predictions), push_predictions.shape) 445 | return best_pix_ind 446 | 447 | 448 | def grasp_heuristic(self, depth_heightmap): 449 | 450 | num_rotations = 16 451 | 452 | for rotate_idx in range(num_rotations): 453 | rotated_heightmap = ndimage.rotate(depth_heightmap, rotate_idx*(360.0/num_rotations), reshape=False, order=0) 454 | valid_areas = np.zeros(rotated_heightmap.shape) 455 | valid_areas[np.logical_and(rotated_heightmap - ndimage.interpolation.shift(rotated_heightmap, [0,-25], order=0) > 0.02, rotated_heightmap - ndimage.interpolation.shift(rotated_heightmap, [0,25], order=0) > 0.02)] = 1 456 | # valid_areas = np.multiply(valid_areas, rotated_heightmap) 457 | blur_kernel = np.ones((25,25),np.float32)/9 458 | 
valid_areas = cv2.filter2D(valid_areas, -1, blur_kernel) 459 | tmp_grasp_predictions = ndimage.rotate(valid_areas, -rotate_idx*(360.0/num_rotations), reshape=False, order=0) 460 | tmp_grasp_predictions.shape = (1, rotated_heightmap.shape[0], rotated_heightmap.shape[1]) 461 | 462 | if rotate_idx == 0: 463 | grasp_predictions = tmp_grasp_predictions 464 | else: 465 | grasp_predictions = np.concatenate((grasp_predictions, tmp_grasp_predictions), axis=0) 466 | 467 | best_pix_ind = np.unravel_index(np.argmax(grasp_predictions), grasp_predictions.shape) 468 | return best_pix_ind 469 | 470 | -------------------------------------------------------------------------------- /src/push-DQN/utils.py: -------------------------------------------------------------------------------- 1 | import struct 2 | import math 3 | import numpy as np 4 | import cv2 5 | import torch 6 | import torch.nn as nn 7 | import torch.nn.functional as F 8 | from torch.autograd import Variable 9 | 10 | from mpl_toolkits import mplot3d 11 | import matplotlib.pyplot as plt 12 | 13 | 14 | def get_pointcloud(color_img, depth_img, camera_intrinsics): 15 | 16 | # Get depth image size 17 | im_h = depth_img.shape[0] 18 | im_w = depth_img.shape[1] 19 | 20 | # Project depth into 3D point cloud in camera coordinates 21 | # np.mgrid would be better 22 | # pix_x, pix_y = np.mgrid[0:im_w, 0:im_h] 23 | pix_x, pix_y = np.meshgrid(np.linspace(0, im_w-1, im_w), np.linspace(0, im_h-1, im_h)) 24 | cam_pts_x = np.multiply(pix_x - camera_intrinsics[0][2], depth_img / camera_intrinsics[0][0]) 25 | cam_pts_y = np.multiply(pix_y - camera_intrinsics[1][2], depth_img / camera_intrinsics[1][1]) 26 | cam_pts_z = depth_img.copy() 27 | cam_pts_x.shape = (im_h * im_w, 1) 28 | cam_pts_y.shape = (im_h * im_w, 1) 29 | cam_pts_z.shape = (im_h * im_w, 1) 30 | 31 | # Reshape image into colors for 3D point cloud 32 | rgb_pts_r = color_img[:, :, 0] 33 | rgb_pts_g = color_img[:, :, 1] 34 | rgb_pts_b = color_img[:, :, 2] 35 | rgb_pts_r.shape = (im_h * im_w, 1) 36 | rgb_pts_g.shape = (im_h * im_w, 1) 37 | rgb_pts_b.shape = (im_h * im_w, 1) 38 | 39 | cam_pts = np.concatenate((cam_pts_x, cam_pts_y, cam_pts_z), axis=1) 40 | rgb_pts = np.concatenate((rgb_pts_r, rgb_pts_g, rgb_pts_b), axis=1) 41 | 42 | return cam_pts, rgb_pts 43 | 44 | 45 | def get_heightmap(color_img, depth_img, cam_intrinsics, cam_pose, workspace_limits, heightmap_resolution): 46 | 47 | # fig = plt.figure() 48 | # ax = plt.axes(projection="3d") 49 | 50 | # Compute heightmap size 51 | heightmap_size = np.round(((workspace_limits[1][1] - workspace_limits[1][0])/heightmap_resolution, 52 | (workspace_limits[0][1] - workspace_limits[0][0])/heightmap_resolution)).astype(int) 53 | 54 | # Get 3D point cloud from RGB-D images 55 | surface_pts, color_pts = get_pointcloud(color_img, depth_img, cam_intrinsics) 56 | 57 | # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2]) 58 | # plt.show() 59 | 60 | # Transform 3D point cloud from camera coordinates to robot coordinates 61 | surface_pts = np.transpose(np.dot(cam_pose[0:3, 0:3], 62 | np.transpose(surface_pts)) + np.tile(cam_pose[0:3, 3:], (1, surface_pts.shape[0]))) 63 | 64 | # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2]) 65 | # plt.show() 66 | 67 | # Sort surface points by z value 68 | sort_z_ind = np.argsort(surface_pts[:, 2]) 69 | surface_pts = surface_pts[sort_z_ind] 70 | color_pts = color_pts[sort_z_ind] 71 | 72 | # Filter out surface points outside heightmap boundaries 73 | heightmap_valid_ind = np.logical_and( 74 | 
np.logical_and(
75 |             np.logical_and(
76 |                 np.logical_and(surface_pts[:, 0] >= workspace_limits[0][0], surface_pts[:, 0] < workspace_limits[0][1]),
77 |                 surface_pts[:, 1] >= workspace_limits[1][0]), surface_pts[:, 1] < workspace_limits[1][1]),
78 |         surface_pts[:, 2] < workspace_limits[2][1])
79 |
80 |     surface_pts = surface_pts[heightmap_valid_ind]
81 |     color_pts = color_pts[heightmap_valid_ind]
82 |
83 |     # ax.scatter3D(surface_pts[:, 0], surface_pts[:, 1], surface_pts[:, 2], c=surface_pts[:, 2], cmap='hsv')
84 |     # plt.show()
85 |
86 |     # Create orthographic top-down-view RGB-D heightmaps
87 |     color_heightmap_r = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
88 |     color_heightmap_g = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
89 |     color_heightmap_b = np.zeros((heightmap_size[0], heightmap_size[1], 1), dtype=np.uint8)
90 |     depth_heightmap = np.zeros(heightmap_size)
91 |     heightmap_pix_x = np.floor((surface_pts[:, 0] - workspace_limits[0][0])/heightmap_resolution).astype(int)
92 |     heightmap_pix_y = np.floor((surface_pts[:, 1] - workspace_limits[1][0])/heightmap_resolution).astype(int)
93 |     color_heightmap_r[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [0]]
94 |     color_heightmap_g[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [1]]
95 |     color_heightmap_b[heightmap_pix_y, heightmap_pix_x] = color_pts[:, [2]]
96 |     color_heightmap = np.concatenate((color_heightmap_r, color_heightmap_g, color_heightmap_b), axis=2)
97 |     depth_heightmap[heightmap_pix_y, heightmap_pix_x] = surface_pts[:, 2]
98 |
99 |
100 |
101 |     z_bottom = workspace_limits[2][0]
102 |     depth_heightmap = depth_heightmap - z_bottom
103 |     depth_heightmap[depth_heightmap < 0] = 0
104 |     depth_heightmap[depth_heightmap == -z_bottom] = np.nan
105 |     # plt.imshow(depth_heightmap)
106 |     # asd=123
107 |     return color_heightmap, depth_heightmap
108 |
109 | # Save a 3D point cloud to a binary .ply file
110 | def pcwrite(xyz_pts, filename, rgb_pts=None):
111 |     assert xyz_pts.shape[1] == 3, 'input XYZ points should be an Nx3 matrix'
112 |     if rgb_pts is None:
113 |         rgb_pts = np.ones(xyz_pts.shape).astype(np.uint8)*255
114 |     assert xyz_pts.shape == rgb_pts.shape, 'input RGB colors should be Nx3 matrix and same size as input XYZ points'
115 |
116 |     # Write header for .ply file
117 |     pc_file = open(filename, 'wb')
118 |     pc_file.write(bytearray('ply\n', 'utf8'))
119 |     pc_file.write(bytearray('format binary_little_endian 1.0\n', 'utf8'))
120 |     pc_file.write(bytearray(('element vertex %d\n' % xyz_pts.shape[0]), 'utf8'))
121 |     pc_file.write(bytearray('property float x\n', 'utf8'))
122 |     pc_file.write(bytearray('property float y\n', 'utf8'))
123 |     pc_file.write(bytearray('property float z\n', 'utf8'))
124 |     pc_file.write(bytearray('property uchar red\n', 'utf8'))
125 |     pc_file.write(bytearray('property uchar green\n', 'utf8'))
126 |     pc_file.write(bytearray('property uchar blue\n', 'utf8'))
127 |     pc_file.write(bytearray('end_header\n', 'utf8'))
128 |
129 |     # Write 3D points to .ply file
130 |     for i in range(xyz_pts.shape[0]):
131 |         pc_file.write(bytearray(struct.pack("fffccc",xyz_pts[i][0],xyz_pts[i][1],xyz_pts[i][2],rgb_pts[i][0].tostring(),rgb_pts[i][1].tostring(),rgb_pts[i][2].tostring())))
132 |     pc_file.close()
133 |
134 |
135 | def get_affordance_vis(grasp_affordances, input_images, num_rotations, best_pix_ind):
136 |     vis = None
137 |     for vis_row in range(int(num_rotations/4)):
138 |         tmp_row_vis = None
139 |         for vis_col in range(4):
140 |             rotate_idx = vis_row*4+vis_col
141 |             affordance_vis = 
grasp_affordances[rotate_idx,:,:] 142 | affordance_vis[affordance_vis < 0] = 0 # assume probability 143 | # affordance_vis = np.divide(affordance_vis, np.max(affordance_vis)) 144 | affordance_vis[affordance_vis > 1] = 1 # assume probability 145 | affordance_vis.shape = (grasp_affordances.shape[1], grasp_affordances.shape[2]) 146 | affordance_vis = cv2.applyColorMap((affordance_vis*255).astype(np.uint8), cv2.COLORMAP_JET) 147 | input_image_vis = (input_images[rotate_idx,:,:,:]*255).astype(np.uint8) 148 | input_image_vis = cv2.resize(input_image_vis, (0,0), fx=0.5, fy=0.5, interpolation=cv2.INTER_NEAREST) 149 | affordance_vis = (0.5*cv2.cvtColor(input_image_vis, cv2.COLOR_RGB2BGR) + 0.5*affordance_vis).astype(np.uint8) 150 | if rotate_idx == best_pix_ind[0]: 151 | affordance_vis = cv2.circle(affordance_vis, (int(best_pix_ind[2]), int(best_pix_ind[1])), 7, (0,0,255), 2) 152 | if tmp_row_vis is None: 153 | tmp_row_vis = affordance_vis 154 | else: 155 | tmp_row_vis = np.concatenate((tmp_row_vis,affordance_vis), axis=1) 156 | if vis is None: 157 | vis = tmp_row_vis 158 | else: 159 | vis = np.concatenate((vis,tmp_row_vis), axis=0) 160 | 161 | return vis 162 | 163 | 164 | def get_difference(color_heightmap, color_space, bg_color_heightmap): 165 | 166 | color_space = np.concatenate((color_space, np.asarray([[0.0, 0.0, 0.0]])), axis=0) 167 | color_space.shape = (color_space.shape[0], 1, 1, color_space.shape[1]) 168 | color_space = np.tile(color_space, (1, color_heightmap.shape[0], color_heightmap.shape[1], 1)) 169 | 170 | # Normalize color heightmaps 171 | color_heightmap = color_heightmap.astype(float)/255.0 172 | color_heightmap.shape = (1, color_heightmap.shape[0], color_heightmap.shape[1], color_heightmap.shape[2]) 173 | color_heightmap = np.tile(color_heightmap, (color_space.shape[0], 1, 1, 1)) 174 | 175 | bg_color_heightmap = bg_color_heightmap.astype(float)/255.0 176 | bg_color_heightmap.shape = (1, bg_color_heightmap.shape[0], bg_color_heightmap.shape[1], bg_color_heightmap.shape[2]) 177 | bg_color_heightmap = np.tile(bg_color_heightmap, (color_space.shape[0], 1, 1, 1)) 178 | 179 | # Compute nearest neighbor distances to key colors 180 | key_color_dist = np.sqrt(np.sum(np.power(color_heightmap - color_space,2), axis=3)) 181 | # key_color_dist_prob = F.softmax(Variable(torch.from_numpy(key_color_dist), volatile=True), dim=0).data.numpy() 182 | 183 | bg_key_color_dist = np.sqrt(np.sum(np.power(bg_color_heightmap - color_space,2), axis=3)) 184 | # bg_key_color_dist_prob = F.softmax(Variable(torch.from_numpy(bg_key_color_dist), volatile=True), dim=0).data.numpy() 185 | 186 | key_color_match = np.argmin(key_color_dist, axis=0) 187 | bg_key_color_match = np.argmin(bg_key_color_dist, axis=0) 188 | key_color_match[key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 1 189 | bg_key_color_match[bg_key_color_match == color_space.shape[0] - 1] = color_space.shape[0] + 2 190 | 191 | return np.sum(key_color_match == bg_key_color_match).astype(float)/np.sum(bg_key_color_match < color_space.shape[0]).astype(float) 192 | 193 | 194 | # Get rotation matrix from euler angles 195 | def euler2rotm(theta): 196 | R_x = np.array([[1, 0, 0 ], 197 | [0, math.cos(theta[0]), -math.sin(theta[0]) ], 198 | [0, math.sin(theta[0]), math.cos(theta[0]) ] 199 | ]) 200 | R_y = np.array([[math.cos(theta[1]), 0, math.sin(theta[1]) ], 201 | [0, 1, 0 ], 202 | [-math.sin(theta[1]), 0, math.cos(theta[1]) ] 203 | ]) 204 | R_z = np.array([[math.cos(theta[2]), -math.sin(theta[2]), 0], 205 | [math.sin(theta[2]), 
math.cos(theta[2]), 0], 206 | [0, 0, 1] 207 | ]) 208 | R = np.dot(R_z, np.dot( R_y, R_x )) 209 | return R 210 | 211 | 212 | # Checks if a matrix is a valid rotation matrix. 213 | def isRotm(R) : 214 | Rt = np.transpose(R) 215 | shouldBeIdentity = np.dot(Rt, R) 216 | I = np.identity(3, dtype = R.dtype) 217 | n = np.linalg.norm(I - shouldBeIdentity) 218 | return n < 1e-6 219 | 220 | 221 | # Calculates rotation matrix to euler angles 222 | def rotm2euler(R) : 223 | 224 | assert(isRotm(R)) 225 | 226 | sy = math.sqrt(R[0,0] * R[0,0] + R[1,0] * R[1,0]) 227 | singular = sy < 1e-6 228 | 229 | if not singular : 230 | x = math.atan2(R[2,1] , R[2,2]) 231 | y = math.atan2(-R[2,0], sy) 232 | z = math.atan2(R[1,0], R[0,0]) 233 | else : 234 | x = math.atan2(-R[1,2], R[1,1]) 235 | y = math.atan2(-R[2,0], sy) 236 | z = 0 237 | 238 | return np.array([x, y, z]) 239 | 240 | 241 | def angle2rotm(angle, axis, point=None): 242 | # Copyright (c) 2006-2018, Christoph Gohlke 243 | 244 | sina = math.sin(angle) 245 | cosa = math.cos(angle) 246 | axis = axis/np.linalg.norm(axis) 247 | 248 | # Rotation matrix around unit vector 249 | R = np.diag([cosa, cosa, cosa]) 250 | R += np.outer(axis, axis) * (1.0 - cosa) 251 | axis *= sina 252 | R += np.array([[ 0.0, -axis[2], axis[1]], 253 | [ axis[2], 0.0, -axis[0]], 254 | [-axis[1], axis[0], 0.0]]) 255 | M = np.identity(4) 256 | M[:3, :3] = R 257 | if point is not None: 258 | 259 | # Rotation not around origin 260 | point = np.array(point[:3], dtype=np.float64, copy=False) 261 | M[:3, 3] = point - np.dot(R, point) 262 | return M 263 | 264 | 265 | def rotm2angle(R): 266 | # From: euclideanspace.com 267 | 268 | epsilon = 0.01 # Margin to allow for rounding errors 269 | epsilon2 = 0.1 # Margin to distinguish between 0 and 180 degrees 270 | 271 | assert(isRotm(R)) 272 | 273 | if ((abs(R[0][1]-R[1][0])< epsilon) and (abs(R[0][2]-R[2][0])< epsilon) and (abs(R[1][2]-R[2][1])< epsilon)): 274 | # Singularity found 275 | # First check for identity matrix which must have +1 for all terms in leading diagonaland zero in other terms 276 | if ((abs(R[0][1]+R[1][0]) < epsilon2) and (abs(R[0][2]+R[2][0]) < epsilon2) and (abs(R[1][2]+R[2][1]) < epsilon2) and (abs(R[0][0]+R[1][1]+R[2][2]-3) < epsilon2)): 277 | # this singularity is identity matrix so angle = 0 278 | return [0,1,0,0] # zero angle, arbitrary axis 279 | 280 | # Otherwise this singularity is angle = 180 281 | angle = np.pi 282 | xx = (R[0][0]+1)/2 283 | yy = (R[1][1]+1)/2 284 | zz = (R[2][2]+1)/2 285 | xy = (R[0][1]+R[1][0])/4 286 | xz = (R[0][2]+R[2][0])/4 287 | yz = (R[1][2]+R[2][1])/4 288 | if ((xx > yy) and (xx > zz)): # R[0][0] is the largest diagonal term 289 | if (xx< epsilon): 290 | x = 0 291 | y = 0.7071 292 | z = 0.7071 293 | else: 294 | x = np.sqrt(xx) 295 | y = xy/x 296 | z = xz/x 297 | elif (yy > zz): # R[1][1] is the largest diagonal term 298 | if (yy< epsilon): 299 | x = 0.7071 300 | y = 0 301 | z = 0.7071 302 | else: 303 | y = np.sqrt(yy) 304 | x = xy/y 305 | z = yz/y 306 | else: # R[2][2] is the largest diagonal term so base result on this 307 | if (zz< epsilon): 308 | x = 0.7071 309 | y = 0.7071 310 | z = 0 311 | else: 312 | z = np.sqrt(zz) 313 | x = xz/z 314 | y = yz/z 315 | return [angle,x,y,z] # Return 180 deg rotation 316 | 317 | # As we have reached here there are no singularities so we can handle normally 318 | s = np.sqrt((R[2][1] - R[1][2])*(R[2][1] - R[1][2]) + (R[0][2] - R[2][0])*(R[0][2] - R[2][0]) + (R[1][0] - R[0][1])*(R[1][0] - R[0][1])) # used to normalise 319 | if (abs(s) < 0.001): 320 | s = 1 321 
| 322 | # Prevent divide by zero, should not happen if matrix is orthogonal and should be 323 | # Caught by singularity test above, but I've left it in just in case 324 | angle = np.arccos(( R[0][0] + R[1][1] + R[2][2] - 1)/2) 325 | x = (R[2][1] - R[1][2])/s 326 | y = (R[0][2] - R[2][0])/s 327 | z = (R[1][0] - R[0][1])/s 328 | return [angle,x,y,z] 329 | 330 | 331 | # Cross entropy loss for 2D outputs 332 | class CrossEntropyLoss2d(nn.Module): 333 | 334 | def __init__(self, weight=None, size_average=True): 335 | super(CrossEntropyLoss2d, self).__init__() 336 | self.nll_loss = nn.NLLLoss2d(weight, size_average) 337 | 338 | def forward(self, inputs, targets): 339 | return self.nll_loss(F.log_softmax(inputs, dim=1), targets) 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | --------------------------------------------------------------------------------
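A minimal usage sketch for the pose and point-cloud helpers in src/push-DQN/utils.py (this snippet is not part of the repository; it assumes the interpreter is started inside src/push-DQN so that utils.py is importable, and all values are arbitrary illustrations). euler2rotm composes R = R_z · R_y · R_x from per-axis angles theta = [x, y, z] in radians, isRotm checks orthonormality, rotm2euler inverts the composition away from the gimbal-lock singularity, and pcwrite dumps an Nx3 point cloud (with optional Nx3 uint8 colours) to a binary .ply file for inspection:

import numpy as np
from utils import euler2rotm, rotm2euler, isRotm, pcwrite

theta = np.array([0.1, -0.4, 1.2])        # illustrative [x, y, z] Euler angles in radians
R = euler2rotm(theta)                     # 3x3 rotation matrix, R = Rz @ Ry @ Rx
assert isRotm(R)                          # same orthonormality check used inside utils.py
print(np.allclose(rotm2euler(R), theta))  # True: the round trip recovers the angles

xyz = np.random.rand(500, 3)                           # illustrative Nx3 points
rgb = (np.random.rand(500, 3) * 255).astype(np.uint8)  # matching Nx3 uint8 colours
pcwrite(xyz, 'debug_cloud.ply', rgb)                   # writes a binary little-endian .ply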