├── .gitignore
├── .idea
│   ├── MaskRCNN_body.iml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── Documentation.odt
├── README.md
├── convert_data
│   ├── ChalearnLAPEvaluation.py
│   ├── ChalearnLAPSample.py
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── convert_ADE20k_human_body_parts.py
│   ├── convert_CHALEARN_human_body_parts.py
│   ├── convert_VOC_human_body_parts.py
│   ├── convert_jhmdb.py
│   ├── download_and_convert_data.sh
│   ├── human_body_parts.m
│   ├── read_my_data_keypoints.py
│   └── visualize_records_human_body_parts.py
├── crontab.sh
├── data
│   └── README.md
├── document.pdf
├── draw
│   ├── __init__.py
│   ├── draw.py
│   ├── metric.py
│   ├── utils.py
│   └── utils.pyc
├── libs
│   ├── Makefile
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── boxes
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── bbox.pyx
│   │   ├── bbox_transform.py
│   │   ├── bbox_transform.pyc
│   │   ├── blob.py
│   │   ├── cython_anchor.py
│   │   ├── cython_anchor.pyx
│   │   ├── cython_bbox.py
│   │   ├── cython_bbox_transform.py
│   │   ├── cython_bbox_transform.pyx
│   │   ├── cython_nms.py
│   │   ├── cython_nms.pyc
│   │   ├── gprof2dot.py
│   │   ├── nms.py
│   │   ├── nms.pyc
│   │   ├── nms.pyx
│   │   ├── nms_wrapper.py
│   │   ├── nms_wrapper.pyc
│   │   ├── profile
│   │   ├── profile.png
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── timer.py
│   │   └── timer.pyc
│   ├── configs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── config_v1.py
│   │   └── config_v1.pyc
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco.py
│   │   ├── coco.pyc
│   │   ├── dataset_factory.py
│   │   ├── dataset_factory.pyc
│   │   ├── download_and_convert_coco.py
│   │   ├── download_and_convert_coco.pyc
│   │   └── pycocotools
│   │       ├── Makefile
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── _mask.c
│   │       ├── _mask.pyx
│   │       ├── _mask.so
│   │       ├── coco.py
│   │       ├── coco.pyc
│   │       ├── cocoeval.py
│   │       ├── common
│   │       │   ├── gason.cpp
│   │       │   ├── gason.h
│   │       │   ├── maskApi.c
│   │       │   └── maskApi.h
│   │       ├── mask.py
│   │       ├── mask.pyc
│   │       └── setup.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── assign.py
│   │   ├── assign.pyc
│   │   ├── crop.py
│   │   ├── crop.pyc
│   │   ├── mask.py
│   │   ├── mask.pyc
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── sample.py
│   │   ├── sample.pyc
│   │   ├── wrapper.py
│   │   └── wrapper.pyc
│   ├── logs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── log.py
│   │   └── log.pyc
│   ├── make.sh
│   ├── memory_util.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── nets_factory.py
│   │   ├── nets_factory.pyc
│   │   ├── pyramid_network.py
│   │   ├── pyramid_network.pyc
│   │   ├── resnet_utils.py
│   │   ├── resnet_utils.pyc
│   │   ├── resnet_v1.py
│   │   ├── resnet_v1.pyc
│   │   └── train_utils.py
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── preprocessings
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco_v1.py
│   │   ├── coco_v1.pyc
│   │   ├── utils.py
│   │   └── utils.pyc
│   ├── setup.py
│   └── visualization
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── pil_utils.py
│       ├── pil_utils.pyc
│       ├── summary_utils.py
│       └── summary_utils.pyc
├── mask_rcnn_final.xml
├── media
│   ├── file.txt
│   ├── testseg122_1.jpg
│   ├── testseg226_1.jpg
│   ├── testseg255_1.jpg
│   ├── testseg293_1.jpg
│   ├── testseg296_1.jpg
│   ├── testseg305_1.jpg
│   ├── testseg35_1.jpg
│   ├── testseg57_1.jpg
│   └── testseg70_1.jpg
├── train
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── train.py
│   ├── train_utils.py
│   └── train_utils.pyc
└── unit_test
    ├── __init__.py
    ├── data_test.py
    ├── layer_test.py
    ├── preprocessing_test.py
    └── resnet50_test.py

-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/pretrained_models/ 2 | data/coco/ 3 | output/mask_rcnn/ 4 |
convert_data/data/ 5 | draw/data/ 6 | draw/output_seg/ 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/MaskRCNN_body.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 61 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Documentation.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/Documentation.odt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Detecting human body parts and Building Skeleton Models using Deep Convolutional Neural Networks 2 | 3 | This repository contains an extension to the amazing work done by [CharlesShang](https://github.com/CharlesShang/FastMaskRCNN). 4 | This is a neural network model that performs object detection, classification and segmentation. 5 | The idea is to detect all persons in an image and segment their body parts. The next step is to add keypoint regression. The model runs at about 200 ms/frame (roughly 5 fps) on a Titan X GPU. 6 | A practical use for this kind of model would be a fashion application that reads your body shape and renders different clothes onto it, so the user can preview how they would look. 7 | Here are some results from the training set. 8 | 9 | ![demo](media/testseg57_1.jpg) 10 | ![demo](media/testseg122_1.jpg) 11 | ![demo](media/testseg226_1.jpg) 12 | ![demo](media/testseg255_1.jpg) 13 | ![demo](media/testseg293_1.jpg) 14 | ![demo](media/testseg296_1.jpg) 15 | ![demo](media/testseg305_1.jpg) 16 | 17 | The following videos show the model doing whole-body segmentation (the individual parts are not drawn). 18 | The model output has been modified to have 2 classes for detection (human, non-human) and 7 segmentation classes (full body, head, torso, right hand, left hand, right leg, left leg).
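Concretely, every detected person carries one seven-channel mask tensor. Below is a sketch of the channel layout, assuming the ordering used by the `body_parts_dict` comments in the `convert_data/` scripts (the converters emit `[H, W, 7]` per-person masks; the mask branch works at `112x112`):

```python
# channel layout of a per-person mask tensor, as built by the convert_data scripts
MASK_CHANNELS = {
    0: 'full body',  # union of all parts
    1: 'head',
    2: 'torso',
    3: 'left arm',   # hand + forearm + upper arm merged into one class
    4: 'right arm',
    5: 'left leg',   # foot + lower leg + upper leg merged into one class
    6: 'right leg',
}
```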
19 | 20 | IMAGE ALT TEXT HERE 21 | IMAGE ALT TEXT HERE 22 | 23 | The following video shows the results of the same model, but this time the body parts are drawn as well. 24 | 25 | IMAGE ALT TEXT HERE 26 | 27 | # Installation 28 | ``` 29 | git clone https://github.com/Iftimie/MaskRCNN_body.git 30 | cd MaskRCNN_body 31 | mkdir data/coco 32 | mkdir data/coco/records 33 | cd data/coco/records/ 34 | 35 | wget https://www.dropbox.com/s/43ihvomchvwtpns/checkpoint 36 | wget https://www.dropbox.com/s/v6084wee6pjlfk4/coco_resnet50_model.ckpt-248000.data-00000-of-00001 37 | wget https://www.dropbox.com/s/0gqxnbsjzpuz0tz/coco_resnet50_model.ckpt-248000.index 38 | wget https://www.dropbox.com/s/3uildv0wlh79oad/coco_resnet50_model.ckpt-248000.meta 39 | #modify the checkpoint file with your path 40 | cd ../../.. 41 | git checkout test 42 | #modify line 180 in train/test.py with your ip address 43 | #modify line 36 in train/client.py with the respective ip address 44 | 45 | #in one terminal 46 | CUDA_VISIBLE_DEVICES=0 python train/test.py 47 | 48 | #in another terminal. Make sure to have a webcam connected 49 | python train/client.py 50 | ``` 51 | -------------------------------------------------------------------------------- /convert_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.py -------------------------------------------------------------------------------- /convert_data/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.pyc -------------------------------------------------------------------------------- /convert_data/convert_CHALEARN_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import ChalearnLAPSample 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 6 | 7 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 8 | body_parts_dict={ 9 | 1:1,#head 10 | 2:2,#torso 11 | 3:3,#left hand 12 | 5:3,#left forearm (lower) 13 | 7:3,#left upper arm 14 | 4:4,#right hand 15 | 6:4,#right forearm 16 | 8:4,#right upper arm 17 | 9:5,#left foot 18 | 11:5,#left lower leg 19 | 13:5,#left upper leg 20 | 10:6,#right foot 21 | 12:6,#right lower leg 22 | 14:6,#right upper leg 23 | }
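# (illustration, not part of the original script) the dict collapses CHALEARN's
# 14 limb labels into 6 part classes, e.g. hand, forearm and upper arm share one id:
# body_parts_dict[3] == body_parts_dict[5] == body_parts_dict[7] == 3    # -> left arm
# body_parts_dict[9] == body_parts_dict[11] == body_parts_dict[13] == 5  # -> left leg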
24 | 25 | # poseSample = ChalearnLAPSample.PoseSample("Seq01.zip") 26 | # actorid=1 27 | # limbid=2 28 | # cv2.namedWindow("Seqxx",cv2.WINDOW_NORMAL) 29 | # cv2.namedWindow("Torso",cv2.WINDOW_NORMAL) 30 | # for x in range(1, poseSample.getNumFrames()): 31 | # img=poseSample.getRGB(x) 32 | # torso=poseSample.getLimb(x,actorid,6) 33 | # cv2.imshow("Seqxx",img) 34 | # cv2.imshow("Torso",torso) 35 | # cv2.waitKey(1000) 36 | # cv2.destroyAllWindows() 37 | 38 | #loadData takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 39 | def loadData(frame_id,img,poseSample): 40 | H,W = img.shape[0],img.shape[1] 41 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 42 | masks_instances = [] #shape: [N,H,W,7] 43 | for actorid in range(1,3): # there are at most 2 persons in one image 44 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 45 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body 46 | for limbid in range(1,15): 47 | part = poseSample.getLimb(frame_id,actorid,limbid) #get part mask 48 | part = cv2.resize(part[...,0]/255,(W,H)) 49 | masks_for_person[...,body_parts_dict[limbid]] = np.logical_or(masks_for_person[...,body_parts_dict[limbid]],part) #this is where I combine for example right upper leg and right lower leg 50 | one_mask_person=np.logical_or(one_mask_person,part) # this is where I merge the part mask into the whole body 51 | 52 | masks_for_person[...,0] = one_mask_person 53 | _,contours,hierarchy = cv2.findContours(one_mask_person.astype(np.uint8).copy(), 1, 2) #### from here 54 | if len(contours)==0: 55 | continue 56 | x1=100000 57 | y1=100000 58 | x2=-10000 59 | y2=-10000 60 | for contour in contours: 61 | x,y,w,h = cv2.boundingRect(contour) 62 | xw,yh = x+w,y+h 63 | if x < x1: 64 | x1=x 65 | if y < y1: 66 | y1=y 67 | if xw > x2: 68 | x2=xw 69 | if yh > y2: 70 | y2=yh 71 | gt_boxes.append([x1,y1,x2,y2,1]) #####to here I select the bounding box of the person instance; the mask might be split into multiple blobs 72 | masks_instances.append(masks_for_person) 73 | 74 | if len(gt_boxes) ==0: 75 | return False,None,None,None,H,W 76 | masks_instances = np.array(masks_instances,dtype=np.uint8) 77 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 78 | # for h_box in gt_boxes: 79 | # image = cv2.rectangle(img,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 80 | # cv2.imshow("img",image) 81 | # cv2.waitKey(100) 82 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard 83 | return True,gt_boxes,masks_instances,mask,H,W
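# (added note) contract of loadData when at least one actor is visible:
#   gt_boxes: float32 array of shape [N, 5] -- [x1, y1, x2, y2, class_id] per person
#   masks_instances: uint8 array of shape [N, H, W, 7] -- channel 0 = whole body, 1-6 = parts
#   mask: a single [H, W] part mask, only used for the tensorboard summary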
84 | 85 | def _int64_feature(values): 86 | if not isinstance(values, (tuple, list)): 87 | values = [values] 88 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 89 | 90 | def _bytes_feature(values): 91 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 92 | 93 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 94 | height, width, 95 | num_instances, gt_boxes, masks): 96 | """ just write a raw input""" 97 | return tf.train.Example(features=tf.train.Features(feature={ 98 | 'image/img_id': _int64_feature(image_id), 99 | 'image/encoded': _bytes_feature(image_data), 100 | 'image/height': _int64_feature(height), 101 | 'image/width': _int64_feature(width), 102 | 'label/num_instances': _int64_feature(num_instances), # N 103 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 104 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 105 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 106 | })) 107 | 108 | 109 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 110 | record_filename = "out_human_and_body_parts_chalearn.tfrecord" 111 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 112 | for seq,seq_id in zip(["Seq01.zip","Seq02.zip","Seq03.zip","Seq04.zip","Seq06.zip"],range(5)): # 5 movies 113 | #for seq,seq_id in zip(["Seq03.zip"],range(5)): 114 | poseSample = ChalearnLAPSample.PoseSample(seq) #Chalearn API 115 | for x in range(1, poseSample.getNumFrames(),6): # take every 6th frame only; consecutive video frames are largely redundant 116 | img=poseSample.getRGB(x) 117 | img_id = seq_id*2000+x 118 | persons_exist,gt_boxes,masks_instances,mask,H,W = loadData(x,img,poseSample) 119 | if not persons_exist: 120 | continue 121 | mask_raw = mask.tostring() 122 | # img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR) 123 | # cv2.imshow("image",img) 124 | # cv2.waitKey(1000) 125 | img_raw = img.tostring() 126 | example = _to_tfexample_coco_raw( 127 | img_id, 128 | img_raw, 129 | mask_raw, 130 | H, W, gt_boxes.shape[0], 131 | gt_boxes.tostring(), masks_instances.tostring()) 132 | print x 133 | 134 | tfrecord_writer.write(example.SerializeToString()) 135 | tfrecord_writer.close() 136 | 137 | 138 | 139 |
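# (sketch, not part of the original script) quick sanity check that the record
# file above was written and can be iterated back with the same ZLIB options:
import tensorflow as tf
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
opts = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
n = sum(1 for _ in tf.python_io.tf_record_iterator("out_human_and_body_parts_chalearn.tfrecord", opts))
print("records written: %d" % n)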
-------------------------------------------------------------------------------- /convert_data/convert_VOC_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io as sio 6 | import cv2 7 | import traceback 8 | import logging 9 | 10 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 11 | body_parts_dict = { 12 | 'head':1, 13 | 'lear':1, 14 | 'rear':1, 15 | 'mouth':1, 16 | 'hair':1, 17 | 'nose':1, 18 | 'leye':1, 19 | 'reye':1, 20 | 'lebrow':1, 21 | 'rebrow':1, 22 | 'torso':2, 23 | 'neck':2, 24 | 'luarm':3, 25 | 'llarm':3, 26 | 'lhand':3, 27 | 'rlarm':4, 28 | 'ruarm':4, 29 | 'rhand':4, 30 | 'llleg':5, 31 | 'luleg':5, 32 | 'lfoot':5, 33 | 'rlleg':6, 34 | 'ruleg':6, 35 | 'rfoot':6 36 | } 37 | 38 | body_parts_dict = { #NOTE: this second definition overwrites the one above; here left/right arm share class 3 and left/right leg share class 5 39 | 'head':1, 40 | 'lear':1, 41 | 'rear':1, 42 | 'mouth':1, 43 | 'hair':1, 44 | 'nose':1, 45 | 'leye':1, 46 | 'reye':1, 47 | 'lebrow':1, 48 | 'rebrow':1, 49 | 'torso':2, 50 | 'neck':2, 51 | 'luarm':3, 52 | 'llarm':3, 53 | 'lhand':3, 54 | 'rlarm':3, 55 | 'ruarm':3, 56 | 'rhand':3, 57 | 'llleg':5, 58 | 'luleg':5, 59 | 'lfoot':5, 60 | 'rlleg':5, 61 | 'ruleg':5, 62 | 'rfoot':5 63 | } 64 | #loadData3 takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 65 | def loadData3(H,W): #### NOTE: img and annotation are not passed in as arguments; Python resolves them from the module scope, where they are set in the loop below 66 | 67 | masks_instances = []#shape: [N,H,W,7] 68 | 69 | persons = [o for o in annotation['anno'][0]['objects'][0][0] if o['class']=='person'] # select all persons from the image 70 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 71 | for i in range(len(persons)): 72 | p = persons[i] 73 | pa = p['parts'] 74 | parts = pa[0] 75 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 76 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body 77 | 78 | for part in parts: 79 | part_name = part['part_name'].astype(str)[0] 80 | index = body_parts_dict[part_name] 81 | masks_for_person[...,index] = np.logical_or(masks_for_person[...,index], part['mask'])#this is where I combine for example right upper leg and right lower leg 82 | one_mask_person=np.logical_or(one_mask_person,part['mask']) # this is where I merge the part mask into the whole body 83 | 84 | masks_for_person[...,0]=one_mask_person 85 | kernel = np.ones((5,5),np.uint8) 86 | one_mask_person = np.array(one_mask_person,dtype=np.uint8)#without this cast cv2.dilate raises an error 87 | one_mask_person = cv2.dilate(one_mask_person,kernel,iterations = 1) 88 | _,contours,hierarchy = cv2.findContours(one_mask_person, 1, 2) #### from here 89 | if len(contours) ==0: 90 | continue 91 | x1=100000 92 | y1=100000 93 | x2=-10000 94 | y2=-10000 95 | for contour in contours: 96 | x,y,w,h = cv2.boundingRect(contour) 97 | xw,yh = x+w,y+h 98 | if x < x1: 99 | x1=x 100 | if y < y1: 101 | y1=y 102 | if xw > x2: 103 | x2=xw 104 | if yh > y2: 105 | y2=yh 106 | gt_boxes.append([x1,y1,x2,y2,1]) 107 | 108 | if True:#########################Body (keeps only the whole-body channel, discarding the part masks) 109 | print ("BODYYYYYYYYYYYYYYYY") 110 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) 111 | masks_for_person[...,0]=one_mask_person 112 | 113 | masks_instances.append(masks_for_person.copy()) #####to here I select the bounding box of the person instance; the mask might be split into multiple blobs 114 | if len(gt_boxes) ==0: 115 | return False,None,None,None 116 | 117 | masks_instances = np.array(masks_instances,dtype=np.uint8) 118 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 119 | mask = masks_instances[0,:,:,1]# this is for drawing the ground truth in the network in tensorboard 120 | return True,gt_boxes,masks_instances,mask
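# (sketch, not used by the repo) the contour-to-box accumulation above, written
# more compactly with numpy; assumes an OpenCV 3.x three-value findContours:
def person_bbox(binary_mask):
    _, contours, _ = cv2.findContours(binary_mask.astype(np.uint8).copy(), 1, 2)
    if not contours:
        return None
    rects = np.array([cv2.boundingRect(c) for c in contours])   # one (x, y, w, h) per blob
    x1, y1 = rects[:, 0].min(), rects[:, 1].min()               # tightest corner over all blobs
    x2, y2 = (rects[:, 0] + rects[:, 2]).max(), (rects[:, 1] + rects[:, 3]).max()
    return [x1, y1, x2, y2, 1]                                  # class 1 = person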
121 | 122 | def _int64_feature(values): 123 | if not isinstance(values, (tuple, list)): 124 | values = [values] 125 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 126 | 127 | def _bytes_feature(values): 128 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 129 | 130 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 131 | height, width, 132 | num_instances, gt_boxes, masks): 133 | """ just write a raw input""" 134 | return tf.train.Example(features=tf.train.Features(feature={ 135 | 'image/img_id': _int64_feature(image_id), 136 | 'image/encoded': _bytes_feature(image_data), 137 | 'image/height': _int64_feature(height), 138 | 'image/width': _int64_feature(width), 139 | 'label/num_instances': _int64_feature(num_instances), # N 140 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 141 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 142 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 143 | })) 144 | 145 | 146 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 147 | record_filename = "data/out_human_and_body_parts.tfrecord" 148 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 149 | for x in range (0,12000): 150 | try: 151 | img_id = x 152 | img_name = '2008_%06d' % (x,) 153 | img = np.array(Image.open('data/JPEGImages/'+img_name+'.jpg')) 154 | annotation = sio.loadmat('data/Annotations_Part/'+img_name+'.mat') 155 | image = cv2.imread('data/JPEGImages/'+img_name+'.jpg') 156 | height, width = img.shape[0],img.shape[1] 157 | img = img.astype(np.uint8) 158 | img_raw = img.tostring() 159 | persons_exist, gt_boxes, masks,mask = loadData3(height, width) 160 | if not persons_exist: 161 | continue 162 | mask_raw = mask.tostring() 163 | 164 | example = _to_tfexample_coco_raw( 165 | img_id, 166 | img_raw, 167 | mask_raw, 168 | height, width, gt_boxes.shape[0], 169 | gt_boxes.tostring(), masks.tostring()) 170 | tfrecord_writer.write(example.SerializeToString()) 171 | print (x) 172 | except BaseException as error: 173 | print error 174 | 175 | tfrecord_writer.close()
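# (sketch, not part of the original script) decode the first example back and
# recover the array shapes, mirroring visualize_records_human_body_parts.py:
import numpy as np
import tensorflow as tf
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
opts = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
ex = tf.train.Example()
ex.ParseFromString(next(tf.python_io.tf_record_iterator("data/out_human_and_body_parts.tfrecord", opts)))
f = ex.features.feature
h, w = f['image/height'].int64_list.value[0], f['image/width'].int64_list.value[0]
n = f['label/num_instances'].int64_list.value[0]
boxes = np.fromstring(f['label/gt_boxes'].bytes_list.value[0], dtype=np.float32).reshape((n, 5))
masks = np.fromstring(f['label/gt_masks'].bytes_list.value[0], dtype=np.uint8).reshape((n, h, w, 7))
print(boxes.shape, masks.shape)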
-------------------------------------------------------------------------------- /convert_data/convert_jhmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io as sio 3 | import cv2 4 | import numpy as np 5 | import tensorflow as tf 6 | from PIL import Image 7 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 8 | 9 | 10 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 11 | body_parts_dict = { 12 | 2:1,#head 13 | 1:2,#torso 14 | 4:3,#left upper arm 15 | 8:3,#left lower arm 16 | 3:4,#right upper arm 17 | 7:4,#right lower arm 18 | 6:5,#left upper leg 19 | 5:6,#right upper leg 20 | 9:6,#right lower leg 21 | 10:5,#left lower leg 22 | 23 | } 24 | 25 | body_parts_dict = { #NOTE: this second definition overwrites the one above; here left/right arm share class 3 and left/right leg share class 5 26 | 2:1,#head 27 | 1:2,#torso 28 | 4:3,#left upper arm 29 | 8:3,#left lower arm 30 | 3:3,#right upper arm 31 | 7:3,#right lower arm 32 | 6:5,#left upper leg 33 | 5:5,#right upper leg 34 | 9:5,#right lower leg 35 | 10:5,#left lower leg 36 | } 37 | 38 | # this is used to normalize the x,y of a keypoint to the range [-1, 1] 39 | def map_value(x,A,B,a,b): 40 | return (x-A)*(b-a)/(B-A)+a 41 | 42 | 43 | #loadData takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 44 | def loadData(image,instance_mask,parts_mask,keypoints): 45 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 46 | masks_instances = [] #shape: [N,H,W,7] 47 | _,contours,hierarchy = cv2.findContours(instance_mask.copy(), 1, 2) ######### from here 48 | x1=100000 49 | y1=100000 50 | x2=-10000 51 | y2=-10000 52 | for contour in contours: 53 | x,y,w,h = cv2.boundingRect(contour) 54 | xw,yh = x+w,y+h 55 | if x < x1: 56 | x1=x 57 | if y < y1: 58 | y1=y 59 | if xw > x2: 60 | x2=xw 61 | if yh > y2: 62 | y2=yh 63 | gt_boxes.append([x1,y1,x2,y2,1]) ######### to here I find the bbox of the person, since the person's mask might contain multiple blobs 64 | H = image.shape[0] 65 | W = image.shape[1] 66 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 67 | masks_for_person[...,0] = instance_mask.copy() 68 | for x in range(1,11): 69 | part = (parts_mask == x).astype(np.uint8) 70 | masks_for_person[...,body_parts_dict[x]] = np.logical_or(masks_for_person[...,body_parts_dict[x]],part) #this is where I combine for example right upper leg and right lower leg 71 | 72 | for x in range(15): #there are 15 keypoints 73 | # keypoints[0,x] = keypoints[0,x]-x1 74 | # keypoints[1,x] = keypoints[1,x]-y1 75 | keypoints[0,x] = map_value(keypoints[0,x],x1,x2,0.0,112.0) #first normalize the keypoint to the size of the output mask (112x112), because the keypoint regression branch is attached to the mask branch 76 | keypoints[1,x] = map_value(keypoints[1,x],y1,y2,0.0,112.0) 77 | keypoints[0,x] = map_value(keypoints[0,x],0.0,112.0,-1,1) #then normalize it to [-1, 1]; the two steps could be collapsed into one, but are kept separate for visualization/debugging 78 | keypoints[1,x] = map_value(keypoints[1,x],0.0,112.0,-1,1) 79 | 80 | if True:####################BODYYY (keeps only the whole-body channel, discarding the part masks) 81 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 82 | masks_for_person[...,0] = instance_mask.copy() 83 | 84 | masks_instances.append(masks_for_person) 85 | masks_instances = np.array(masks_instances,dtype=np.uint8) 86 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 87 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard 88 | keypoints = keypoints.astype(np.float32) 89 | return gt_boxes,masks_instances,mask,H,W,keypoints 90 | 91 | 92 | def _int64_feature(values): 93 | if not isinstance(values, (tuple, list)): 94 | values = [values] 95 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 96 | 97 | def _bytes_feature(values): 98 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 99 | 100 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 101 | height, width, 102 | num_instances, gt_boxes, masks,keypoints): 103 | """ just write a
raw input""" 104 | return tf.train.Example(features=tf.train.Features(feature={ 105 | 'image/img_id': _int64_feature(image_id), 106 | 'image/encoded': _bytes_feature(image_data), 107 | 'image/height': _int64_feature(height), 108 | 'image/width': _int64_feature(width), 109 | 'label/num_instances': _int64_feature(num_instances), # N 110 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 111 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 112 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 113 | 'label/keypoints': _bytes_feature(keypoints) 114 | })) 115 | 116 | img_id = 0 117 | scenes = os.listdir('JHMDB_video/ReCompress_Videos') 118 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 119 | record_filename = "out_human_and_body_parts_keypoints_JHMDB.tfrecord" 120 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 121 | for s in scenes: 122 | if s=='.DS_Store': 123 | continue 124 | mask_dir = os.listdir('puppet_mask/'+s) 125 | for mask in mask_dir: 126 | mat_file_instance = sio.loadmat('puppet_mask/'+s+'/'+mask+'/puppet_mask.mat') 127 | video_file = cv2.VideoCapture('JHMDB_video/ReCompress_Videos/'+s+'/'+mask+".avi") 128 | mat_file_parts = sio.loadmat('puppet_flow_com/'+s+'/'+mask+'/puppet_flow.mat') 129 | mat_file_keypoints = sio.loadmat('joint_positions/'+s+'/'+mask+'/joint_positions.mat') 130 | 131 | #ret, image = video_file.read() 132 | for x in range(0,mat_file_parts['part_mask'].shape[2]): 133 | ret, image = video_file.read() 134 | parts = mat_file_parts['part_mask'][...,x] 135 | instance = mat_file_instance['part_mask'][...,x] 136 | keypoints = mat_file_keypoints['pos_img'][...,x] 137 | # parts = mat_file_parts['part_mask'][...,0] 138 | # instance = mat_file_instance['part_mask'][...,0] 139 | # keypoints = mat_file_keypoints['pos_img'][...,0] 140 | 141 | gt_boxes,masks_instances,mask,H,W,keypoints = loadData(image,instance,parts,keypoints) 142 | mask_raw = mask.tostring() 143 | img_raw = image.tostring() 144 | example = _to_tfexample_coco_raw( 145 | img_id, 146 | img_raw, 147 | mask_raw, 148 | H, W, gt_boxes.shape[0], 149 | gt_boxes.tostring(), masks_instances.tostring(),keypoints.tostring()) 150 | tfrecord_writer.write(example.SerializeToString()) 151 | 152 | # cv2.imshow("ar",parts*25) 153 | # cv2.imshow("image",image) 154 | # cv2.imshow("instance",instance*255) 155 | # cv2.waitKey(100) 156 | tfrecord_writer.close() 157 | 158 | 159 | -------------------------------------------------------------------------------- /convert_data/download_and_convert_data.sh: -------------------------------------------------------------------------------- 1 | #mkdir data 2 | 3 | ############################################################################################VOC 4 | #wget http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar 5 | #tar -xvzf VOCtrainval_03-May-2010.tar -C data/ 6 | #tar -xvf VOCtrainval_03-May-2010.tar -C data/ 7 | #mv data/VOCdevkit/VOC2010/JPEGImages/ data/ 8 | #wget http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz 9 | #tar -xvzf trainval.tar.gz -C data/ 10 | #python convert_VOC_human_body_parts.py 11 | #mv data/out_human_and_body_parts.tfrecord out_human_and_body_parts.tfrecord 12 | 13 | ############################################################################################Chalearn 14 | #mkdir data/chalearn 15 | #mkdir data/chalearn/api_code 16 | 
#wget https://competitions.codalab.org/my/datasets/download/764962c6-c270-4ee1-8721-e5611a5665f2 --no-check-certificate 17 | #wget https://competitions.codalab.org/my/datasets/download/27f9a04b-5499-4acf-b7b2-8aabb26f283c --no-check-certificate 18 | #mv 27f9a04b-5499-4acf-b7b2-8aabb26f283c dataset.zip 19 | #unzip dataset.zip -d data/chalearn/api_code/ 20 | #mv ChalearnLAPEvaluation.py data/chalearn/api_code/ChalearnLAPEvaluation.py 21 | #mv ChalearnLAPSample.py data/chalearn/api_code/ChalearnLAPSample.py 22 | #mv convert_CHALEARN_human_body_parts.py data/chalearn/api_code/convert_CHALEARN_human_body_parts.py 23 | #cd data/chalearn/api_code 24 | #python convert_CHALEARN_human_body_parts.py 25 | #cd ../../.. 26 | #mv data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord out_human_and_body_parts_chalearn.tfrecord 27 | 28 | #############################################################################################ADE20K 29 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/ADE20K_2016_07_26.zip 30 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/code.zip 31 | #mkdir data/ade20k 32 | #unzip ADE20K_2016_07_26.zip -d data/ade20k/ 33 | #unzip code.zip -d data/ade20k/ 34 | #mkdir data/ade20k/output_dir 35 | #mv data/ade20k/ADE20K_2016_07_26/index_ade20k.mat data/ade20k/index_ade20k.mat 36 | #mv human_body_parts.m data/ade20k/human_body_parts.m 37 | #cp data/ade20k/code/loadAde20K.m data/ade20k/loadAde20K.m 38 | #cd data/ade20k/ 39 | #octave human_body_parts.m 40 | #cd ../.. 41 | #python convert_ADE20k_human_body_parts.py 42 | #mv data/out_human_and_body_parts_ade_20k_max640edge.tfrecord out_human_and_body_parts_ade_20k.tfrecord 43 | 44 | ###############################################################################################JHMDB 45 | #wget http://files.is.tue.mpg.de/jhmdb/JHMDB_video.zip 46 | #wget http://files.is.tue.mpg.de/jhmdb/joint_positions.zip 47 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_mask.zip 48 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_flow_com.zip 49 | #mkdir data/jhmdb 50 | mkdir data/jhmdb/JHMDB_video 51 | #unzip JHMDB_video.zip -d data/jhmdb/ 52 | mv data/jhmdb/ReCompress_Videos/ data/jhmdb/JHMDB_video 53 | #unzip joint_positions.zip -d data/jhmdb/ 54 | #unzip puppet_mask.zip -d data/jhmdb/ 55 | #unzip puppet_flow_com.zip -d data/jhmdb/ 56 | 57 | #mv convert_jhmdb.py data/jhmdb/convert_jhmdb.py 58 | #mv read_my_data_keypoints.py data/jhmdb/read_my_data_keypoints.py 59 | #cd data/jhmdb/ 60 | #python convert_jhmdb.py 61 | #cd ../.. 
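# (added note) the two uncommented lines above -- mkdir data/jhmdb/JHMDB_video and
# the mv of ReCompress_Videos -- assume JHMDB_video.zip has already been unzipped
# into data/jhmdb/; for a fresh setup, uncomment the wget/unzip lines of the block too.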
62 | #mv data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord out_human_and_body_parts_keypoints_JHMDB.tfrecord -------------------------------------------------------------------------------- /convert_data/human_body_parts.m: -------------------------------------------------------------------------------- 1 | #human_body_parts 2 | load('index_ade20k.mat'); 3 | 4 | 5 | strings={'back','head','left arm','left foot','left hand','left leg','left shoulder','neck','right arm','right foot','right hand','right leg','right shoulder','torso'}; 6 | N=22210; 7 | 8 | for n = 1:N 9 | filename = fullfile(index.folder{n}, index.filename{n}); 10 | [Om, Oi, Pm, Pi, objects, parts] = loadAde20K(filename); 11 | 12 | object_class = objects.class; 13 | r = rows(objects.class); 14 | ok=0; 15 | for i =1:r 16 | if findstr(object_class{i,1},'person') 17 | ok=1; 18 | break 19 | endif 20 | end 21 | 22 | pndx = setdiff(unique(Pm),0); 23 | index_object_names = index.objectnames(pndx); 24 | if ok==0 || isempty(index_object_names) 25 | continue 26 | endif 27 | ok=0; 28 | for i=1:14 29 | if any(ismember(index_object_names,strings{i})) != 0 30 | ok=1; 31 | break 32 | endif 33 | end 34 | 35 | if ok ==1 36 | 37 | #disp('ok'); 38 | #figure; imshow(Om, []); title('Object classes'); 39 | #colormap(cat(1, [0 0 0], hsv(255))); 40 | 41 | #figure; imshow(Oi, []); title('Object classes'); 42 | #colormap(cat(1, [0 0 0], hsv(255))); 43 | 44 | #subplot(round(sqrt(Nlevels)), ceil(sqrt(Nlevels)), 1) 45 | #imshow(Pm(:,:,1), []); title('Part classes') 46 | #colormap(cat(1, [0 0 0], hsv(255))) 47 | 48 | file_Om = sprintf('output_dir/Om%d.mat',n); 49 | file_Oi = sprintf('output_dir/Oi%d.mat',n); 50 | file_Pm = sprintf('output_dir/Pm%d.mat',n); 51 | file_Pi = sprintf('output_dir/Pi%d.mat',n); 52 | file_objects = sprintf('output_dir/objects%d.mat',n); 53 | file_parts = sprintf('output_dir/parts%d.mat',n); 54 | file_name = sprintf('output_dir/file%d.jpg',n); 55 | 56 | save(file_Om, 'Om',"-mat7-binary"); 57 | save(file_Oi, 'Oi',"-mat7-binary"); 58 | save(file_Pm, 'Pm',"-mat7-binary"); 59 | save(file_Pi, 'Pi',"-mat7-binary"); 60 | save(file_objects, 'objects',"-mat7-binary"); 61 | save(file_parts, 'parts',"-mat7-binary"); 62 | copyfile(filename,file_name); 63 | 64 | pndx = setdiff(unique(Pm),0); 65 | disp('Parts present in this image:'); 66 | disp(n); 67 | endif 68 | #disp('next'); 69 | #fflush(stdout) 70 | 71 | 72 | 73 | #{ 74 | wndx = setdiff(unique(Om),0); 75 | disp('Objects present in this image (and their wordnet hierarchy):') 76 | for i = 1:length(wndx) 77 | %disp(sprintf('%60s', index.objectnames{wndx(n)})) 78 | if findstr(index.objectnames{wndx(i)},'person') 79 | disp('ok') 80 | figure; imshow(Om, []); title('Object classes') 81 | colormap(cat(1, [0 0 0], hsv(255))) 82 | endif 83 | end 84 | #} 85 | end 86 | -------------------------------------------------------------------------------- /convert_data/read_my_data_keypoints.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io as sio 6 | import cv2 7 | 8 | def map_value(x,A,B,a,b): 9 | return (x-A)*(b-a)/(B-A)+a 10 | 11 | random_color =np.random.randint(0,180,(7)) 12 | i=0 13 | example = tf.train.Example() 14 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 15 | for record in
tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options): 16 | 17 | i = i+1 18 | print i 19 | if i %70 !=0: 20 | continue 21 | example.ParseFromString(record) 22 | f = example.features.feature 23 | img_idnp = f['image/img_id'].int64_list.value[0] 24 | image_np = f['image/encoded'].bytes_list.value[0] 25 | heightnp = f['image/height'].int64_list.value[0] 26 | widthnp = f['image/width'].int64_list.value[0] 27 | num_instancesnp = f['label/num_instances'].int64_list.value[0] 28 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0] 29 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0] 30 | encoded = f['label/encoded'].bytes_list.value[0] 31 | gt_keypoints = f['label/keypoints'].bytes_list.value[0] 32 | 33 | image_np = np.fromstring(image_np, dtype=np.uint8) 34 | image_np = image_np.reshape((heightnp, widthnp, 3)) 35 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8) 36 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7)) 37 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32) 38 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5)) 39 | gt_keypointsnp = np.fromstring(gt_keypoints, dtype=np.float32).reshape((2,15)) 40 | cv2.imshow("img",image_np) 41 | cv2.waitKey(100) 42 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV) 43 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp): 44 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 45 | for mask_part in range(7): 46 | mask = human_masks[:,:,mask_part] 47 | mask = mask.astype(np.uint8) 48 | S = 255 49 | if mask_part ==0: 50 | S=100 51 | for x in range(int(h_box[0]),int(h_box[2])): 52 | for y in range(int(h_box[1]),int(h_box[3])): 53 | if mask[y,x]==1: 54 | hsv[y,x,0] = random_color[mask_part] 55 | hsv[y,x,1] = S 56 | for x in range(15): 57 | gt_keypointsnp[0,x] = map_value(gt_keypointsnp[0,x],-10.0,10.0,h_box[0],h_box[2]) 58 | gt_keypointsnp[1,x] = map_value(gt_keypointsnp[1,x],-10.0,10.0,h_box[1],h_box[3]) 59 | hsv = cv2.circle(hsv,(int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x])),2,(255,255,255)) 60 | print int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x]) 61 | bgrr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 62 | cv2.imshow("img",bgrr) 63 | cv2.waitKey(700) 64 | 65 | 66 | 67 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 68 | cv2.imshow("img",bgr) 69 | cv2.waitKey(700) 70 | 71 | 72 | -------------------------------------------------------------------------------- /convert_data/visualize_records_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | import cv2 5 | 6 | random_color =np.random.randint(0,180,(7)) 7 | 8 | example = tf.train.Example() 9 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 10 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_keypoints_to_body_parts_COCO.tfrecord',options): 11 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts_ade_20k_max640edge.tfrecord',options): 12 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts.tfrecord',options): 13 | #for record in tf.python_io.tf_record_iterator('data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord',options): 14 | for record in tf.python_io.tf_record_iterator('data/freiburg/out_human_and_body_parts_Freiburg.tfrecord',options): 15 | #for record in 
tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options): 16 | example.ParseFromString(record) 17 | f = example.features.feature 18 | img_idnp = f['image/img_id'].int64_list.value[0] 19 | image_np = f['image/encoded'].bytes_list.value[0] 20 | heightnp = f['image/height'].int64_list.value[0] 21 | widthnp = f['image/width'].int64_list.value[0] 22 | num_instancesnp = f['label/num_instances'].int64_list.value[0] 23 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0] 24 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0] 25 | encoded = f['label/encoded'].bytes_list.value[0] 26 | image_np = np.fromstring(image_np, dtype=np.uint8) 27 | image_np = image_np.reshape((heightnp, widthnp, 3)) 28 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8) 29 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7)) 30 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32) 31 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5)) 32 | cv2.imshow("img",image_np) 33 | cv2.waitKey(100) 34 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV) 35 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp): 36 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 37 | for mask_part in range(7): 38 | mask = human_masks[:,:,mask_part] 39 | mask = mask.astype(np.uint8) 40 | S = 255 41 | if mask_part ==0: 42 | S=100 43 | for x in range(int(h_box[0]),int(h_box[2])): 44 | for y in range(int(h_box[1]),int(h_box[3])): 45 | if mask[y,x]==1: 46 | hsv[y,x,0] = random_color[mask_part] 47 | hsv[y,x,1] = S 48 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 49 | cv2.imshow("img",bgr) 50 | cv2.waitKey(1000) 51 | 52 | 53 | -------------------------------------------------------------------------------- /crontab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | out=$(ps aux | grep '/usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py' | rev | cut -d ' ' -f 1 | rev | wc -l) # grep matches its own process too, so 2 lines mean train.py is already running 3 | if [ $out -eq "2" ];then 4 | echo "2 processes" >> /tmp/testing.txt 5 | else 6 | echo "1 process" >> /tmp/testing.txt 7 | echo $(date) >> /tmp/testing.txt 8 | export CUDA_VISIBLE_DEVICES=0 9 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/ 10 | `(/usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py &>> /tmp/testing.txt)` 11 | echo "tried to start" >> /tmp/testing.txt; 12 | fi 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Place and unzip your coco in this dir, like 2 | 3 | ```buildoutcfg 4 | ./data 5 | ./coco 6 | ./annotations 7 | ./train2014 8 | ./val2014 9 | ``` 10 | -------------------------------------------------------------------------------- /document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/document.pdf -------------------------------------------------------------------------------- /draw/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/__init__.py
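# (sketch, not in the repo) draw.py below replays network dumps saved with numpy;
# the slot order matches its unpacking, while every shape here is an illustrative
# assumption rather than the network's real dimensions:
import numpy as np
image = np.zeros((480, 640, 3), np.uint8)
bbox = np.array([[50., 40., 200., 300.]], np.float32)        # one detection: x1, y1, x2, y2
label = np.array([1]); gt_label = np.array([1])              # class ids (1 = person)
prob = np.full((1, 81), 0.01, np.float32); prob[0, 1] = 0.9  # per-class scores
gt_bbox = bbox.copy()
final_mask = np.zeros((1, 112, 112, 7), np.float32)          # per-roi body/part masks
gt_mask = np.zeros((1, 480, 640, 7), np.uint8)
np.save("/home/alex/PycharmProjects/data/array1.npy",
        np.array([image, bbox, label, prob, gt_bbox, gt_label, final_mask, gt_mask], dtype=object))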
-------------------------------------------------------------------------------- /draw/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import draw_human_body_parts 3 | 4 | for x in range(1,150): 5 | array = np.load("/home/alex/PycharmProjects/data/array"+str(x)+".npy") 6 | image = array[0] 7 | bbox = array[1] 8 | label =array[2] 9 | prob = array[3] 10 | gt_bbox = array[4] 11 | gt_label = array[5] 12 | final_mask = array[6] 13 | gt_mask = array[7] 14 | 15 | #visualize_mask_gt(bbox,final_mask,gt_mask,label,prob) 16 | #draw_segmentation_parts(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) 17 | #draw_bbox_better(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) ############this is for voc independent body parts 18 | print (x) 19 | draw_human_body_parts(x,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) 20 | 21 | 22 | -------------------------------------------------------------------------------- /draw/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def bbox_overlaps(boxes,query_boxes): # boxes are the predicted boxes and query_boxes are the ground truth boxes 5 | N = boxes.shape[0] 6 | K = query_boxes.shape[0] 7 | overlaps = np.zeros((N, K), dtype=np.float32) 8 | iw, ih, box_area,ua,k, n = 0,0,0,0,0,0 9 | for k in range(K): 10 | box_area = ( 11 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 12 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 13 | ) 14 | for n in range(N): 15 | iw = ( 16 | min(boxes[n, 2], query_boxes[k, 2]) - 17 | max(boxes[n, 0], query_boxes[k, 0]) + 1 18 | ) 19 | if iw > 0: 20 | ih = ( 21 | min(boxes[n, 3], query_boxes[k, 3]) - 22 | max(boxes[n, 1], query_boxes[k, 1]) + 1 23 | ) 24 | if ih > 0: 25 | ua = float( 26 | (boxes[n, 2] - boxes[n, 0] + 1) * 27 | (boxes[n, 3] - boxes[n, 1] + 1) + 28 | box_area - iw * ih 29 | ) 30 | overlaps[n, k] = iw * ih / ua 31 | return overlaps 32 | 33 | def IOU_mask(mask,gt_mask): 34 | intersection = np.sum( (mask * gt_mask) > 0 ) 35 | union = np.sum((np.logical_or(mask,gt_mask))> 0) 36 | return float(intersection)/float(union+1) 37 |
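# (sketch, not part of the original file) a quick check of the helpers above:
# boxes = np.array([[0., 0., 9., 9.]])
# gt = np.array([[0., 0., 9., 9.], [20., 20., 29., 29.]])
# bbox_overlaps(boxes, gt)  ->  [[1.0, 0.0]]: full IoU with the first gt box, none with the second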
38 | def metric_for_image(bbox=None,gt_bbox=None,label=None, gt_label=None, prob=None,final_mask=None): #NOTE: gt_mask is read from the module scope (it is loaded in the loop below) 39 | #find the overlaps between each predicted box and gt_box 40 | overlaps = bbox_overlaps(np.ascontiguousarray(bbox[:, :4], dtype=np.float),np.ascontiguousarray(gt_bbox[:, :4], dtype=np.float)) 41 | gt_assignment = overlaps.argmax(axis=1) #multiple bboxes may have a single GT 42 | 43 | max_overlaps = overlaps[np.arange(bbox.shape[0]), gt_assignment] #select the predicted boxes that are closest to the gt_box 44 | 45 | good = 0 46 | total_boxes = 0 47 | for i,overlap in enumerate(max_overlaps): 48 | box = bbox[i] 49 | width = int(box[2])-int(box[0]) 50 | height = int(box[3])-int(box[1]) 51 | if prob[i,label[i]] > 0.5 and width*height >1000 and label[i]!=0: #skip detections whose classification score is below 0.5, whose box is too small, or whose label is background 52 | total_boxes = total_boxes+1 #this will be the denominator 53 | if label[i] == gt_label[i]: 54 | if overlap >0.5: #if overlap of the BOXES is bigger than 0.5 55 | output_mask = (final_mask[i] > 0.6).astype(np.uint8) 56 | 57 | gt_maski = gt_mask[:,int(box[1]):int(box[3]),int(box[0]):int(box[2]),:] #crop from gt_mask given the predicted box 58 | gt_maskii = np.zeros([112,112,7],np.uint8) 59 | for x in range(7): 60 | mask = gt_maski[...,x] 61 | mask = mask[0] 62 | gt_maskii[...,x] = cv2.resize(mask.astype(np.uint8),(112,112)) 63 | 64 | if IOU_mask(output_mask,gt_maskii) > 0.5: #if overlap of the MASKS is bigger than 0.5 65 | good = good +1 66 | precision_over_image = float(good)/(float(total_boxes)+np.finfo(np.float32).eps) 67 | return precision_over_image 68 | 69 | metrics = [] 70 | for i in range(0,512): 71 | bbox = np.load('data/bbox'+str(i)+'.npy') 72 | gt_bbox = np.load('data/gt_boxes'+str(i)+'.npy') 73 | final_mask = np.load('data/final_mask'+str(i)+'.npy') 74 | gt_label = np.load('data/gt_label'+str(i)+'.npy') 75 | image = np.load('data/image'+str(i)+'.npy') 76 | label = np.load('data/label'+str(i)+'.npy') 77 | prob = np.load('data/prob'+str(i)+'.npy') 78 | gt_mask = np.load('data/gt_mask'+str(i)+'.npy') 79 | metrics.append(metric_for_image(bbox,gt_bbox,label,gt_label,prob,final_mask)) 80 | 81 | print reduce(lambda x, y: x + y, metrics) / len(metrics) # mean precision over all evaluated images 82 | -------------------------------------------------------------------------------- /draw/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | import scipy.misc 5 | import cv2 6 | import numpy.ma as ma 7 | 8 | FLAGS = tf.app.flags.FLAGS 9 | _DEBUG = False 10 | 11 | 12 | #not used 13 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None): 14 | #print("image") 15 | #print(image) 16 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0) 17 | norm_image = np.uint8(image/0.1*127.0 + 127.0) 18 | #print("norm_image") 19 | #print(norm_image) 20 | source_img = Image.fromarray(norm_image) 21 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG') 22 | 23 | 24 | #label colors 25 | colors = [] 26 | colors.append([180,255,255]) 27 | colors.append([150,255,255]) 28 | colors.append([120,255,255]) 29 | colors.append([90,255,255]) 30 | colors.append([60,255,255]) 31 | colors.append([30,255,255]) 32 | colors.append([0,255,255]) 33 | 34 | 35 | 36 | def draw_human_body_parts(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None): 37 | import cv2 38 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 39 | hsv_body = hsv.copy() 40 | if bbox is not None: 41 | dictinary = {} #key: area, value:[box,label,gt_label,prob,mask,color] #the dictionary is later sorted by area so that smaller boxes are drawn in front 42 | for i, box in enumerate(bbox): 43 | width = int(box[2])-int(box[0]) 44 | height = int(box[3])-int(box[1]) 45 | #l=label[i] 46 | #p = prob[i,label[i]]
47 | if (prob[i,label[i]] > 0.5) and width*height >1000 and label[i]!=0: #filter the boxes: label[i] is the predicted class and prob[i,label[i]] its score 48 | area = float((box[2]-box[0])*(box[3]-box[1])) 49 | while area in dictinary: #nudge the key until it is unique, so boxes with equal areas do not collide 50 | area+=1 51 | 52 | mask = final_mask[i] 53 | masks = np.zeros((height,width,7)) 54 | body_mask = mask[...,0] > 0.6 55 | body_mask2 = np.array(body_mask,np.uint8) 56 | masks[...,0] = scipy.misc.imresize(body_mask2,(height,width)) 57 | 58 | # cv2.imshow("body_mask",body_mask.astype(np.uint8)*255) 59 | # cv2.waitKey(3000) 60 | for x in range(1,7): 61 | maska = mask[...,x] > 0.6 # if prob for a pixel is bigger than 0.6, draw it 62 | # cv2.imshow("maska"+str(x),maska.astype(np.uint8)*255) 63 | # cv2.waitKey(3000) 64 | maska = np.logical_and(maska,body_mask) # clip the parts so they fit inside the body; the body is segmented more reliably 65 | maska = ma.masked_array(mask[...,x], mask=np.logical_not(maska)) 66 | maska = np.ma.filled(maska, 0) 67 | #maska = maska >0 68 | maska = scipy.misc.imresize(maska,(height,width)) 69 | 70 | masks[...,x] = maska 71 | dictinary[round(area,4)]=(box,label[i],gt_label[i],prob[i,label[i]],masks,colors[label[i]]) 72 | sorted_keys = sorted(dictinary.iterkeys(),reverse=True) 73 | # cv2.waitKey(6000) 74 | for key,i in zip(sorted_keys,range(len(sorted_keys))): 75 | bo, lab,gt_lab,_,mask,col= dictinary[key] #mask has shape [H,W,7] 76 | 77 | max_indices = np.argmax(mask,axis=2) # where two part masks overlap, select the part with the highest probability 78 | #max_indices is an array with size [H,W] and its values represent the per-pixel label of the parts 79 | for x in range(int(bo[0]),int(bo[2])): 80 | for y in range(int(bo[1]),int(bo[3])): 81 | 82 | xm = x-(int(bo[0])) 83 | ym = y-(int(bo[1])) 84 | if mask[ym,xm,max_indices[ym,xm]] >0: 85 | hsv[y,x,0] = colors[max_indices[ym,xm]][0] 86 | hsv[y,x,1] = 255 87 | 88 | for x in range(int(bo[0]),int(bo[2])): 89 | for y in range(int(bo[1]),int(bo[3])): 90 | 91 | xm = x-(int(bo[0])) 92 | ym = y-(int(bo[1])) 93 | if(mask[ym,xm,0]==1): 94 | hsv_body[y,x,0] = colors[0][0] 95 | hsv_body[y,x,1] = 150 96 | 97 | hsv = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 98 | hsv_body = cv2.cvtColor(hsv_body, cv2.COLOR_HSV2RGB) 99 | i=0 100 | for key in sorted_keys: 101 | bo, lab,gt_lab,_,_,col= dictinary[key] 102 | c = (255,0,0) 103 | bo, lab,gt_lab,_,_,col= dictinary[key] 104 | text = cat_id_to_cls_name(lab) 105 | i=i+1 106 | hsv = cv2.rectangle(hsv,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3) 107 | hsv = cv2.putText(hsv,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255)) 108 | hsv_body = cv2.rectangle(hsv_body,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3) 109 | hsv_body = cv2.putText(hsv_body,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255)) 110 | #cv2.imwrite('test_' + name + '_' + str(step) +'.jpg',image) 111 | cv2.imwrite('/home/alex/PycharmProjects/data/test_seg' + name + '_' + str(step) +'.jpg',hsv) 112 | cv2.imwrite('/home/alex/PycharmProjects/data/test_hsv' + name + '_' + str(step) +'.jpg',hsv_body) 113 | 114 | def cat_id_to_cls_name(catId): 115 | cls_name = np.array(['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 116 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 117 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 118 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 119 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 120 | 'skis', 'snowboard',
'sports ball', 'kite', 'baseball bat', 121 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 122 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 123 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 124 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 125 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 126 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 127 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 128 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 129 | return cls_name[catId] 130 | -------------------------------------------------------------------------------- /draw/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/utils.pyc -------------------------------------------------------------------------------- /libs/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | sh make.sh -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.py -------------------------------------------------------------------------------- /libs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.pyc -------------------------------------------------------------------------------- /libs/boxes/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import cython_nms 8 | from . import cython_bbox 9 | import nms 10 | import timer 11 | from .anchor import anchors 12 | from .anchor import anchors_plane 13 | from .roi import roi_cropping 14 | from . import cython_anchor 15 | from . import cython_bbox_transform
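# (added note, not in the original file) per the __main__ benchmark in anchor.py
# further below: anchors() yields a (15, 4) array (5 scales x 3 ratios at a single
# position), and anchors_plane(height, width, stride, anc) tiles those anchors
# over an height x width feature map.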
-------------------------------------------------------------------------------- /libs/boxes/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/__init__.pyc -------------------------------------------------------------------------------- /libs/boxes/anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from libs.boxes import cython_anchor 7 | 8 | def anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16): 9 | """Get a set of anchors at one position """ 10 | return generate_anchors(base_size=base, scales=np.asarray(scales, np.int32), ratios=ratios) 11 | 12 | def anchors_plane(height, width, stride = 1.0, 13 | scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16): 14 | """Get a complete set of anchors in a spatial plane, 15 | height, width are plane dimensions 16 | stride is the scale ratio of the plane relative to the input image 17 | """ 18 | # TODO: implement in C, or pre-compute them, or set to a fixed input-shape 19 | # enum all anchors in a plane 20 | # scales = kwargs.setdefault('scales', [2, 4, 8, 16, 32]) 21 | # ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0]) 22 | # base = kwargs.setdefault('base', 16) 23 | anc = anchors(scales, ratios, base) 24 | all_anchors = cython_anchor.anchors_plane(height, width, stride, anc) 25 | #print (all_anchors.shape) 26 | return all_anchors 27 | 28 | # Written by Ross Girshick and Sean Bell 29 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 30 | scales=2 ** np.arange(3, 6)): 31 | """ 32 | Generate anchor (reference) windows by enumerating aspect ratios X 33 | scales wrt a reference (0, 0, 15, 15) window. 34 | """ 35 | 36 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 37 | ratio_anchors = _ratio_enum(base_anchor, ratios) 38 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 39 | for i in xrange(ratio_anchors.shape[0])]) 40 | return anchors 41 | 42 | def _whctrs(anchor): 43 | """ 44 | Return width, height, x center, and y center for an anchor (window). 45 | """ 46 | 47 | w = anchor[2] - anchor[0] + 1 48 | h = anchor[3] - anchor[1] + 1 49 | x_ctr = anchor[0] + 0.5 * (w - 1) 50 | y_ctr = anchor[1] + 0.5 * (h - 1) 51 | return w, h, x_ctr, y_ctr 52 | 53 | 54 | def _mkanchors(ws, hs, x_ctr, y_ctr): 55 | """ 56 | Given a vector of widths (ws) and heights (hs) around a center 57 | (x_ctr, y_ctr), output a set of anchors (windows). 58 | """ 59 | 60 | ws = ws[:, np.newaxis] 61 | hs = hs[:, np.newaxis] 62 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 63 | y_ctr - 0.5 * (hs - 1), 64 | x_ctr + 0.5 * (ws - 1), 65 | y_ctr + 0.5 * (hs - 1))) 66 | return anchors 67 | 68 | 69 | def _ratio_enum(anchor, ratios): 70 | """ 71 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 72 | """ 73 | 74 | w, h, x_ctr, y_ctr = _whctrs(anchor) 75 | size = w * h 76 | size_ratios = size / ratios 77 | ws = np.round(np.sqrt(size_ratios)) 78 | hs = np.round(ws * ratios) 79 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 80 | return anchors 81 | 82 | 83 | def _scale_enum(anchor, scales): 84 | """ 85 | Enumerate a set of anchors for each scale wrt an anchor.
86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | ws = w * scales 90 | hs = h * scales 91 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 92 | return anchors 93 | 94 | def _unmap(data, count, inds, fill=0): 95 | """ Unmap a subset of item (data) back to the original set of items (of 96 | size count) """ 97 | if len(data.shape) == 1: 98 | ret = np.empty((count,), dtype=np.float32) 99 | ret.fill(fill) 100 | ret[inds] = data 101 | else: 102 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 103 | ret.fill(fill) 104 | ret[inds, :] = data 105 | return ret 106 | 107 | if __name__ == '__main__': 108 | import time 109 | 110 | t = time.time() 111 | a = anchors() 112 | num_anchors = 0 113 | 114 | # all_anchors = anchors_plane(200, 250, stride=4, boarder=0) 115 | # num_anchors += all_anchors.shape[0] 116 | for i in range(10): 117 | ancs = anchors() 118 | all_anchors = cython_anchor.anchors_plane(200, 250, 4, ancs) 119 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 120 | all_anchors = cython_anchor.anchors_plane(100, 125, 8, ancs) 121 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 122 | all_anchors = cython_anchor.anchors_plane(50, 63, 16, ancs) 123 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 124 | all_anchors = cython_anchor.anchors_plane(25, 32, 32, ancs) 125 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 126 | print('average time: %f' % ((time.time() - t) / 10)) 127 | print('anchors: %d' % (num_anchors / 10)) 128 | print(a.shape, '\n', a) 129 | print (all_anchors.shape) 130 | # from IPython import embed 131 | # embed() 132 | -------------------------------------------------------------------------------- /libs/boxes/anchor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/anchor.pyc -------------------------------------------------------------------------------- /libs/boxes/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = 
( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | def bbox_intersections( 58 | np.ndarray[DTYPE_t, ndim=2] boxes, 59 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 60 | """ 61 | For each query box compute the intersection ratio covered by boxes 62 | ---------- 63 | Parameters 64 | ---------- 65 | boxes: (N, 4) ndarray of float 66 | query_boxes: (K, 4) ndarray of float 67 | Returns 68 | ------- 69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 70 | """ 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef unsigned int K = query_boxes.shape[0] 73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 74 | cdef DTYPE_t iw, ih, box_area 75 | cdef DTYPE_t ua 76 | cdef unsigned int k, n 77 | for k in range(K): 78 | box_area = ( 79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 81 | ) 82 | for n in range(N): 83 | iw = ( 84 | min(boxes[n, 2], query_boxes[k, 2]) - 85 | max(boxes[n, 0], query_boxes[k, 0]) + 1 86 | ) 87 | if iw > 0: 88 | ih = ( 89 | min(boxes[n, 3], query_boxes[k, 3]) - 90 | max(boxes[n, 1], query_boxes[k, 1]) + 1 91 | ) 92 | if ih > 0: 93 | intersec[n, k] = iw * ih / box_area 94 | return intersec -------------------------------------------------------------------------------- /libs/boxes/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import warnings 10 | 11 | def bbox_transform(ex_rois, gt_rois): 12 | """ 13 | computes the distance from ground-truth boxes to the given boxes, normed by their size 14 | :param ex_rois: n * 4 numpy array, given boxes 15 | :param gt_rois: n * 4 numpy array, ground-truth boxes 16 | :return: deltas: n * 4 numpy array, regression targets from the given boxes to the ground-truth boxes 17 | """ 18 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 19 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 20 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 21 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 22 | 23 | # assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \ 24 | # 'Invalid boxes found: {} {}'. 
\ 25 | # format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :]) 26 | 27 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 28 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 29 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 30 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 31 | 32 | # warnings.catch_warnings() 33 | # warnings.filterwarnings('error') 34 | targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths # targets are scaled up (x10 for centers, x5 for sizes), presumably to balance the loss magnitudes; bbox_transform_inv undoes this 35 | targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights 36 | targets_dw = 5.0 * np.log(gt_widths / ex_widths) 37 | targets_dh = 5.0 * np.log(gt_heights / ex_heights) 38 | 39 | targets = np.vstack( 40 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 41 | return targets 42 | 43 | def bbox_transform_inv(boxes, deltas): # called from roi.py (line 116); shapes: boxes=(R, 4) as [x1, y1, x2, y2], deltas=(R, Kx4) 44 | if boxes.shape[0] == 0: 45 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 46 | 47 | boxes = boxes.astype(deltas.dtype, copy=False) 48 | 49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 51 | ctr_x = boxes[:, 0] + 0.5 * widths 52 | ctr_y = boxes[:, 1] + 0.5 * heights 53 | 54 | dx = deltas[:, 0::4] * 0.1 # multiply by 0.1 / 0.2 to invert the x10 / x5 scaling applied in bbox_transform (line 34) 55 | dy = deltas[:, 1::4] * 0.1 56 | dw = deltas[:, 2::4] * 0.2 57 | dh = deltas[:, 3::4] * 0.2 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | # pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | # pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_w = np.exp(dw + np.log(widths[:, np.newaxis])) 65 | pred_h = np.exp(dh + np.log(heights[:, np.newaxis])) 66 | 67 | 68 | #pred_w = np.exp(dw + np.log(widths[:, np.newaxis])) 69 | #pred_h = np.exp(dh + np.log(heights[:, np.newaxis])) 70 | 71 | 72 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 73 | # x1 74 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 75 | # y1 76 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 77 | # x2 78 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 79 | # y2 80 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 81 | 82 | return pred_boxes 83 | 84 | def clip_boxes(boxes, im_shape): 85 | """ 86 | Clip boxes to image boundaries. 
87 | """ 88 | 89 | # x1 >= 0 90 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 91 | # y1 >= 0 92 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 93 | # x2 < im_shape[1] 94 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 95 | # y2 < im_shape[0] 96 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 97 | return boxes 98 | -------------------------------------------------------------------------------- /libs/boxes/bbox_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/bbox_transform.pyc -------------------------------------------------------------------------------- /libs/boxes/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | from ..fast_rcnn.config import cfg 13 | 14 | def im_list_to_blob(ims): 15 | """Convert a list of images into a network input. 16 | 17 | Assumes images are already prepared (means subtracted, BGR order, ...). 18 | """ 19 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 20 | num_images = len(ims) 21 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 22 | dtype=np.float32) 23 | for i in xrange(num_images): 24 | im = ims[i] 25 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 26 | 27 | return blob 28 | 29 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 30 | """Mean subtract and scale an image for use in a blob.""" 31 | im = im.astype(np.float32, copy=False) 32 | im -= pixel_means 33 | im_shape = im.shape 34 | im_size_min = np.min(im_shape[0:2]) 35 | im_size_max = np.max(im_shape[0:2]) 36 | im_scale = float(target_size) / float(im_size_min) 37 | # Prevent the biggest axis from being more than MAX_SIZE 38 | if np.round(im_scale * im_size_max) > max_size: 39 | im_scale = float(max_size) / float(im_size_max) 40 | if cfg.TRAIN.RANDOM_DOWNSAMPLE: 41 | r = 0.6 + np.random.rand() * 0.4 42 | im_scale *= r 43 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 44 | interpolation=cv2.INTER_LINEAR) 45 | 46 | return im, im_scale 47 | -------------------------------------------------------------------------------- /libs/boxes/cython_anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_anchor.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_anchor.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed 
under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | 14 | def anchors_plane( 15 | int height, int width, int stride, 16 | np.ndarray[DTYPE_t, ndim=2] anchors_base): 17 | """ 18 | Parameters 19 | ---------- 20 | height: height of plane 21 | width: width of plane 22 | stride: stride of the original image 23 | anchors_base: (A, 4) a base set of anchors 24 | Returns 25 | ------- 26 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 27 | """ 28 | cdef unsigned int A = anchors_base.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 30 | cdef unsigned int iw, ih 31 | cdef unsigned int k 32 | cdef unsigned int A4 33 | cdef unsigned int sh 34 | cdef unsigned int sw 35 | A4 = A*4 36 | for iw in range(width): 37 | sw = iw * stride 38 | for ih in range(height): 39 | sh = ih * stride 40 | for k in range(A): 41 | all_anchors[ih, iw, k, 0] = anchors_base[k, 0] + sw 42 | all_anchors[ih, iw, k, 1] = anchors_base[k, 1] + sh 43 | all_anchors[ih, iw, k, 2] = anchors_base[k, 2] + sw 44 | all_anchors[ih, iw, k, 3] = anchors_base[k, 3] + sh 45 | return all_anchors -------------------------------------------------------------------------------- /libs/boxes/cython_bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox_transform.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | # ctypedef float DTYPE_t 14 | 15 | #def bbox_transform( 16 | # np.ndarray[DTYPE_t, ndim=2] ex_rois, 17 | # np.ndarray[DTYPE_t, ndim=2] gt_rois): 18 | def bbox_transform( 19 | np.ndarray[DTYPE_t, ndim=2] ex_rois, 20 | np.ndarray[DTYPE_t, ndim=2] gt_rois): 21 | """ 22 | Parameters 23 | ---------- 24 | ex_rois: n * 4 numpy array, 
given boxes 25 | gt_rois: n * 4 numpy array, ground-truth boxes 26 | Returns 27 | ------- 28 | targets: (n, 4) ndarray 29 | """ 30 | cdef unsigned int R = ex_rois.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] targets = np.zeros((R, 4), dtype=DTYPE) 32 | cdef unsigned int i 33 | cdef DTYPE_t gt_w 34 | cdef DTYPE_t gt_h 35 | cdef DTYPE_t gt_cx 36 | cdef DTYPE_t gt_cy 37 | cdef DTYPE_t ex_w 38 | cdef DTYPE_t ex_h 39 | cdef DTYPE_t ex_cx 40 | cdef DTYPE_t ex_cy 41 | for i in range(R): 42 | gt_w = gt_rois[i, 2] - gt_rois[i, 0] + 1.0 43 | gt_h = gt_rois[i, 3] - gt_rois[i, 1] + 1.0 44 | ex_w = ex_rois[i, 2] - ex_rois[i, 0] + 1.0 45 | ex_h = ex_rois[i, 3] - ex_rois[i, 1] + 1.0 46 | gt_cx = gt_rois[i, 0] + gt_w * 0.5 47 | gt_cy = gt_rois[i, 1] + gt_h * 0.5 48 | ex_cx = ex_rois[i, 0] + ex_w * 0.5 49 | ex_cy = ex_rois[i, 1] + ex_h * 0.5 50 | targets[i, 0] = (gt_cx - ex_cx) / ex_w 51 | targets[i, 1] = (gt_cy - ex_cy) / ex_h 52 | targets[i, 2] = np.log(gt_w / ex_w) 53 | targets[i, 3] = np.log(gt_h / ex_h) 54 | return targets 55 | 56 | cdef inline DTYPE_t my_max(DTYPE_t a, DTYPE_t b): return a if a >= b else b 57 | cdef inline DTYPE_t my_min(DTYPE_t a, DTYPE_t b): return a if a <= b else b 58 | 59 | def bbox_transform_inv( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] deltas): 62 | """ 63 | Parameters 64 | ---------- 65 | boxes: n * 4 numpy array, given boxes 66 | deltas: (n, kx4) numpy array 67 | Returns 68 | ------- 69 | pred_boxes: (n, kx4) ndarray 70 | """ 71 | cdef unsigned int R = boxes.shape[0] 72 | cdef unsigned int k4 = deltas.shape[1] 73 | cdef unsigned int k 74 | k = k4 / 4 75 | cdef np.ndarray[DTYPE_t, ndim=2] pred_boxes = np.zeros((R, k4), dtype=DTYPE) 76 | if R == 0: 77 | return pred_boxes 78 | 79 | cdef unsigned int i 80 | cdef unsigned int j 81 | cdef unsigned int j4 82 | cdef DTYPE_t w 83 | cdef DTYPE_t h 84 | cdef DTYPE_t cx 85 | cdef DTYPE_t cy 86 | cdef DTYPE_t px 87 | cdef DTYPE_t py 88 | cdef DTYPE_t pw 89 | cdef DTYPE_t ph 90 | for i in range(R): 91 | w = boxes[i, 2] - boxes[i, 0] + 1.0 92 | h = boxes[i, 3] - boxes[i, 1] + 1.0 93 | cx = boxes[i, 0] + w * 0.5 94 | cy = boxes[i, 1] + h * 0.5 95 | for j in range(k): 96 | j4 = j * 4 97 | px = deltas[i, j4 ] * w + cx 98 | py = deltas[i, j4 + 1] * h + cy 99 | pw = np.exp(deltas[i, j4 + 2]) * w 100 | ph = np.exp(deltas[i, j4 + 3]) * h 101 | pred_boxes[i, j4 ] = px - 0.5 * pw 102 | pred_boxes[i, j4 + 1] = py - 0.5 * ph 103 | pred_boxes[i, j4 + 2] = px + 0.5 * pw 104 | pred_boxes[i, j4 + 3] = py + 0.5 * ph 105 | return pred_boxes 106 | 107 | def clip_boxes( 108 | np.ndarray[DTYPE_t, ndim=2] boxes, 109 | np.ndarray[DTYPE_t, ndim=1] im_shape): 110 | """ 111 | Parameters 112 | ---------- 113 | boxes: (n, kx4) numpy array, given boxes 114 | im_shape: (2,) numpy array, (image_height, image_width) 115 | Returns 116 | ------- 117 | clipped: (n, kx4) ndarray 118 | """ 119 | cdef unsigned int R = boxes.shape[0] 120 | cdef unsigned int k4 = boxes.shape[1] 121 | cdef unsigned int k = k4 / 4 122 | cdef np.ndarray[DTYPE_t, ndim=2] clipped = np.zeros((R, k4), dtype=DTYPE) 123 | cdef unsigned int i 124 | cdef unsigned int j 125 | cdef unsigned int j4 126 | for i in range(R): 127 | for j in range(k): 128 | j4 = j * 4 129 | clipped[i, j4 ] = my_max(my_min(boxes[i, j4 ], im_shape[1]-1), 0) 130 | clipped[i, j4 + 1] = my_max(my_min(boxes[i, j4 + 1], im_shape[0]-1), 0) 131 | clipped[i, j4 + 2] = my_max(my_min(boxes[i, j4 + 2], im_shape[1]-1), 0) 132 | clipped[i, j4 + 3] = my_max(my_min(boxes[i, j4 + 3], im_shape[0]-1), 0) 133 | 
return clipped -------------------------------------------------------------------------------- /libs/boxes/cython_nms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_nms.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/cython_nms.pyc -------------------------------------------------------------------------------- /libs/boxes/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /libs/boxes/nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms.pyc -------------------------------------------------------------------------------- /libs/boxes/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef 
np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see 
LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import libs.configs.config_v1 as cfg 10 | import libs.nms.gpu_nms as gpu_nms 11 | import libs.nms.cpu_nms as cpu_nms 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if force_cpu: # assumes libs/nms/cpu_nms.pyx builds a cpu_nms(dets, thresh) function, as in py-faster-rcnn 19 | return cpu_nms.cpu_nms(dets, thresh) 20 | return gpu_nms.gpu_nms(dets, thresh, device_id=0) 21 | 22 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None): 23 | """ 24 | post-process the results of im_detect 25 | :param scores: N * K numpy 26 | :param boxes: N * (K * 4) numpy 27 | :param class_sets: e.g. CLASSES = ('__background__','person','bike','motorbike','car','bus') 28 | :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]} 29 | """ 30 | num_class = scores.shape[1] if class_sets is None else len(class_sets) 31 | assert num_class * 4 == boxes.shape[1],\ 32 | 'Detection scores and boxes do not match %d vs %d' % (num_class, boxes.shape[1]) 33 | class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets 34 | 35 | res = [] 36 | for ind, cls in enumerate(class_sets[1:]): 37 | ind += 1 # skip background 38 | cls_boxes = boxes[:, 4*ind : 4*(ind+1)] 39 | cls_scores = scores[:, ind] 40 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) 41 | keep = nms(dets, thresh=0.3) 42 | dets = dets[keep, :] 43 | dets = dets[np.where(dets[:, 4] > threshold)] 44 | r = {} 45 | if dets.shape[0] > 0: 46 | r['class'], r['dets'] = cls, dets 47 | else: 48 | r['class'], r['dets'] = cls, None 49 | res.append(r) 50 | return res 51 | 52 | if __name__=='__main__': 53 | 54 | score = np.random.rand(10, 21) 55 | boxes = np.random.randint(0, 100, (10, 21, 2)) 56 | s = np.random.randint(0, 100, (10, 21, 2)) 57 | s = boxes + s 58 | boxes = np.concatenate((boxes, s), axis=2) 59 | boxes = np.reshape(boxes, [boxes.shape[0], -1]) 60 | # score = np.reshape(score, [score.shape[0], -1]) 61 | res = nms_wrapper(score, boxes) 62 | print (res) -------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms_wrapper.pyc -------------------------------------------------------------------------------- /libs/boxes/profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile -------------------------------------------------------------------------------- /libs/boxes/profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile.png -------------------------------------------------------------------------------- /libs/boxes/roi.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import functools 5 | 6 | 
import numpy as np 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | 10 | def roi_align(feat, boxes): 11 | """Given features and boxes, this function crops features (stub, not implemented yet) """ 12 | return 13 | 14 | def roi_cropping(feat, boxes, clses, anchors, spatial_scale=1.0/16): 15 | """This function computes final RPN boxes 16 | and crops areas from the incoming features (stub, not implemented yet) 17 | """ 18 | return -------------------------------------------------------------------------------- /libs/boxes/roi.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/roi.pyc -------------------------------------------------------------------------------- /libs/boxes/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /libs/boxes/timer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/timer.pyc -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/configs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.pyc -------------------------------------------------------------------------------- /libs/configs/config_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/config_v1.pyc -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.pyc -------------------------------------------------------------------------------- /libs/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import tensorflow as tf 7 | 8 | import tensorflow.contrib.slim as slim 9 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 10 | 11 | _FILE_PATTERN = 'coco_%s_*.tfrecord' 12 | 13 | SPLITS_TO_SIZES = {'train2014': 82783, 'val2014': 40504} 14 | 15 | _NUM_CLASSES = 81 16 | 17 | _ITEMS_TO_DESCRIPTIONS = { 18 | 'image': 'A color image of varying size.', 19 | 'label': 'An annotation image of varying size. (pixel-level masks)', 20 | 'gt_masks': 'masks of instances in this image. (instance-level masks), of shape (N, image_height, image_width)', 21 | 'gt_boxes': 'bounding boxes and classes of instances in this image, of shape (N, 5), each entry is (x1, y1, x2, y2, class)', 22 | } 23 | 24 | 25 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 26 | if split_name not in SPLITS_TO_SIZES: 27 | raise ValueError('split name %s was not recognized.' % split_name) 28 | 29 | if not file_pattern: 30 | file_pattern = _FILE_PATTERN 31 | file_pattern = os.path.join(dataset_dir, 'records', file_pattern % split_name) 32 | 33 | # Allowing None in the signature so that dataset_factory can use the default. 
34 | if reader is None: 35 | reader = tf.TFRecordReader 36 | 37 | keys_to_features = { 38 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 39 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 40 | 'label/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 41 | 'label/format': tf.FixedLenFeature((), tf.string, default_value='png'), 42 | 'image/height': tf.FixedLenFeature((), tf.int64), 43 | 'image/width': tf.FixedLenFeature((), tf.int64), 44 | 45 | 'label/num_instances': tf.FixedLenFeature((), tf.int64), 46 | 'label/gt_boxes': tf.FixedLenFeature((), tf.string), 47 | 'label/gt_masks': tf.FixedLenFeature((), tf.string), 48 | } 49 | 50 | def _masks_decoder(keys_to_tensors): 51 | masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) 52 | width = tf.cast(keys_to_tensors['image/width'], tf.int32) 53 | height = tf.cast(keys_to_tensors['image/height'], tf.int32) 54 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 55 | mask_shape = tf.stack([instances, height, width]) 56 | return tf.reshape(masks, mask_shape) 57 | 58 | def _gt_boxes_decoder(keys_to_tensors): 59 | bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) 60 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 61 | bboxes_shape = tf.stack([instances, 5]) 62 | return tf.reshape(bboxes, bboxes_shape) 63 | 64 | def _width_decoder(keys_to_tensors): 65 | width = keys_to_tensors['image/width'] 66 | return tf.cast(width, tf.int32) 67 | 68 | def _height_decoder(keys_to_tensors): 69 | height = keys_to_tensors['image/height'] 70 | return tf.cast(height, tf.int32) 71 | 72 | items_to_handlers = { 73 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 74 | 'label': slim.tfexample_decoder.Image('label/encoded', 'label/format', channels=1), 75 | 'gt_masks': slim.tfexample_decoder.ItemHandlerCallback( 76 | ['label/gt_masks', 'label/num_instances', 'image/width', 'image/height'], _masks_decoder), 77 | 'gt_boxes': slim.tfexample_decoder.ItemHandlerCallback(['label/gt_boxes', 'label/num_instances'], _gt_boxes_decoder), 78 | 'width': slim.tfexample_decoder.ItemHandlerCallback(['image/width'], _width_decoder), 79 | 'height': slim.tfexample_decoder.ItemHandlerCallback(['image/height'], _height_decoder), 80 | } 81 | 82 | decoder = slim.tfexample_decoder.TFExampleDecoder( 83 | keys_to_features, items_to_handlers) 84 | 85 | return slim.dataset.Dataset( 86 | data_sources=file_pattern, 87 | reader=reader, 88 | decoder=decoder, 89 | num_samples=SPLITS_TO_SIZES[split_name], 90 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 91 | num_classes=_NUM_CLASSES) 92 | 93 | def read(tfrecords_filename): 94 | 95 | if not isinstance(tfrecords_filename, list): 96 | tfrecords_filename = [tfrecords_filename] 97 | filename_queue = tf.train.string_input_producer( 98 | tfrecords_filename, num_epochs=100) 99 | 100 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 101 | reader = tf.TFRecordReader(options=options) 102 | _, serialized_example = reader.read(filename_queue) 103 | features = tf.parse_single_example( 104 | serialized_example, 105 | features={ 106 | 'image/img_id': tf.FixedLenFeature([], tf.int64), 107 | 'image/encoded': tf.FixedLenFeature([], tf.string), 108 | 'image/height': tf.FixedLenFeature([], tf.int64), 109 | 'image/width': tf.FixedLenFeature([], tf.int64), 110 | 'label/num_instances': tf.FixedLenFeature([], tf.int64), 111 | 'label/gt_masks': tf.FixedLenFeature([], tf.string), 112 | 'label/gt_boxes': 
tf.FixedLenFeature([], tf.string), 113 | 'label/encoded': tf.FixedLenFeature([], tf.string), 114 | }) 115 | # image = tf.image.decode_jpeg(features['image/encoded'], channels=3) 116 | img_id = tf.cast(features['image/img_id'], tf.int32) 117 | ih = tf.cast(features['image/height'], tf.int32) 118 | iw = tf.cast(features['image/width'], tf.int32) 119 | num_instances = tf.cast(features['label/num_instances'], tf.int32) 120 | image = tf.decode_raw(features['image/encoded'], tf.uint8) 121 | imsize = tf.size(image) 122 | image = tf.cond(tf.equal(imsize, ih * iw), \ 123 | lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \ 124 | lambda: tf.reshape(image, (ih, iw, 3))) 125 | 126 | gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32) 127 | gt_boxes = tf.reshape(gt_boxes, [num_instances, 5]) 128 | gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8) 129 | gt_masks = tf.cast(gt_masks, tf.int32) 130 | print (ih, iw) 131 | gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw, 7]) 132 | # NOTE: be careful here: the trailing 7 in the reshape above (the number of segmentation classes) replaced what used to be num_instances in the upstream code 133 | 134 | 135 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 136 | -------------------------------------------------------------------------------- /libs/datasets/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/coco.pyc -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from libs.visualization.summary_utils import visualize_input 7 | import glob 8 | from libs.datasets import coco 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | 12 | def get_dataset(dataset_name, split_name, dataset_dir, 13 | im_batch=1, is_training=False, file_pattern=None, reader=None): 14 | """Read the dataset's tfrecords and return preprocessed image, boxes and masks.""" 15 | if file_pattern is None: 16 | file_pattern = '*.tfrecord' 17 | 18 | tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern) 19 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords) 20 | 21 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training) 22 | #visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3)) 23 | 24 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 25 | 26 | -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/dataset_factory.pyc -------------------------------------------------------------------------------- /libs/datasets/download_and_convert_coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/download_and_convert_coco.pyc
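Editor's note: coco.py and dataset_factory.py above form the whole input pipeline: coco.read() decodes one tfrecord example and get_dataset() wires it into the preprocessing. A minimal sketch (not part of the repo) of pulling one decoded example through that pipeline follows; it assumes TF 1.x queue runners and tfrecords already generated under data/coco/records/ (adjust the path to your setup).

import glob
import tensorflow as tf
from libs.datasets import coco

# assumed location of the converted records
tfrecords = glob.glob('data/coco/records/coco_train2014_*.tfrecord')
image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords)

with tf.Session() as sess:
    # string_input_producer(num_epochs=...) creates a local variable, so init both
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, boxes, masks, n = sess.run([image, gt_boxes, gt_masks, num_instances])
    print(img.shape, boxes.shape, masks.shape, n)  # masks: (n, ih, iw, 7)
    coord.request_stop()
    coord.join(threads)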
-------------------------------------------------------------------------------- /libs/datasets/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/__init__.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/_mask.so -------------------------------------------------------------------------------- /libs/datasets/pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/coco.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include <stdint.h> 5 | #include <stddef.h> 6 | #include <assert.h> 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. 
*/ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import libs.datasets.pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 
28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/mask.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['./common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), './common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /libs/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | from .wrapper import anchor_decoder 10 | from .wrapper import anchor_encoder 11 | from .wrapper import roi_decoder 12 | from .wrapper import roi_encoder 13 | from .wrapper import mask_decoder 14 | from .wrapper import mask_encoder 15 | from .wrapper import sample_wrapper as sample_rpn_outputs 16 | from .wrapper import sample_with_gt_wrapper as sample_rpn_outputs_with_gt 17 | from .wrapper import gen_all_anchors 18 | from .wrapper import assign_boxes 19 | from .crop import crop as ROIAlign 20 | from .crop import crop_ as ROIAlign_ 21 | -------------------------------------------------------------------------------- /libs/layers/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/__init__.pyc 
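Editor's note: the long comment block in pycocotools/mask.py above describes the RLE mask API in the abstract; the tiny round-trip sketch below makes it concrete. It is an illustration, not repo code, and assumes the extension has been built (e.g. python setup.py build_ext --inplace, as in the pycocotools Makefile).

import numpy as np
from libs.datasets.pycocotools import mask as mask_util

# binary masks must be uint8 and in column-major (Fortran) order
m = np.zeros((4, 6), dtype=np.uint8)
m[1:3, 2:5] = 1                            # a 2x3 rectangle of foreground
rle = mask_util.encode(np.asfortranarray(m))

print(mask_util.area(rle))                 # 6 (pixels)
print(mask_util.toBbox(rle))               # [2. 1. 3. 2.] as [x, y, w, h]
assert (mask_util.decode(rle) == m).all()  # encode/decode is lossless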
-------------------------------------------------------------------------------- /libs/layers/anchor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/anchor.pyc -------------------------------------------------------------------------------- /libs/layers/assign.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import numpy as np
8 |
9 | import libs.boxes.cython_bbox as cython_bbox
10 | import libs.configs.config_v1 as cfg
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 | from libs.boxes.anchor import anchors_plane
13 | from libs.logs.log import LOG
14 | # FLAGS = tf.app.flags.FLAGS
15 |
16 | _DEBUG = False
17 |
18 | def assign_boxes(gt_boxes, min_k=2, max_k=5):
19 | """Assign boxes to pyramid layers according to their areas, using k = floor(k0 + log2(sqrt(area) / 224)) as in FPN
20 | Params
21 | -----
22 | gt_boxes: of shape (N, 5), each entry is [x1, y1, x2, y2, cls]
23 | min_k, max_k: the lowest and highest pyramid levels a box may be assigned to
24 |
25 | Returns
26 | -----
27 | layer_ids: of shape (N,), each entry is an id indicating the assigned layer
28 | """
29 | k0 = 4
30 | if gt_boxes.size > 0:
31 | layer_ids = np.zeros((gt_boxes.shape[0], ), dtype=np.int32)
32 | ws = gt_boxes[:, 2] - gt_boxes[:, 0]
33 | hs = gt_boxes[:, 3] - gt_boxes[:, 1]
34 | areas = ws * hs
35 | k = np.floor(k0 + np.log2(np.sqrt(areas) / 224))
36 | inds = np.where(k < min_k)[0]
37 | k[inds] = min_k
38 | inds = np.where(k > max_k)[0]
39 | k[inds] = max_k
40 | if _DEBUG:
41 | print ("### boxes and layer ids")
42 | print (np.hstack((gt_boxes[:, 0:4], k[:, np.newaxis])))
43 | return k.astype(np.int32)
44 |
45 | else:
46 | return np.asarray([], dtype=np.int32)
47 | -------------------------------------------------------------------------------- /libs/layers/assign.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/assign.pyc -------------------------------------------------------------------------------- /libs/layers/crop.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 |
7 | def crop(images, boxes, batch_inds, stride = 1, pooled_height = 56, pooled_width = 56, scope='ROIAlign'):
8 | """Cropping areas of features into fixed size
9 | Params:
10 | --------
11 | images: a 4-d Tensor of shape (N, H, W, C)
12 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
13 | batch_inds: a 1-d Tensor, the index of the source image in the batch for each box
14 |
15 | Returns:
16 | --------
17 | A Tensor of shape (N, pooled_height, pooled_width, C)
18 | """
19 | with tf.name_scope(scope):
20 | #
21 | boxes = boxes / (stride + 0.0) # stride is one of 32, 16, 8, 4;
scaling by the stride maps image-space boxes onto the feature map grid
22 | boxes = tf.reshape(boxes, [-1, 4])
23 |
24 | # normalize the boxes and swap x y dimensions
25 | shape = tf.shape(images)
26 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
27 | xs = boxes[:, 0]
28 | ys = boxes[:, 1]
29 | xs = xs / tf.cast(shape[2], tf.float32) # crop_and_resize expects box coordinates normalized to [0, 1]
30 | ys = ys / tf.cast(shape[1], tf.float32)
31 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
32 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
33 |
34 | # if batch_inds is False:
35 | # num_boxes = tf.shape(boxes)[0]
36 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
37 | # batch_inds = boxes[:, 0] * 0
38 | # batch_inds = tf.cast(batch_inds, tf.int32)
39 |
40 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
41 | if batch_inds is False:
42 | num_boxes = tf.shape(boxes)[0]
43 | batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
44 | batch_inds = boxes[:, 0] * 0
45 | batch_inds = tf.cast(batch_inds, tf.int32)
46 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
47 | with tf.control_dependencies([assert_op, images, batch_inds]):
48 | return tf.image.crop_and_resize(images, boxes, batch_inds,
49 | [pooled_height, pooled_width],
50 | method='bilinear',
51 | name='Crop')
52 |
53 | def crop_(images, boxes, batch_inds, ih, iw, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'):
54 | """Cropping areas of features into fixed size
55 | Params:
56 | --------
57 | images: a 4-d Tensor of shape (N, H, W, C)
58 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
59 | batch_inds: a 1-d Tensor, the index of the source image in the batch for each box
60 |
61 | Returns:
62 | --------
63 | A Tensor of shape (N, pooled_height, pooled_width, C)
64 | """
65 | with tf.name_scope(scope):
66 | #
67 | boxes = boxes / (stride + 0.0) # stride is one of 32, 16, 8, 4; scaling by it maps image-space boxes onto the feature map grid
68 | boxes = tf.reshape(boxes, [-1, 4])
69 |
70 | # normalize the boxes and swap x y dimensions
71 | shape = tf.shape(images) # images here is (N, H/stride, W/stride, C)
72 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
73 | xs = boxes[:, 0]
74 | ys = boxes[:, 1]
75 | xs = xs / tf.cast(shape[2], tf.float32) # crop_and_resize expects
box coordinates normalized to [0, 1]
76 | ys = ys / tf.cast(shape[1], tf.float32)
77 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
78 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
79 |
80 | # if batch_inds is False:
81 | # num_boxes = tf.shape(boxes)[0]
82 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
83 | # batch_inds = boxes[:, 0] * 0
84 | # batch_inds = tf.cast(batch_inds, tf.int32)
85 |
86 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
87 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
88 | with tf.control_dependencies([assert_op, images, batch_inds]):
89 | return [tf.image.crop_and_resize(images, boxes, batch_inds,
90 | [pooled_height, pooled_width],
91 | method='bilinear',
92 | name='Crop')] + [boxes]
93 |
94 | -------------------------------------------------------------------------------- /libs/layers/crop.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/crop.pyc -------------------------------------------------------------------------------- /libs/layers/mask.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import cv2
8 | import libs.boxes.cython_bbox as cython_bbox
9 | import libs.configs.config_v1 as cfg
10 | from libs.logs.log import LOG
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 |
13 | _DEBUG = False
14 | def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
15 | """Encode masks groundtruth into learnable targets
16 | Sample some examples
17 |
18 | Params
19 | ------
20 | gt_masks: image_height x image_width {0, 1} matrix, of shape (G, imh, imw)
21 | # note: modified here -- gt_masks is of shape (G, imh, imw, 7), one channel per body-part class
22 | gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
23 | rois: the bounding boxes of shape (N, 4),
24 | ## scores: scores of shape (N, 1)
25 | num_classes: K
26 | mask_height, mask_width: height and width of output masks
27 |
28 | Returns
29 | -------
30 | # rois: boxes sampled for cropping masks, of shape (M, 4)
31 | labels: class-ids of shape (M, 1)
32 | mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) in {0, 1} values
33 | mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1} indicating which mask is sampled
34 | """
35 | total_masks = rois.shape[0]
36 | if gt_boxes.size > 0:
37 | # B x G
38 | overlaps = cython_bbox.bbox_overlaps(
39 | np.ascontiguousarray(rois[:, 0:4], dtype=np.float),
40 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
41 | gt_assignment = overlaps.argmax(axis=1) # shape is N
42 | max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment] # N
43 | # note: this would assign every roi a positive label
44 | # labels = gt_boxes[gt_assignment, 4] # N
45 | labels = np.zeros((total_masks, ), np.float32)
46 | labels[:] = -1
47 |
48 | # sample positive rois whose intersection is above the mask threshold
49 | keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
50 | num_masks =
int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
51 | if keep_inds.size > 0 and num_masks < keep_inds.size:
52 | keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
53 | LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
54 | %(num_masks, rois.shape[0], gt_masks.shape[0]))
55 |
56 | labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]
57 |
58 | # rois = rois[inds]
59 | # labels = labels[inds].astype(np.int32)
60 | # gt_assignment = gt_assignment[inds]
61 |
62 | # ignore rois with overlaps between fg_threshold and bg_threshold
63 | # masks are only defined on positive rois
64 | ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0]
65 | labels[ignore_inds] = -1
66 |
67 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
68 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
69 | rois[rois < 0] = 0
70 |
71 | # TODO: speed bottleneck?
72 | for i in keep_inds:
73 | roi = rois[i, :4]
74 |
75 | for x in range(7): # one channel per body-part class; assumes num_classes >= 7
76 | cropped = gt_masks[gt_assignment[i], int(roi[1]):int(roi[3])+1, int(roi[0]):int(roi[2])+1,x]
77 | cropped = cv2.resize(cropped, (mask_width, mask_height), interpolation=cv2.INTER_NEAREST)
78 | mask_targets[i, :, :, x] = cropped
79 | mask_inside_weights[i, :, :, x] = 1
80 | else:
81 | # there is no gt
82 | labels = np.zeros((total_masks, ), np.float32)
83 | labels[:] = -1
84 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
85 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
86 | #np.save("/home/czurini/Alex/rois.npy",rois)
87 | # np.save("/home/czurini/Alex/mask_targets.npy",mask_targets)
88 | return labels, mask_targets, mask_inside_weights
89 |
90 | def decode(mask_targets, rois, classes, ih, iw):
91 | """Decode outputs into final masks
92 | Params
93 | ------
94 | mask_targets: of shape (N, h, w, K)
95 | rois: of shape (N, 4) [x1, y1, x2, y2]
96 | classes: of shape (N, 1) the class-id of each roi
97 | height: image height
98 | width: image width
99 |
100 | Returns
101 | ------
102 | M: a painted image with all masks, of shape (height, width), in [0, K]
103 | """
104 | Mask = np.zeros((ih, iw), dtype=np.float32)
105 | assert rois.shape[0] == mask_targets.shape[0], \
106 | '%d rois vs %d masks' %(rois.shape[0], mask_targets.shape[0])
107 | num = rois.shape[0]
108 | rois = clip_boxes(rois, (ih, iw))
109 | for i in np.arange(num):
110 | k = classes[i]
111 | mask = mask_targets[i, :, :, k]
112 | h, w = rois[i, 3] - rois[i, 1] + 1, rois[i, 2] - rois[i, 0] + 1
113 | x, y = rois[i, 0], rois[i, 1]
114 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
115 | mask *= k
116 |
117 | # paint
118 | Mask[y:y+h, x:x+w] = mask
119 |
120 | return Mask
121 |
122 |
123 |
124 | if __name__ == '__main__':
125 |
126 | import time
127 | import matplotlib.pyplot as plt
128 |
129 | t = time.time()
130 |
131 | for i in range(10):
132 | cfg.FLAGS.mask_threshold = 0.2
133 | N = 50
134 | W, H = 200, 200
135 | M = 50
136 |
137 | gt_masks = np.zeros((2, H, W, 7), dtype=np.int32) # the modified encode expects one channel per body-part class
138 | gt_masks[0, 50:150, 50:150, :] = 1
139 | gt_masks[1, 100:150, 50:150, :] = 1
140 | gt_boxes = np.asarray(
141 | [
142 | [20, 20, 100, 100, 1],
143 | [100, 100, 180, 180, 2]
144 | ])
145 | rois = gt_boxes[:, :4]
146 | print (rois)
147 | labels, mask_targets, mask_inside_weights = encode(gt_masks, gt_boxes, rois, 7, 7, 7)
148 | print (rois)
149 | Mask =
decode(mask_targets, rois, labels.astype(np.int32), H, W)
150 | if True:
151 | plt.figure(1)
152 | plt.imshow(Mask)
153 | plt.show()
154 | time.sleep(2)
155 | print(labels)
156 | print('average time: %f' % ((time.time() - t) / 10.0))
157 |
158 | -------------------------------------------------------------------------------- /libs/layers/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/mask.pyc -------------------------------------------------------------------------------- /libs/layers/roi.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 |
7 | import libs.boxes.cython_bbox as cython_bbox
8 | import libs.configs.config_v1 as cfg
9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
10 | from libs.logs.log import LOG
11 |
12 | # FLAGS = tf.app.flags.FLAGS
13 |
14 | _DEBUG = False
15 |
16 | def encode(gt_boxes, rois, num_classes):
17 | """Matching and encoding groundtruth boxes (gt_boxes) into learning targets for the boxes
18 | (with foreground/background sampling)
19 | Parameters
20 | ---------
21 | gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class]
22 | rois an array of shape (R x 4), [x1, y1, x2, y2]
23 | num_classes: scalar, number of classes
24 |
25 | Returns
26 | --------
27 | labels: Nx1 array in [0, num_classes)
28 | bbox_targets: of shape (N, Kx4) regression targets
29 | bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned.
30 | """
31 |
32 | all_rois = rois
33 | num_rois = rois.shape[0]
34 | if gt_boxes.size > 0:
35 | # R x G matrix
36 | overlaps = cython_bbox.bbox_overlaps(
37 | np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float),
38 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
39 | gt_assignment = overlaps.argmax(axis=1) # R
40 | # max_overlaps = overlaps.max(axis=1) # R
41 | max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
42 | # note: this would assign every roi a positive label
43 | # labels = gt_boxes[gt_assignment, 4]
44 | labels = np.zeros([num_rois], dtype=np.float32)
45 | labels[:] = -1
46 |
47 | # if _DEBUG:
48 | # print ('gt_assignment')
49 | # print (gt_assignment)
50 |
51 | # sample rois at an fg:bg ratio of about 1:3
52 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
53 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) # rois_per_image = 256, fg_roi_fraction = 0.25
54 | if fg_inds.size > 0 and fg_rois < fg_inds.size:
55 | fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
56 | labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]
57 |
58 | # TODO: sampling strategy
59 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
60 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64) # rois_per_image = 256
61 | if bg_inds.size > 0 and bg_rois < bg_inds.size:
62 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
63 | labels[bg_inds] = 0
64 |
65 | # ignore rois with overlaps between fg_threshold and bg_threshold
66 | ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\
67 | (max_overlaps < cfg.FLAGS.fg_threshold)))[0]
68 | labels[ignore_inds] = -1
69 |
70 | keep_inds = np.append(fg_inds, bg_inds)
71 | if _DEBUG:
72 | print ('keep_inds')
73 | print (keep_inds)
74 | print ('fg_inds')
75 | print (fg_inds)
76 | print ('bg_inds')
77 | print (bg_inds)
78 | print ('bg_rois:', bg_rois)
79 | print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
80 | # print (max_overlaps)
81 |
82 | LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds)))
83 |
84 | bbox_targets, bbox_inside_weights = _compute_targets(
85 | rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], labels[keep_inds], num_classes)
86 | bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
87 | bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
88 |
89 | else:
90 | # there is no gt
91 | labels = np.zeros((num_rois, ), np.float32)
92 | bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
93 | bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
94 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
95 | if bg_rois < num_rois:
96 | bg_inds = np.arange(num_rois)
97 | ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
98 | labels[ignore_inds] = -1
99 |
100 | return labels, bbox_targets, bbox_inside_weights
101 |
102 | def decode(boxes, scores, rois, ih, iw):
103 | """Decode prediction targets into boxes, keeping only the highest-scoring box for each roi
104 | Parameters
105 | ---------
106 | boxes: an array of shape (R, Kx4), K sets of [x1, y1, x2, y2]
107 | scores: an array of shape (R, K),
108 | rois: an array of shape (R, 4), [x1, y1, x2, y2]
109 |
110 | Returns
111 | --------
112 | final_boxes: of shape (R x 4)
113 | classes: of shape (R) in {0,1,2,3... K-1}
114 | scores: of shape (R) in [0 ~ 1]
115 | """
116 | boxes = bbox_transform_inv(rois, deltas=boxes)
117 | classes = np.argmax(scores, axis=1)
118 | classes = classes.astype(np.int32)
119 | scores = np.max(scores, axis=1)
120 | final_boxes = np.zeros((boxes.shape[0], 4), dtype=np.float32)
121 | for i in np.arange(0, boxes.shape[0]):
122 | ind = classes[i]*4
123 | final_boxes[i, 0:4] = boxes[i, ind:ind+4]
124 | final_boxes = clip_boxes(final_boxes, (ih, iw))
125 | return final_boxes, classes, scores
126 |
127 | def _compute_targets(ex_rois, gt_rois, labels, num_classes):
128 | """
129 | This function expands those targets into the 4-of-4*K representation used
130 | by the network (i.e. only one class has non-zero targets).
131 |
132 | Returns:
133 | bbox_target (ndarray): N x 4K blob of regression targets
134 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
135 | """
136 |
137 | assert ex_rois.shape[0] == gt_rois.shape[0]
138 | assert ex_rois.shape[1] == 4
139 | assert gt_rois.shape[1] == 4
140 |
141 | targets = bbox_transform(ex_rois, gt_rois)
142 |
143 | clss = labels
144 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
145 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
146 | inds = np.where(clss > 0)[0]
147 | for ind in inds:
148 | cls = int(clss[ind])
149 | start = 4 * cls
150 | end = start + 4
151 | bbox_targets[ind, start:end] = targets[ind, 0:4]
152 | bbox_inside_weights[ind, start:end] = 1
153 | return bbox_targets, bbox_inside_weights
154 |
155 | def _unmap(data, count, inds, fill=0):
156 | """ Unmap a subset of items (data) back to the original set of items (of
157 | size count) """
158 | if len(data.shape) == 1:
159 | ret = np.empty((count,), dtype=np.float32)
160 | ret.fill(fill)
161 | ret[inds] = data
162 | else:
163 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
164 | ret.fill(fill)
165 | ret[inds, :] = data
166 | return ret
167 |
168 | if __name__ == '__main__':
169 | cfg.FLAGS.fg_threshold = 0.1
170 | classes = np.random.randint(0, 3, (10, 1))
171 | boxes = np.random.randint(10, 50, (10, 2))
172 | s = np.random.randint(10, 20, (10, 2))
173 | s = boxes + s
174 | boxes = np.concatenate((boxes, s), axis=1)
175 | gt_boxes = np.hstack((boxes, classes))
176 | noise = np.random.randint(-3, 3, (10, 4))
177 | rois = gt_boxes[:, :4] + noise
178 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, rois, num_classes=3)
179 | print (labels)
180 | print (bbox_inside_weights)
181 |
182 | ls = np.zeros((labels.shape[0], 3))
183 | for i in range(labels.shape[0]):
184 | ls[i, int(labels[i])] = 1 # labels come back as float32
185 | final_boxes, classes, scores = decode(bbox_targets, ls, rois, 100, 100)
186 | print('gt_boxes:\n', gt_boxes)
187 | print ('final boxes:\n', np.hstack((final_boxes, np.expand_dims(classes, axis=1))).astype(np.int32))
188 | # print (final_boxes.astype(np.int32))
189 | -------------------------------------------------------------------------------- /libs/layers/roi.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/roi.pyc -------------------------------------------------------------------------------- /libs/layers/sample.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/sample.pyc -------------------------------------------------------------------------------- /libs/layers/wrapper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Written by CharlesShang@github
4 | # --------------------------------------------------------
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import tensorflow as tf
10 | from . import anchor
11 | from . import roi
12 | from . import mask
13 | from . import sample
14 | from . 
import assign 15 | from libs.boxes.anchor import anchors_plane 16 | 17 | def anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder'): 18 | 19 | with tf.name_scope(scope) as sc: 20 | labels, bbox_targets, bbox_inside_weights = \ 21 | tf.py_func(anchor.encode, 22 | [gt_boxes, all_anchors, height, width, stride], 23 | [tf.float32, tf.float32, tf.float32]) 24 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 25 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 26 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 27 | labels = tf.reshape(labels, (1, height, width, -1)) 28 | bbox_targets = tf.reshape(bbox_targets, (1, height, width, -1)) 29 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (1, height, width, -1)) 30 | 31 | return labels, bbox_targets, bbox_inside_weights 32 | 33 | 34 | def anchor_decoder(boxes, scores, all_anchors, ih, iw, scope='AnchorDecoder'): 35 | 36 | with tf.name_scope(scope) as sc: 37 | final_boxes, classes, scores = \ 38 | tf.py_func(anchor.decode, 39 | [boxes, scores, all_anchors, ih, iw], 40 | [tf.float32, tf.int32, tf.float32]) 41 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 42 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 43 | scores = tf.convert_to_tensor(scores, name='scores') 44 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 45 | classes = tf.reshape(classes, (-1, )) 46 | scores = tf.reshape(scores, (-1, )) 47 | 48 | return final_boxes, classes, scores 49 | 50 | 51 | def roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder'): 52 | 53 | with tf.name_scope(scope) as sc: 54 | labels, bbox_targets, bbox_inside_weights = \ 55 | tf.py_func(roi.encode, 56 | [gt_boxes, rois, num_classes], 57 | [tf.float32, tf.float32, tf.float32]) 58 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 59 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 60 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 61 | labels = tf.reshape(labels, (-1, )) 62 | bbox_targets = tf.reshape(bbox_targets, (-1, num_classes * 4)) 63 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (-1, num_classes * 4)) 64 | 65 | return labels, bbox_targets, bbox_inside_weights 66 | 67 | 68 | def roi_decoder(boxes, scores, rois, ih, iw, scope='ROIDecoder'): 69 | 70 | with tf.name_scope(scope) as sc: 71 | final_boxes, classes, scores = \ 72 | tf.py_func(roi.decode, 73 | [boxes, scores, rois, ih, iw], 74 | [tf.float32, tf.int32, tf.float32]) 75 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 76 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 77 | scores = tf.convert_to_tensor(scores, name='scores') 78 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 79 | 80 | return final_boxes, classes, scores 81 | 82 | def mask_encoder(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, scope='MaskEncoder'): 83 | 84 | with tf.name_scope(scope) as sc: 85 | labels, mask_targets, mask_inside_weights = \ 86 | tf.py_func(mask.encode, 87 | [gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width], 88 | [tf.float32, tf.int32, tf.float32]) 89 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='classes') 90 | mask_targets = tf.convert_to_tensor(mask_targets, name='mask_targets') 91 | mask_inside_weights = tf.convert_to_tensor(mask_inside_weights, name='mask_inside_weights') 92 | labels = 
tf.reshape(labels, (-1,)) 93 | mask_targets = tf.reshape(mask_targets, (-1, mask_height, mask_width, num_classes)) 94 | mask_inside_weights = tf.reshape(mask_inside_weights, (-1, mask_height, mask_width, num_classes)) 95 | 96 | return labels, mask_targets, mask_inside_weights 97 | 98 | def mask_decoder(mask_targets, rois, classes, ih, iw, scope='MaskDecoder'): 99 | 100 | with tf.name_scope(scope) as sc: 101 | Mask = \ 102 | tf.py_func(mask.decode, 103 | [mask_targets, rois, classes, ih, iw,], 104 | [tf.float32]) 105 | Mask = tf.convert_to_tensor(Mask, name='MaskImage') 106 | Mask = tf.reshape(Mask, (ih, iw)) 107 | 108 | return Mask 109 | 110 | 111 | def sample_wrapper(boxes, scores, is_training=True, scope='SampleBoxes'): 112 | 113 | with tf.name_scope(scope) as sc: 114 | boxes, scores, batch_inds = \ 115 | tf.py_func(sample.sample_rpn_outputs, 116 | [boxes, scores, is_training], 117 | [tf.float32, tf.float32, tf.int32]) 118 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 119 | scores = tf.convert_to_tensor(scores, name='Scores') 120 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 121 | boxes = tf.reshape(boxes, (-1, 4)) 122 | batch_inds = tf.reshape(batch_inds, [-1]) 123 | 124 | return boxes, scores, batch_inds 125 | 126 | def sample_with_gt_wrapper(boxes, scores, gt_boxes, is_training=True, scope='SampleBoxesWithGT'): 127 | 128 | with tf.name_scope(scope) as sc: 129 | boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds = \ 130 | tf.py_func(sample.sample_rpn_outputs_wrt_gt_boxes, 131 | [boxes, scores, gt_boxes, is_training], 132 | [tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, tf.int32]) 133 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 134 | scores = tf.convert_to_tensor(scores, name='Scores') 135 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 136 | 137 | mask_boxes = tf.convert_to_tensor(mask_boxes, name='MaskBoxes') 138 | mask_scores = tf.convert_to_tensor(mask_scores, name='MaskScores') 139 | mask_batch_inds = tf.convert_to_tensor(mask_batch_inds, name='MaskBatchInds') 140 | 141 | return boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds 142 | 143 | def gen_all_anchors(height, width, stride, scales, scope='GenAnchors'): 144 | 145 | with tf.name_scope(scope) as sc: 146 | all_anchors = \ 147 | tf.py_func(anchors_plane, 148 | [height, width, stride, scales], 149 | [tf.float64] 150 | ) 151 | all_anchors = tf.convert_to_tensor(tf.cast(all_anchors, tf.float32), name='AllAnchors') 152 | all_anchors = tf.reshape(all_anchors, (height, width, -1)) 153 | 154 | return all_anchors 155 | 156 | def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'): 157 | 158 | with tf.name_scope(scope) as sc: 159 | min_k = layers[0] 160 | max_k = layers[-1] 161 | assigned_layers = \ 162 | tf.py_func(assign.assign_boxes, 163 | [ gt_boxes, min_k, max_k ], 164 | tf.int32) 165 | assigned_layers = tf.reshape(assigned_layers, [-1]) 166 | 167 | assigned_tensors = [] 168 | for t in tensors: 169 | split_tensors = [] 170 | for l in layers: 171 | tf.cast(l, tf.int32) 172 | inds = tf.where(tf.equal(assigned_layers, l)) 173 | inds = tf.reshape(inds, [-1]) 174 | split_tensors.append(tf.gather(t, inds)) 175 | assigned_tensors.append(split_tensors) 176 | 177 | return assigned_tensors + [assigned_layers] -------------------------------------------------------------------------------- /libs/layers/wrapper.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/wrapper.pyc -------------------------------------------------------------------------------- /libs/logs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.py -------------------------------------------------------------------------------- /libs/logs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.pyc -------------------------------------------------------------------------------- /libs/logs/log.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import logging 6 | import libs.configs.config_v1 as cfg 7 | 8 | def LOG(mssg): 9 | logging.basicConfig(filename=cfg.FLAGS.train_dir + '/maskrcnn.log', 10 | level=logging.INFO, 11 | datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s %(message)s') 12 | logging.info(mssg) -------------------------------------------------------------------------------- /libs/logs/log.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/log.pyc -------------------------------------------------------------------------------- /libs/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # build pycocotools 4 | cd datasets/pycocotools 5 | make 6 | cd - 7 | -------------------------------------------------------------------------------- /libs/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.py -------------------------------------------------------------------------------- /libs/nets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.pyc -------------------------------------------------------------------------------- /libs/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import functools 5 | 6 | import tensorflow as tf 7 | 8 | from . 
import resnet_v1
9 | from .resnet_v1 import resnet_v1_50 as resnet50
10 | from .resnet_utils import resnet_arg_scope
11 | from .resnet_v1 import resnet_v1_101 as resnet101
12 |
13 | slim = tf.contrib.slim
14 |
15 | pyramid_maps = {
16 | 'resnet50': {'C1':'resnet_v1_50/conv1/Relu:0',
17 | 'C2':'resnet_v1_50/block1/unit_2/bottleneck_v1',
18 | 'C3':'resnet_v1_50/block2/unit_3/bottleneck_v1',
19 | 'C4':'resnet_v1_50/block3/unit_5/bottleneck_v1',
20 | 'C5':'resnet_v1_50/block4/unit_3/bottleneck_v1',
21 | },
22 | 'resnet101': {'C1': '', 'C2': '',
23 | 'C3': '', 'C4': '',
24 | 'C5': '',
25 | }
26 | }
27 |
28 | def get_network(name, image, weight_decay=0.000005, is_training=False):
29 |
30 | if name == 'resnet50':
31 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
32 | logits, end_points = resnet50(image, 1000, is_training=is_training)
33 |
34 | if name == 'resnet101':
35 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
36 | logits, end_points = resnet101(image, 1000, is_training=is_training)
37 |
38 | if name == 'resnext50':
39 | raise NotImplementedError('resnext50 is not supported yet')
40 |
41 | end_points['input'] = image
42 | return logits, end_points, pyramid_maps[name]
43 | -------------------------------------------------------------------------------- /libs/nets/nets_factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/nets_factory.pyc -------------------------------------------------------------------------------- /libs/nets/pyramid_network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/pyramid_network.pyc -------------------------------------------------------------------------------- /libs/nets/resnet_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_utils.pyc -------------------------------------------------------------------------------- /libs/nets/resnet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_v1.pyc -------------------------------------------------------------------------------- /libs/nets/train_utils.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import collections
6 | import tensorflow as tf
7 | import libs.configs.config_v1 as cfg
8 |
9 | slim = tf.contrib.slim
10 | FLAGS = tf.app.flags.FLAGS
11 |
12 | def _configure_optimizer(learning_rate):
13 | """Configures the optimizer used for training.
14 |
15 | Args:
16 | learning_rate: A scalar or `Tensor` learning rate.
17 |
18 | Returns:
19 | An instance of an optimizer.
20 |
21 | Raises:
22 | ValueError: if FLAGS.optimizer is not recognized.
23 | """
24 | if FLAGS.optimizer == 'adadelta':
25 | optimizer = tf.train.AdadeltaOptimizer(
26 | learning_rate,
27 | rho=FLAGS.adadelta_rho,
28 | epsilon=FLAGS.opt_epsilon)
29 | elif FLAGS.optimizer == 'adagrad':
30 | optimizer = tf.train.AdagradOptimizer(
31 | learning_rate,
32 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
33 | elif FLAGS.optimizer == 'adam':
34 | optimizer = tf.train.AdamOptimizer(
35 | learning_rate,
36 | beta1=FLAGS.adam_beta1,
37 | beta2=FLAGS.adam_beta2,
38 | epsilon=FLAGS.opt_epsilon)
39 | elif FLAGS.optimizer == 'ftrl':
40 | optimizer = tf.train.FtrlOptimizer(
41 | learning_rate,
42 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
43 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
44 | l1_regularization_strength=FLAGS.ftrl_l1,
45 | l2_regularization_strength=FLAGS.ftrl_l2)
46 | elif FLAGS.optimizer == 'momentum':
47 | optimizer = tf.train.MomentumOptimizer(
48 | learning_rate,
49 | momentum=FLAGS.momentum,
50 | name='Momentum')
51 | elif FLAGS.optimizer == 'rmsprop':
52 | optimizer = tf.train.RMSPropOptimizer(
53 | learning_rate,
54 | decay=FLAGS.rmsprop_decay,
55 | momentum=FLAGS.rmsprop_momentum,
56 | epsilon=FLAGS.opt_epsilon)
57 | elif FLAGS.optimizer == 'sgd':
58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
59 | else:
60 | raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
61 | return optimizer
62 |
63 | def _configure_learning_rate(num_samples_per_epoch, global_step):
64 | """Configures the learning rate.
65 |
66 | Args:
67 | num_samples_per_epoch: The number of samples in each epoch of training.
68 | global_step: The global_step tensor.
69 |
70 | Returns:
71 | A `Tensor` representing the learning rate.
72 |
73 | Raises:
74 | ValueError: if FLAGS.learning_rate_decay_type is not recognized.
75 | """
76 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
77 | FLAGS.num_epochs_per_decay)
78 | if FLAGS.sync_replicas:
79 | decay_steps /= FLAGS.replicas_to_aggregate
80 |
81 | if FLAGS.learning_rate_decay_type == 'exponential':
82 | return tf.train.exponential_decay(FLAGS.learning_rate,
83 | global_step,
84 | decay_steps,
85 | FLAGS.learning_rate_decay_factor,
86 | staircase=True,
87 | name='exponential_decay_learning_rate')
88 | elif FLAGS.learning_rate_decay_type == 'fixed':
89 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
90 | elif FLAGS.learning_rate_decay_type == 'polynomial':
91 | return tf.train.polynomial_decay(FLAGS.learning_rate,
92 | global_step,
93 | decay_steps,
94 | FLAGS.end_learning_rate,
95 | power=0.9,
96 | cycle=False,
97 | name='polynomial_decay_learning_rate')
98 | else:
99 | raise ValueError('learning_rate_decay_type [%s] was not recognized' %
100 | FLAGS.learning_rate_decay_type)
101 |
102 | def _get_variables_to_train():
103 | """Returns a list of variables to train.
104 |
105 | Returns:
106 | A list of variables to train by the optimizer.
107 | """
108 | if FLAGS.trainable_scopes is None:
109 | return tf.trainable_variables()
110 | else:
111 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
112 |
113 | variables_to_train = []
114 | for scope in scopes:
115 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
116 | variables_to_train.extend(variables)
117 | return variables_to_train
118 |
119 | def _get_init_fn():
120 | """Returns a function run by the chief worker to warm-start the training.
121 |
122 | Note that the init_fn is only run when initializing the model during the very
123 | first global step.
124 | 125 | Returns: 126 | An init function run by the supervisor. 127 | """ 128 | if FLAGS.checkpoint_path is None: 129 | return None 130 | 131 | # Warn the user if a checkpoint exists in the train_dir. Then we'll be 132 | # ignoring the checkpoint anyway. 133 | if tf.train.latest_checkpoint(FLAGS.train_dir): 134 | tf.logging.info( 135 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 136 | % FLAGS.train_dir) 137 | return None 138 | 139 | exclusions = [] 140 | if FLAGS.checkpoint_exclude_scopes: 141 | exclusions = [scope.strip() 142 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 143 | 144 | # TODO(sguada) variables.filter_variables() 145 | variables_to_restore = [] 146 | for var in slim.get_model_variables(): 147 | excluded = False 148 | for exclusion in exclusions: 149 | if var.op.name.startswith(exclusion): 150 | excluded = True 151 | break 152 | if not excluded: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choosing which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | excluded = False 178 | for exclusion in exclusions: 179 | if var.name.startswith(exclusion): 180 | excluded = True 181 | break 182 | if not excluded: 183 | variables_to_restore.append(var) 184 | else: 185 | variables_to_restore = tf.model_variables() 186 | 187 | variables_to_restore_final = [] 188 | if FLAGS.checkpoint_include_scopes is not None: 189 | includes = [ 190 | scope.strip() 191 | for scope in FLAGS.checkpoint_include_scopes.split(',') 192 | ] 193 | for var in variables_to_restore: 194 | included = False 195 | for include in includes: 196 | if var.name.startswith(include): 197 | included = True 198 | break 199 | if included: 200 | variables_to_restore_final.append(var) 201 | else: 202 | variables_to_restore_final = variables_to_restore 203 | 204 | return variables_to_restore_final 205 | -------------------------------------------------------------------------------- /libs/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.py -------------------------------------------------------------------------------- /libs/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.pyc 
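To make the decay arithmetic in _configure_learning_rate above concrete, here is a pure-Python mirror of the staircase exponential schedule it builds; the flag values below are illustrative, not the repo's defaults:

def exponential_lr(step, base_lr=2e-4, decay_steps=165566, decay_factor=0.1):
    """Mirrors tf.train.exponential_decay(..., staircase=True)."""
    return base_lr * decay_factor ** (step // decay_steps)

# decay_steps = int(num_samples_per_epoch / batch_size * num_epochs_per_decay),
# e.g. int(82783 / 1 * 2) = 165566 for COCO train2014 at batch_size 1
assert exponential_lr(0) == 2e-4                    # first stair
assert abs(exponential_lr(165566) - 2e-5) < 1e-15  # dropped by 10x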
-------------------------------------------------------------------------------- /libs/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 
| cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 | -------------------------------------------------------------------------------- /libs/nms/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | -------------------------------------------------------------------------------- /libs/nms/py_cpu_nms.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 | 
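A quick sanity check of py_cpu_nms above (illustrative values): two heavily overlapping boxes and one disjoint box; at thresh=0.5 the lower-scoring duplicate is suppressed:

import numpy as np
dets = np.array([[ 10,  10,  50,  50, 0.9],   # kept: highest score
                 [ 12,  12,  48,  48, 0.8],   # suppressed: IoU with box 0 is ~0.81
                 [100, 100, 140, 140, 0.7]],  # kept: disjoint from box 0
                dtype=np.float32)
assert py_cpu_nms(dets, thresh=0.5) == [0, 2]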
-------------------------------------------------------------------------------- /libs/preprocessings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.py -------------------------------------------------------------------------------- /libs/preprocessings/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.pyc -------------------------------------------------------------------------------- /libs/preprocessings/coco_v1.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import time
9 | import tensorflow as tf
10 | import libs.configs.config_v1 as cfg
11 | from . import utils as preprocess_utils
12 |
13 | FLAGS = tf.app.flags.FLAGS
14 |
15 | def preprocess_image(image, gt_boxes, gt_masks, is_training=False):
16 | """preprocess image for coco
17 | 1. random flipping
18 | 2. min size resizing
19 | 3. zero mean
20 | 4. ...
21 | """
22 | if is_training:
23 | return preprocess_for_training(image, gt_boxes, gt_masks)
24 | else:
25 | return preprocess_for_test(image, gt_boxes, gt_masks)
26 |
27 |
28 | def preprocess_for_training(image, gt_boxes, gt_masks):
29 |
30 | ih, iw = tf.shape(image)[0], tf.shape(image)[1]
31 | ## random flipping
32 | coin = tf.to_float(tf.random_uniform([1]))[0]
33 | image, gt_boxes, gt_masks =\
34 | tf.cond(tf.greater_equal(coin, 0.5),
35 | lambda: (preprocess_utils.flip_image(image),
36 | preprocess_utils.flip_gt_boxes(gt_boxes, ih, iw),
37 | preprocess_utils.flip_gt_masks(gt_masks)),
38 | lambda: (image, gt_boxes, gt_masks))
39 |
40 | ## min size resizing
41 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) # FLAGS.image_min_size = 640 (min edge)
42 | image = tf.expand_dims(image, 0)
43 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False)
44 | image = tf.squeeze(image, axis=[0])
45 |
46 | #gt_masks = tf.expand_dims(gt_masks, -1)
47 | # if the line above is uncommented, TF raises: ValueError: Shape must be rank 4 but is rank 5 for 'ResizeNearestNeighbor' (op: 'ResizeNearestNeighbor') with input shapes: [?,?,?,7,1], [2]
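# (Aside: the expand_dims/squeeze pair used in preprocess_for_test below is
# skipped here because these gt_masks are already rank 4 -- (G, H, W, 7), one
# channel per body-part class -- and resize_nearest_neighbor treats the G
# dimension as the batch.)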
48 | gt_masks = tf.cast(gt_masks, tf.float32) 49 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 50 | gt_masks = tf.cast(gt_masks, tf.int32) 51 | #gt_masks = tf.squeeze(gt_masks, axis=[-1]) 52 | 53 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 54 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 55 | 56 | ## random flip image 57 | # val_lr = tf.to_float(tf.random_uniform([1]))[0] 58 | # image = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_image(image), lambda: image) 59 | # gt_masks = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_masks(gt_masks), lambda: gt_masks) 60 | # gt_boxes = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_boxes(gt_boxes, new_ih, new_iw), lambda: gt_boxes) 61 | 62 | ## zero mean image 63 | image = tf.cast(image, tf.float32) 64 | image = image / 256.0 65 | image = (image - 0.5) * 2.0 66 | image = tf.expand_dims(image, axis=0) 67 | 68 | ## rgb to bgr 69 | image = tf.reverse(image, axis=[-1]) 70 | 71 | return image, gt_boxes, gt_masks 72 | 73 | def preprocess_for_test(image, gt_boxes, gt_masks): 74 | 75 | 76 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 77 | 78 | ## min size resizing 79 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 80 | image = tf.expand_dims(image, 0) 81 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 82 | image = tf.squeeze(image, axis=[0]) 83 | 84 | gt_masks = tf.expand_dims(gt_masks, -1) 85 | gt_masks = tf.cast(gt_masks, tf.float32) 86 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 87 | gt_masks = tf.cast(gt_masks, tf.int32) 88 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 89 | 90 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 91 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 92 | 93 | ## zero mean image 94 | image = tf.cast(image, tf.float32) 95 | image = image / 256.0 96 | image = (image - 0.5) * 2.0 97 | image = tf.expand_dims(image, axis=0) 98 | 99 | ## rgb to bgr 100 | image = tf.reverse(image, axis=[-1]) 101 | 102 | return image, gt_boxes, gt_masks 103 | -------------------------------------------------------------------------------- /libs/preprocessings/coco_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/coco_v1.pyc -------------------------------------------------------------------------------- /libs/preprocessings/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | from tensorflow.python.ops import control_flow_ops 8 | from tensorflow.contrib import slim 9 | 10 | 11 | def _crop(image, offset_height, offset_width, crop_height, crop_width): 12 | original_shape = tf.shape(image) 13 | 14 | rank_assertion = tf.Assert( 15 | tf.equal(tf.rank(image), 3), 16 | ['Rank of image must be equal to 3.']) 17 | cropped_shape = control_flow_ops.with_dependencies( 18 | [rank_assertion], 19 | tf.stack([crop_height, crop_width, original_shape[2]])) 20 | 21 | size_assertion = tf.Assert( 22 | tf.logical_and( 23 | tf.greater_equal(original_shape[0], crop_height), 24 | tf.greater_equal(original_shape[1], 
crop_width)), 25 | ['Crop size greater than the image size.']) 26 | 27 | offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) 28 | 29 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to 30 | # define the crop size. 31 | image = control_flow_ops.with_dependencies( 32 | [size_assertion], 33 | tf.slice(image, offsets, cropped_shape)) 34 | return tf.reshape(image, cropped_shape) 35 | 36 | 37 | def _random_crop(image_list, label_list, crop_height, crop_width): 38 | if not image_list: 39 | raise ValueError('Empty image_list.') 40 | 41 | # Compute the rank assertions. 42 | rank_assertions = [] 43 | for i in range(len(image_list)): 44 | image_rank = tf.rank(image_list[i]) 45 | rank_assert = tf.Assert( 46 | tf.equal(image_rank, 3), 47 | ['Wrong rank for tensor %s [expected] [actual]', 48 | image_list[i].name, 3, image_rank]) 49 | rank_assertions.append(rank_assert) 50 | 51 | image_shape = control_flow_ops.with_dependencies( 52 | [rank_assertions[0]], 53 | tf.shape(image_list[0])) 54 | image_height = image_shape[0] 55 | image_width = image_shape[1] 56 | crop_size_assert = tf.Assert( 57 | tf.logical_and( 58 | tf.greater_equal(image_height, crop_height), 59 | tf.greater_equal(image_width, crop_width)), 60 | ['Crop size greater than the image size.', image_height, image_width, crop_height, crop_width]) 61 | 62 | asserts = [rank_assertions[0], crop_size_assert] 63 | 64 | for i in range(1, len(image_list)): 65 | image = image_list[i] 66 | asserts.append(rank_assertions[i]) 67 | shape = control_flow_ops.with_dependencies([rank_assertions[i]], 68 | tf.shape(image)) 69 | height = shape[0] 70 | width = shape[1] 71 | 72 | height_assert = tf.Assert( 73 | tf.equal(height, image_height), 74 | ['Wrong height for tensor %s [expected][actual]', 75 | image.name, height, image_height]) 76 | width_assert = tf.Assert( 77 | tf.equal(width, image_width), 78 | ['Wrong width for tensor %s [expected][actual]', 79 | image.name, width, image_width]) 80 | asserts.extend([height_assert, width_assert]) 81 | 82 | # Create a random bounding box. 83 | # 84 | # Use tf.random_uniform and not numpy.random.rand as doing the former would 85 | # generate random numbers at graph eval time, unlike the latter which 86 | # generates random numbers at graph definition time. 
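# for instance, a NumPy draw such as offset_height = int(np.random.rand() * h) would be evaluated
# once, while the graph is being built, so every batch would be cropped at the same fixed offset;
# the tf.random_uniform ops below are graph ops and draw a fresh offset on each sess.run(), which
# is what a random crop needs.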
87 | max_offset_height = control_flow_ops.with_dependencies( 88 | asserts, tf.reshape(image_height - crop_height + 1, [])) 89 | max_offset_width = control_flow_ops.with_dependencies( 90 | asserts, tf.reshape(image_width - crop_width + 1, [])) 91 | offset_height = tf.random_uniform( 92 | [], maxval=max_offset_height, dtype=tf.int32) 93 | offset_width = tf.random_uniform( 94 | [], maxval=max_offset_width, dtype=tf.int32) 95 | 96 | cropped_images = [_crop(image, offset_height, offset_width, 97 | crop_height, crop_width) for image in image_list] 98 | cropped_labels = [_crop(label, offset_height, offset_width, 99 | crop_height, crop_width) for label in label_list] 100 | return cropped_images, cropped_labels 101 | 102 | 103 | def _central_crop(image_list, label_list, crop_height, crop_width): 104 | output_images = [] 105 | output_labels = [] 106 | for image, label in zip(image_list, label_list): 107 | image_height = tf.shape(image)[0] 108 | image_width = tf.shape(image)[1] 109 | 110 | offset_height = (image_height - crop_height) / 2 111 | offset_width = (image_width - crop_width) / 2 112 | 113 | output_images.append(_crop(image, offset_height, offset_width, 114 | crop_height, crop_width)) 115 | output_labels.append(_crop(label, offset_height, offset_width, 116 | crop_height, crop_width)) 117 | return output_images, output_labels 118 | 119 | 120 | def _smallest_size_at_least(height, width, smallest_side): 121 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 122 | 123 | height = tf.to_float(height) 124 | width = tf.to_float(width) 125 | smallest_side = tf.to_float(smallest_side) 126 | 127 | scale = tf.cond(tf.greater(height, width), 128 | lambda: smallest_side / width, 129 | lambda: smallest_side / height) 130 | new_height = tf.to_int32(height * scale) 131 | new_width = tf.to_int32(width * scale) 132 | return new_height, new_width 133 | 134 | def _aspect_preserving_resize(image, label, smallest_side): 135 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 136 | 137 | shape = tf.shape(image) 138 | height = shape[0] 139 | width = shape[1] 140 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side) 141 | 142 | image = tf.expand_dims(image, 0) 143 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width], 144 | align_corners=False) 145 | resized_image = tf.squeeze(resized_image, axis=[0]) 146 | resized_image.set_shape([None, None, 3]) 147 | 148 | label = tf.expand_dims(label, 0) 149 | resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width], 150 | align_corners=False) 151 | resized_label = tf.squeeze(resized_label, axis=[0]) 152 | resized_label.set_shape([None, None, 1]) 153 | return resized_image, resized_label 154 | 155 | def flip_gt_boxes(gt_boxes, ih, iw): 156 | x1s, y1s, x2s, y2s, cls = \ 157 | gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4] 158 | x1s = tf.to_float(iw) - x1s 159 | x2s = tf.to_float(iw) - x2s 160 | return tf.concat(values=(x2s[:, tf.newaxis], 161 | y1s[:, tf.newaxis], 162 | x1s[:, tf.newaxis], 163 | y2s[:, tf.newaxis], 164 | cls[:, tf.newaxis]), axis=1) 165 | 166 | def flip_gt_masks(gt_masks): 167 | return tf.reverse(gt_masks, axis=[2]) 168 | 169 | def flip_image(image): 170 | return tf.reverse(image, axis=[1]) 171 | 172 | def resize_gt_boxes(gt_boxes, scale_ratio): 173 | xys, cls = \ 174 | gt_boxes[:, 0:4], gt_boxes[:, 4] 175 | xys = xys * scale_ratio 176 | return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1) 177 | 178 | 
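A quick NumPy check of the box conventions used in the helpers above: boxes are rows of [x1, y1, x2, y2, class], flip_gt_boxes mirrors the x coordinates against the image width and swaps x1/x2 so that x1 <= x2 still holds, and the min-size resize scales all coordinates by a single ratio. The following is a standalone sketch of the same arithmetic (the sample box and image sizes are made up):

import numpy as np

iw, ih = 640, 480
gt_boxes = np.array([[100., 120., 300., 400., 1.]])  # one box as [x1, y1, x2, y2, cls]

# horizontal flip, mirroring flip_gt_boxes: x' = iw - x, with the two x columns swapped
x1, y1, x2, y2, cls = gt_boxes.T
flipped = np.stack([iw - x2, y1, iw - x1, y2, cls], axis=1)
assert (flipped[:, 0] <= flipped[:, 2]).all()

# min-size resize, mirroring _smallest_size_at_least + resize_gt_boxes:
# the smaller image edge is scaled to smallest_side and the boxes follow
smallest_side = 640.0
scale = smallest_side / min(ih, iw)
resized = np.hstack([gt_boxes[:, :4] * scale, gt_boxes[:, 4:]])
print(flipped)
print(resized)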
-------------------------------------------------------------------------------- /libs/preprocessings/utils.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/utils.pyc
-------------------------------------------------------------------------------- /libs/setup.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 | "Find a file in a search path"
17 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 | for dir in path.split(os.pathsep):
19 | binpath = pjoin(dir, name)
20 | if os.path.exists(binpath):
21 | return os.path.abspath(binpath)
22 | return None
23 |
24 | def locate_cuda():
25 | """Locate the CUDA environment on the system
26 |
27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 | and values giving the absolute path to each directory.
29 |
30 | Starts by looking for the CUDAHOME env variable. If not found, everything
31 | is based on finding 'nvcc' in the PATH.
32 | """
33 |
34 | # first check if the CUDAHOME env variable is in use
35 | if 'CUDAHOME' in os.environ:
36 | home = os.environ['CUDAHOME']
37 | nvcc = pjoin(home, 'bin', 'nvcc')
38 | else:
39 | # otherwise, search the PATH for NVCC
40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 | if nvcc is None:
43 | raise EnvironmentError('The nvcc binary could not be '
44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 | home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 | cudaconfig = {'home':home, 'nvcc':nvcc,
48 | 'include': pjoin(home, 'include'),
49 | 'lib64': pjoin(home, 'lib64')}
50 | for k, v in cudaconfig.iteritems():
51 | if not os.path.exists(v):
52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 | return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 | numpy_include = np.get_include()
60 | except AttributeError:
61 | numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 | """inject deep into distutils to customize how the dispatch
65 | to gcc/nvcc works.
66 |
67 | If you subclass UnixCCompiler, it's not trivial to get your subclass
68 | injected in, and still have the right customizations (i.e.
69 | distutils.sysconfig.customize_compiler) run on it. So instead of going
70 | the OO route, I have this. Note, it's kind of like a weird functional
71 | subclassing going on."""
72 |
73 | # tell the compiler it can process .cu
74 | self.src_extensions.append('.cu')
75 |
76 | # save references to the default compiler_so and _compile methods
77 | default_compiler_so = self.compiler_so
78 | super = self._compile
79 |
80 | # now redefine the _compile method. This gets executed for each
81 | # object but distutils doesn't have the ability to change compilers
82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 | print extra_postargs
85 | if os.path.splitext(src)[1] == '.cu':
86 | # use the cuda for .cu files
87 | self.set_executable('compiler_so', CUDA['nvcc'])
88 | # use only a subset of the extra_postargs, which are 1-1 translated
89 | # from the extra_compile_args in the Extension class
90 | postargs = extra_postargs['nvcc']
91 | else:
92 | postargs = extra_postargs['gcc']
93 |
94 | super(obj, src, ext, cc_args, postargs, pp_opts)
95 | # reset the default compiler_so, which we might have changed for cuda
96 | self.compiler_so = default_compiler_so
97 |
98 | # inject our redefined _compile method into the class
99 | self._compile = _compile
100 |
101 |
102 | # run the customize_compiler
103 | class custom_build_ext(build_ext):
104 | def build_extensions(self):
105 | customize_compiler_for_nvcc(self.compiler)
106 | build_ext.build_extensions(self)
107 |
108 | ext_modules = [
109 | Extension(
110 | "boxes.cython_bbox",
111 | ["boxes/bbox.pyx"],
112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
113 | include_dirs = [numpy_include]
114 | ),
115 | Extension(
116 | "boxes.cython_anchor",
117 | ["boxes/cython_anchor.pyx"],
118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 | include_dirs = [numpy_include]
120 | ),
121 | Extension(
122 | "boxes.cython_bbox_transform",
123 | ["boxes/cython_bbox_transform.pyx"],
124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 | include_dirs=[numpy_include]
126 | ),
127 | Extension(
128 | "boxes.cython_nms",
129 | ["boxes/nms.pyx"],
130 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
131 | include_dirs = [numpy_include]
132 | ),
133 | Extension(
134 | "nms.cpu_nms",
135 | ["nms/cpu_nms.pyx"],
136 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
137 | include_dirs = [numpy_include]
138 | ),
139 | Extension(
140 | 'nms.gpu_nms',
141 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
142 | library_dirs=[CUDA['lib64']],
143 | libraries=['cudart'],
144 | language='c++',
145 | runtime_library_dirs=[CUDA['lib64']],
146 | # this syntax is specific to this build system
147 | # we're only going to use certain compiler args with nvcc and not with gcc
148 | # the implementation of this trick is in customize_compiler_for_nvcc() above
149 | extra_compile_args={'gcc': ["-Wno-unused-function"],
150 | 'nvcc': ['-arch=sm_52',
151 | '--ptxas-options=-v',
152 | '-c',
153 | '--compiler-options',
154 | "'-fPIC'"]},
155 | include_dirs = [numpy_include, CUDA['include']]
156 | ),
157 | ]
158 |
159 | setup(
160 | name='fast_rcnn',
161 | ext_modules=ext_modules,
162 | # inject our custom trigger
163 | cmdclass={'build_ext': custom_build_ext},
164 | )
165 |
-------------------------------------------------------------------------------- /libs/visualization/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.py
-------------------------------------------------------------------------------- /libs/visualization/__init__.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.pyc
-------------------------------------------------------------------------------- /libs/visualization/pil_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance
4 | import scipy.misc
5 |
6 | FLAGS = tf.app.flags.FLAGS
7 | _DEBUG = False
8 |
9 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None):
10 | #print("image")
11 | #print(image)
12 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0)
13 | norm_image = np.uint8(image/0.1*127.0 + 127.0)
14 | #print("norm_image")
15 | #print(norm_image)
16 | source_img = Image.fromarray(norm_image)
17 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG')
18 |
19 | colors = np.random.randint(5, size=(80, 3))
20 |
21 |
22 | def draw_bbox_better(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
23 | import cv2
24 | #source_img = Image.fromarray(image)
25 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
26 | #b, g, r = source_img.split()
27 | #source_img = Image.merge("RGB", (r, g, b))
28 | #draw = ImageDraw.Draw(source_img)
29 | #color = '#0000ff'
30 | if bbox is not None:
31 | dictionary = {}
32 |
33 | for i, box in enumerate(bbox):
34 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
35 | area = float((box[2]-box[0])*(box[3]-box[1]))
36 | while area in dictionary:
37 | area+=1
38 | width = int(box[2])-int(box[0])
39 | height = int(box[3])-int(box[1])
40 | mask = final_mask[i]
41 | mask = mask[...,label[i]]
42 | mask = scipy.misc.imresize(mask,(height,width))
43 |
44 | dictionary[round(area,4)]=(box,label[i],gt_label[i],prob[i,label[i]],mask,colors[label[i],:])
45 | sorted_keys = sorted(dictionary.iterkeys(),reverse=True)
46 |
47 | big_mask = np.zeros((image.shape[0],image.shape[1],len(bbox)),dtype=np.float32)
48 |
49 | i=0
50 | for key in sorted_keys:
51 | bo, _,_,_,msk,_= dictionary[key]
52 | big_mask[int(bo[1]):int(bo[3]),int(bo[0]):int(bo[2]),i] = msk
53 | i=i+1
54 |
55 | max_indices = np.argmax(big_mask,axis=2)
56 | for key in sorted_keys:
57 | bo, lab,gt_lab,_,_,col= dictionary[key]
58 | for x in range(int(bo[0]),int(bo[2])):
59 | for y in range(int(bo[1]),int(bo[3])):
60 | _,_,_,_,_,col = dictionary[sorted_keys[max_indices[y,x]]]  # index by the same order used to fill big_mask; dict.values() has no guaranteed order in Python 2
61 | #print col
62 | #print (image[y,x,0] )
63 | image[y,x,...] = col
64 | #hsv[y,x,0]=color[0]
65 | #hsv[y,x,1]=hsv[y,x,1]*0.9
66 | text = cat_id_to_cls_name(lab)
67 | image = cv2.putText(image,text,(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX, 4,(255,255,255),2)
68 | if lab != gt_lab:
69 | c = (255,0,0)
70 | else:
71 | c = (0,0,255)
72 | image = cv2.rectangle(image,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3)
73 | cv2.imwrite('output/est_imgs/test_' + name + '_' + str(step) +'.jpg',image)
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | def draw_bbox(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
82 | #print(prob[:,label])
83 | source_img = Image.fromarray(image)
84 | b, g, r = source_img.split()
85 | source_img = Image.merge("RGB", (r, g, b))
86 | draw = ImageDraw.Draw(source_img)
87 | color = '#0000ff'
88 | if bbox is not None:
89 | for i, box in enumerate(bbox):
90 | if label is not None:
91 | if prob is not None:
92 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
93 | if gt_label is not None:
94 | text = cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i])
95 | if label[i] != gt_label[i]:
96 | color = '#ff0000'#draw.text((2+bbox[i,0], 2+bbox[i,1]), cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]), fill='#ff0000')
97 | else:
98 | color = '#0000ff'
99 | else:
100 | text = cat_id_to_cls_name(label[i])
101 | #############################DRAW SEGMENTATION
102 | width = int(box[2])-int(box[0])  # imresize expects integer sizes
103 | height = int(box[3])-int(box[1])
104 | #print (final_mask.shape)
105 | mask = final_mask[i]
106 | mask = mask[...,label[i]]
107 | mask = scipy.misc.imresize(mask,(height,width))
108 | mask_pil = Image.fromarray(mask)
109 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
110 | #draw.bitmap((int(box[0]),int(box[1])),mask_pil,fill='#00ffff')
111 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
112 | if _DEBUG is True:
113 | print("plot",label[i], prob[i,label[i]])
114 | draw.rectangle(box,fill=None,outline=color)
115 |
116 | else:
117 | if _DEBUG is True:
118 | print("skip",label[i], prob[i,label[i]])
119 | else:
120 | #############################DRAW GT SEGMENTATION
121 | if final_mask is not None:
122 | mask = final_mask[i]
123 | mask_pil = Image.fromarray(mask)
124 | mask_pil = mask_pil.crop([int(box[0]),int(box[1]),int(box[2]),int(box[3])])
125 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
126 | text = cat_id_to_cls_name(label[i])
127 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
128 | draw.rectangle(box,fill=None,outline=color)
129 |
130 |
131 | return source_img.save(FLAGS.train_dir + 'est_imgs/test_' + name + '_' + str(step) +'.jpg', 'JPEG')
132 |
133 | def cat_id_to_cls_name(catId):
134 | cls_name = np.array([ 'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
135 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
136 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
137 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
138 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
139 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
140 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
141 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
142 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
143 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
144 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
145 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 146 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 147 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 148 | return cls_name[catId] 149 | -------------------------------------------------------------------------------- /libs/visualization/pil_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/pil_utils.pyc -------------------------------------------------------------------------------- /libs/visualization/summary_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def visualize_input(boxes, image, masks): 5 | image_sum_sample = image[:1] 6 | visualize_masks(masks, "input_image_gt_mask") 7 | visualize_bb(image, boxes, "input_image_gt_bb") 8 | visualize_input_image(image_sum_sample) 9 | 10 | 11 | def visualize_rpn_predictions(boxes, image): 12 | image_sum_sample = image[:1] 13 | visualize_bb(image_sum_sample, boxes, "rpn_pred_bb") 14 | 15 | # TODO: Present all masks in different colors 16 | def visualize_masks(masks, name): 17 | masks = tf.cast(masks, tf.float32) 18 | tf.summary.image(name=name, tensor=masks, max_outputs=1) 19 | 20 | 21 | def visualize_bb(image, boxes, name): 22 | image_sum_sample_shape = tf.shape(image)[1:] 23 | gt_x_min = boxes[:, 0] / tf.cast(image_sum_sample_shape[1], tf.float32) 24 | gt_y_min = boxes[:, 1] / tf.cast(image_sum_sample_shape[0], tf.float32) 25 | gt_x_max = boxes[:, 2] / tf.cast(image_sum_sample_shape[1], tf.float32) 26 | gt_y_max = boxes[:, 3] / tf.cast(image_sum_sample_shape[0], tf.float32) 27 | bb = tf.stack([gt_y_min, gt_x_min, gt_y_max, gt_x_max], axis=1) 28 | tf.summary.image(name=name, 29 | tensor=tf.image.draw_bounding_boxes(image, tf.expand_dims(bb, 0), name=None), 30 | max_outputs=1) 31 | 32 | 33 | def visualize_input_image(image): 34 | tf.summary.image(name="input_image", tensor=image, max_outputs=1) 35 | 36 | 37 | def visualize_final_predictions(boxes, image, masks): 38 | visualize_masks(masks, "pred_mask") 39 | visualize_bb(image, boxes, "final_bb_pred") 40 | -------------------------------------------------------------------------------- /libs/visualization/summary_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/summary_utils.pyc -------------------------------------------------------------------------------- /media/file.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /media/testseg122_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg122_1.jpg -------------------------------------------------------------------------------- /media/testseg226_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg226_1.jpg 
-------------------------------------------------------------------------------- /media/testseg255_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg255_1.jpg -------------------------------------------------------------------------------- /media/testseg293_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg293_1.jpg -------------------------------------------------------------------------------- /media/testseg296_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg296_1.jpg -------------------------------------------------------------------------------- /media/testseg305_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg305_1.jpg -------------------------------------------------------------------------------- /media/testseg35_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg35_1.jpg -------------------------------------------------------------------------------- /media/testseg57_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg57_1.jpg -------------------------------------------------------------------------------- /media/testseg70_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg70_1.jpg -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from . 
import train_utils
5 |
-------------------------------------------------------------------------------- /train/__init__.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/__init__.pyc
-------------------------------------------------------------------------------- /train/train_utils.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import collections
8 | import tensorflow as tf
9 | import libs.configs.config_v1 as cfg
10 |
11 | slim = tf.contrib.slim
12 | FLAGS = tf.app.flags.FLAGS
13 |
14 | def _configure_optimizer(learning_rate):
15 | """Configures the optimizer used for training.
16 |
17 | Args:
18 | learning_rate: A scalar or `Tensor` learning rate.
19 |
20 | Returns:
21 | An instance of an optimizer.
22 |
23 | Raises:
24 | ValueError: if FLAGS.optimizer is not recognized.
25 | """
26 | if FLAGS.optimizer == 'adadelta':
27 | optimizer = tf.train.AdadeltaOptimizer(
28 | learning_rate,
29 | rho=FLAGS.adadelta_rho,
30 | epsilon=FLAGS.opt_epsilon)
31 | elif FLAGS.optimizer == 'adagrad':
32 | optimizer = tf.train.AdagradOptimizer(
33 | learning_rate,
34 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
35 | elif FLAGS.optimizer == 'adam':
36 | optimizer = tf.train.AdamOptimizer(
37 | learning_rate,
38 | beta1=FLAGS.adam_beta1,
39 | beta2=FLAGS.adam_beta2,
40 | epsilon=FLAGS.opt_epsilon)
41 | elif FLAGS.optimizer == 'ftrl':
42 | optimizer = tf.train.FtrlOptimizer(
43 | learning_rate,
44 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
45 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
46 | l1_regularization_strength=FLAGS.ftrl_l1,
47 | l2_regularization_strength=FLAGS.ftrl_l2)
48 | elif FLAGS.optimizer == 'momentum':  # the optimizer used in this setup
49 | optimizer = tf.train.MomentumOptimizer(
50 | learning_rate,
51 | momentum=FLAGS.momentum,
52 | name='Momentum')
53 | elif FLAGS.optimizer == 'rmsprop':
54 | optimizer = tf.train.RMSPropOptimizer(
55 | learning_rate,
56 | decay=FLAGS.rmsprop_decay,
57 | momentum=FLAGS.rmsprop_momentum,
58 | epsilon=FLAGS.opt_epsilon)
59 | elif FLAGS.optimizer == 'sgd':
60 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
61 | else:
62 | raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
63 | return optimizer
64 |
65 | def _configure_learning_rate(num_samples_per_epoch, global_step):
66 | """Configures the learning rate.
67 |
68 | Args:
69 | num_samples_per_epoch: The number of samples in each epoch of training.
70 | global_step: The global_step tensor.
71 |
72 | Returns:
73 | A `Tensor` representing the learning rate.
74 |
75 | Raises:
76 | ValueError: if FLAGS.learning_rate_decay_type is not recognized.
77 | """
78 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *  # batch_size is 1; num_epochs_per_decay is 2
79 | FLAGS.num_epochs_per_decay)  # 165566 for num_samples_per_epoch=82783
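# worked example with the values noted above (82783 samples per epoch, batch size 1,
# two epochs per decay): decay_steps = int(82783 / 1 * 2) = 165566, so with the
# staircase exponential schedule configured below,
#   lr(step) = 0.0002 * 0.94 ** (step // 165566)
# i.e. the first decay lands after roughly two epochs.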
80 | if FLAGS.sync_replicas:
81 | decay_steps /= FLAGS.replicas_to_aggregate
82 |
83 | if FLAGS.learning_rate_decay_type == 'exponential':  # the decay type used in this setup
84 | return tf.train.exponential_decay(FLAGS.learning_rate,  # 0.0002 in this setup
85 | global_step,
86 | decay_steps,  # 165566 for num_samples_per_epoch=82783
87 | FLAGS.learning_rate_decay_factor,  # 0.94
88 | staircase=True,
89 | name='exponential_decay_learning_rate')
90 | # with staircase=True the exponent is floored: lr = 0.0002 * 0.94 ** (step // 165566)
91 | # e.g. lr(130000) = 0.0002, and from step 165566 onward lr = 0.0002 * 0.94 = 0.000188
92 |
93 | elif FLAGS.learning_rate_decay_type == 'fixed':
94 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
95 | elif FLAGS.learning_rate_decay_type == 'polynomial':
96 | return tf.train.polynomial_decay(FLAGS.learning_rate,
97 | global_step,
98 | decay_steps,
99 | FLAGS.end_learning_rate,
100 | power=0.9,
101 | cycle=False,
102 | name='polynomial_decay_learning_rate')
103 | else:
104 | raise ValueError('learning_rate_decay_type [%s] was not recognized' %
105 | FLAGS.learning_rate_decay_type)
106 |
107 | def _get_variables_to_train():
108 | """Returns a list of variables to train.
109 |
110 | Returns:
111 | A list of variables to train by the optimizer.
112 | """
113 | if FLAGS.trainable_scopes is None:
114 | return tf.trainable_variables()
115 | else:
116 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
117 |
118 | variables_to_train = []
119 | for scope in scopes:
120 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
121 | variables_to_train.extend(variables)
122 | return variables_to_train
123 |
124 | def _get_init_fn():
125 | """Returns a function run by the chief worker to warm-start the training.
126 |
127 | Note that the init_fn is only run when initializing the model during the very
128 | first global step.
129 |
130 | Returns:
131 | An init function run by the supervisor.
132 | """
133 | if FLAGS.checkpoint_path is None:
134 | return None
135 |
136 | # Warn the user if a checkpoint exists in the train_dir. Then we'll
137 | # ignore the checkpoint anyway.
138 | if tf.train.latest_checkpoint(FLAGS.train_dir): 139 | tf.logging.info( 140 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 141 | % FLAGS.train_dir) 142 | return None 143 | 144 | exclusions = [] 145 | if FLAGS.checkpoint_exclude_scopes: 146 | exclusions = [scope.strip() 147 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 148 | 149 | # TODO(sguada) variables.filter_variables() 150 | variables_to_restore = [] 151 | for var in slim.get_model_variables(): 152 | for exclusion in exclusions: 153 | if var.op.name.startswith(exclusion): 154 | break 155 | else: 156 | variables_to_restore.append(var) 157 | 158 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 159 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 160 | else: 161 | checkpoint_path = FLAGS.checkpoint_path 162 | 163 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 164 | 165 | return slim.assign_from_checkpoint_fn( 166 | checkpoint_path, 167 | variables_to_restore, 168 | ignore_missing_vars=FLAGS.ignore_missing_vars) 169 | 170 | def get_var_list_to_restore(): 171 | """Choose which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 172 | 173 | variables_to_restore = [] 174 | if FLAGS.checkpoint_exclude_scopes is not None: 175 | exclusions = [scope.strip() 176 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 177 | 178 | # build restore list 179 | for var in tf.model_variables(): 180 | for exclusion in exclusions: 181 | if var.name.startswith(exclusion): 182 | break 183 | else: 184 | variables_to_restore.append(var) 185 | else: 186 | variables_to_restore = tf.model_variables() 187 | 188 | variables_to_restore_final = [] 189 | if FLAGS.checkpoint_include_scopes is not None: 190 | includes = [ 191 | scope.strip() 192 | for scope in FLAGS.checkpoint_include_scopes.split(',') 193 | ] 194 | for var in variables_to_restore: 195 | for include in includes: 196 | if var.name.startswith(include): 197 | variables_to_restore_final.append(var) 198 | break 199 | else: 200 | variables_to_restore_final = variables_to_restore 201 | 202 | return variables_to_restore_final 203 | -------------------------------------------------------------------------------- /train/train_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/train_utils.pyc -------------------------------------------------------------------------------- /unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/unit_test/__init__.py -------------------------------------------------------------------------------- /unit_test/data_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | 8 | import sys 9 | import os 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 11 | import numpy as np 12 | import PIL.Image as Image 13 | from PIL import ImageDraw 14 | import tensorflow as tf 15 | import tensorflow.contrib.slim as slim 16 | from libs.logs.log import LOG 17 | import libs.configs.config_v1 as cfg 18 
| import libs.nets.resnet_v1 as resnet_v1 19 | import libs.datasets.dataset_factory as dataset_factory 20 | import libs.datasets.coco as coco 21 | import libs.preprocessings.coco_v1 as preprocess_coco 22 | from libs.layers import ROIAlign 23 | 24 | resnet50 = resnet_v1.resnet_v1_50 25 | FLAGS = tf.app.flags.FLAGS 26 | 27 | with tf.Graph().as_default(): 28 | 29 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \ 30 | coco.read('./data/coco/records/coco_train2014_00001-of-00033.tfrecord') 31 | 32 | image, gt_boxes, gt_masks = \ 33 | preprocess_coco.preprocess_image(image, gt_boxes, gt_masks) 34 | 35 | 36 | 37 | sess = tf.Session() 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | # init_op = tf.initialize_all_variables() 41 | 42 | boxes = [[100, 100, 200, 200], 43 | [50, 50, 100, 100], 44 | [100, 100, 750, 750], 45 | [50, 50, 60, 60]] 46 | # boxes = np.zeros((0, 4)) 47 | boxes = tf.constant(boxes, tf.float32) 48 | feat = ROIAlign(image, boxes, False, 16, 7, 7) 49 | sess.run(init_op) 50 | 51 | tf.train.start_queue_runners(sess=sess) 52 | with sess.as_default(): 53 | for i in range(20000): 54 | image_np, ih_np, iw_np, gt_boxes_np, gt_masks_np, num_instances_np, img_id_np, \ 55 | feat_np = \ 56 | sess.run([image, ih, iw, gt_boxes, gt_masks, num_instances, img_id, 57 | feat]) 58 | # print (image_np.shape, gt_boxes_np.shape, gt_masks_np.shape) 59 | 60 | if i % 1 == 0: 61 | print ('%d, image_id: %s, instances: %d'% (i, str(img_id_np), num_instances_np)) 62 | image_np = 256 * (image_np * 0.5 + 0.5) 63 | image_np = image_np.astype(np.uint8) 64 | image_np = np.squeeze(image_np) 65 | print (image_np.shape, ih_np, iw_np) 66 | print (feat_np.shape) 67 | im = Image.fromarray(image_np) 68 | imd = ImageDraw.Draw(im) 69 | for i in range(gt_boxes_np.shape[0]): 70 | imd.rectangle(gt_boxes_np[i, :]) 71 | im.save(str(img_id_np) + '.png') 72 | mask = np.sum(gt_masks_np, axis=0, dtype='uint8') 73 | white_pos = np.where(mask > 0) 74 | mask[white_pos] = 255 75 | mask_img = Image.fromarray(mask) 76 | mask_img.save('mask_' + str(img_id_np) + '.png') 77 | # print (gt_boxes_np) 78 | sess.close() 79 | -------------------------------------------------------------------------------- /unit_test/preprocessing_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import numpy as np 5 | import sys 6 | import os 7 | import tensorflow as tf 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | import libs.configs.config_v1 as cfg 12 | 13 | ih, iw, ic = 400,500, 3 14 | N = 3 15 | image = np.random.randint(0, 255, (ih, iw, ic)).astype(np.uint8) 16 | gt_masks = np.zeros((N, ih, iw)).astype(np.int32) 17 | xy = np.random.randint(0, min(iw, ih)-100, (N, 2)).astype(np.float32) 18 | wh = np.random.randint(20, 40, (N, 2)).astype(np.float32) 19 | cls = np.random.randint(1, 6, (N, 1)).astype(np.float32) 20 | gt_boxes = np.hstack((xy, xy + wh, cls)).astype(np.float32) 21 | gt_boxes_np = gt_boxes 22 | image_np = image 23 | gt_masks_np = gt_masks 24 | 25 | for i in range(N): 26 | box = gt_boxes[i, 0:4] 27 | gt_masks[i, int(box[1]):int(box[3]), 28 | int(box[0]):int(box[2])] = 1 29 | image = tf.constant(image) 30 | gt_boxes = tf.constant(gt_boxes) 31 | gt_masks = tf.constant(gt_masks) 32 | 33 | image, gt_boxes, gt_masks = \ 34 | coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True) 35 | 36 
| with tf.Session() as sess: 37 | # print(image.eval()) 38 | image_tf, gt_boxes_tf, gt_masks_tf = \ 39 | sess.run([image, gt_boxes, gt_masks]) 40 | print ('#######################') 41 | print ('DATA PREPROCESSING TEST') 42 | print ('#######################') 43 | print ('gt_boxes shape:', gt_boxes_tf.shape) 44 | print('mask shape:', gt_masks_tf.shape) 45 | print(gt_boxes_tf) 46 | for i in range(N): 47 | box = np.round(gt_boxes_tf[i, 0:4]) 48 | box = box.astype(np.int32) 49 | m = gt_masks_tf[i, box[1]:box[3], box[0]:box[2]] 50 | print ('after:', box) 51 | print (np.sum(m)/ (0.0 + m.size)) 52 | print (m) 53 | box = np.round(gt_boxes_np[i, 0:4]) 54 | box = box.astype(np.int32) 55 | m = gt_masks_np[i, box[1]:box[3], box[0]:box[2]] 56 | print ('ori box:', box) 57 | print (np.sum(m)/ (0.0 + m.size)) 58 | --------------------------------------------------------------------------------
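A closing note on the pixel normalization shared by the preprocessing code and the tests above: preprocess_for_training/preprocess_for_test map uint8 pixels through image / 256.0 followed by (x - 0.5) * 2.0, and unit_test/data_test.py inverts this with 256 * (x * 0.5 + 0.5) before saving visualizations. A standalone sketch of that round trip (NumPy only; the pixel values are made up):

import numpy as np

pixels = np.array([0., 128., 255.])

# forward mapping from libs/preprocessings/coco_v1.py: [0, 255] -> [-1.0, ~0.992]
normalized = (pixels / 256.0 - 0.5) * 2.0

# inverse mapping used in unit_test/data_test.py to recover a viewable image
recovered = 256.0 * (normalized * 0.5 + 0.5)

assert np.allclose(recovered, pixels)
print(normalized)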