├── .gitignore
├── .idea
│   ├── MaskRCNN_body.iml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── Documentation.odt
├── README.md
├── convert_data
│   ├── ChalearnLAPEvaluation.py
│   ├── ChalearnLAPSample.py
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── convert_ADE20k_human_body_parts.py
│   ├── convert_CHALEARN_human_body_parts.py
│   ├── convert_VOC_human_body_parts.py
│   ├── convert_jhmdb.py
│   ├── download_and_convert_data.sh
│   ├── human_body_parts.m
│   ├── read_my_data_keypoints.py
│   └── visualize_records_human_body_parts.py
├── crontab.sh
├── data
│   └── README.md
├── document.pdf
├── draw
│   ├── __init__.py
│   ├── draw.py
│   ├── metric.py
│   ├── utils.py
│   └── utils.pyc
├── libs
│   ├── Makefile
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── boxes
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── bbox.pyx
│   │   ├── bbox_transform.py
│   │   ├── bbox_transform.pyc
│   │   ├── blob.py
│   │   ├── cython_anchor.py
│   │   ├── cython_anchor.pyx
│   │   ├── cython_bbox.py
│   │   ├── cython_bbox_transform.py
│   │   ├── cython_bbox_transform.pyx
│   │   ├── cython_nms.py
│   │   ├── cython_nms.pyc
│   │   ├── gprof2dot.py
│   │   ├── nms.py
│   │   ├── nms.pyc
│   │   ├── nms.pyx
│   │   ├── nms_wrapper.py
│   │   ├── nms_wrapper.pyc
│   │   ├── profile
│   │   ├── profile.png
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── timer.py
│   │   └── timer.pyc
│   ├── configs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── config_v1.py
│   │   └── config_v1.pyc
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco.py
│   │   ├── coco.pyc
│   │   ├── dataset_factory.py
│   │   ├── dataset_factory.pyc
│   │   ├── download_and_convert_coco.py
│   │   ├── download_and_convert_coco.pyc
│   │   └── pycocotools
│   │       ├── Makefile
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── _mask.c
│   │       ├── _mask.pyx
│   │       ├── _mask.so
│   │       ├── coco.py
│   │       ├── coco.pyc
│   │       ├── cocoeval.py
│   │       ├── common
│   │       │   ├── gason.cpp
│   │       │   ├── gason.h
│   │       │   ├── maskApi.c
│   │       │   └── maskApi.h
│   │       ├── mask.py
│   │       ├── mask.pyc
│   │       └── setup.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── assign.py
│   │   ├── assign.pyc
│   │   ├── crop.py
│   │   ├── crop.pyc
│   │   ├── mask.py
│   │   ├── mask.pyc
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── sample.py
│   │   ├── sample.pyc
│   │   ├── wrapper.py
│   │   └── wrapper.pyc
│   ├── logs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── log.py
│   │   └── log.pyc
│   ├── make.sh
│   ├── memory_util.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── nets_factory.py
│   │   ├── nets_factory.pyc
│   │   ├── pyramid_network.py
│   │   ├── pyramid_network.pyc
│   │   ├── resnet_utils.py
│   │   ├── resnet_utils.pyc
│   │   ├── resnet_v1.py
│   │   ├── resnet_v1.pyc
│   │   └── train_utils.py
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── preprocessings
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco_v1.py
│   │   ├── coco_v1.pyc
│   │   ├── utils.py
│   │   └── utils.pyc
│   ├── setup.py
│   └── visualization
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── pil_utils.py
│       ├── pil_utils.pyc
│       ├── summary_utils.py
│       └── summary_utils.pyc
├── mask_rcnn_final.xml
├── media
│   ├── file.txt
│   ├── testseg122_1.jpg
│   ├── testseg226_1.jpg
│   ├── testseg255_1.jpg
│   ├── testseg293_1.jpg
│   ├── testseg296_1.jpg
│   ├── testseg305_1.jpg
│   ├── testseg35_1.jpg
│   ├── testseg57_1.jpg
│   └── testseg70_1.jpg
├── train
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── train.py
│   ├── train_utils.py
│   └── train_utils.pyc
└── unit_test
    ├── __init__.py
    ├── data_test.py
    ├── layer_test.py
    ├── preprocessing_test.py
    └── resnet50_test.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data/pretrained_models/
2 | data/coco/
3 | output/mask_rcnn/
4 | convert_data/data/
5 | draw/data/
6 | draw/output_seg/
7 |
8 |
9 |
--------------------------------------------------------------------------------
/.idea/MaskRCNN_body.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/inspectionProfiles/Project_Default.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
59 |
60 |
61 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/Documentation.odt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/Documentation.odt
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Detecting Human Body Parts and Building Skeleton Models Using Deep Convolutional Neural Networks
2 |
3 | This repository contains an extension to the amazing work done by [CharlesShang](https://github.com/CharlesShang/FastMaskRCNN).
4 | It is a neural network model that performs object detection, classification and segmentation.
5 | The idea is to detect all persons in an image and segment their body parts. The next step is to add keypoint regression. The model runs at about 200 ms/frame on a Titan X GPU.
6 | A practical use for this kind of model would be a fashion application that uses the detected body information to render different clothes on the user, so they can preview how they would look.
7 | Here are some results from the training set.
8 |
9 | 
10 | 
11 | 
12 | 
13 | 
14 | 
15 | 
16 |
17 | The following videos show the model performing whole-body segmentation (the individual body parts are not drawn).
18 | The model output has been modified to have 2 detection classes (human, non-human) and 7 segmentation classes (full body, head, torso, right hand, left hand, right leg, left leg).
19 |
20 |
21 |
22 |
23 | The following videos show the results of the same model, but this time the body parts are drawn.
24 |
25 |
26 |
27 | # Installation
28 | ```
29 | git clone https://github.com/Iftimie/MaskRCNN_body.git
30 | cd MaskRCNN_body
31 | mkdir data/coco
32 | mkdir data/coco/records
33 | cd data/coco/records/
34 |
35 | wget https://www.dropbox.com/s/43ihvomchvwtpns/checkpoint
36 | wget https://www.dropbox.com/s/v6084wee6pjlfk4/coco_resnet50_model.ckpt-248000.data-00000-of-00001
37 | wget https://www.dropbox.com/s/0gqxnbsjzpuz0tz/coco_resnet50_model.ckpt-248000.index
38 | wget https://www.dropbox.com/s/3uildv0wlh79oad/coco_resnet50_model.ckpt-248000.meta
39 | #modify the checkpoint file so it points at your absolute path (see the sketch below)
40 | cd ../../..
41 | git checkout test
42 | #modify line 180 in train/test.py with your ip address
43 | #modify line 36 in train/client.py with the respective ip address
44 |
45 | #in one terminal
46 | CUDA_VISIBLE_DEVICES=0 python train/test.py
47 |
48 | #in another terminal. Make sure to have a webcam connected
49 | python train/client.py
50 | ```
51 |
--------------------------------------------------------------------------------
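A note on the `#modify the checkpoint file` step in the README above: the downloaded `checkpoint` file is TensorFlow's checkpoint-state text file, and its paths must be absolute for your machine. A minimal sketch of rewriting it (the absolute path shown is a placeholder, not part of the repository):

```python
# Rewrite TF's checkpoint-state file so it points at the downloaded weights.
# The absolute path below is a placeholder; substitute your own clone location.
ckpt = "/absolute/path/to/MaskRCNN_body/data/coco/records/coco_resnet50_model.ckpt-248000"
with open("data/coco/records/checkpoint", "w") as f:
    f.write('model_checkpoint_path: "%s"\n' % ckpt)
    f.write('all_model_checkpoint_paths: "%s"\n' % ckpt)
```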
/convert_data/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.py
--------------------------------------------------------------------------------
/convert_data/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.pyc
--------------------------------------------------------------------------------
/convert_data/convert_CHALEARN_human_body_parts.py:
--------------------------------------------------------------------------------
1 | import cv2
2 | import ChalearnLAPSample
3 | import numpy as np
4 | import tensorflow as tf
5 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
6 |
7 | # body_parts_dict combines multiple labels into a single part (e.g. right upper leg and right lower leg become one class: right leg)
8 | body_parts_dict={
9 | 1:1,#head
10 | 2:2,#torso
11 | 3:3,#left hand
12 | 5:3,#left forearm (lower)
13 | 7:3,#left upper arm
14 | 4:4,#right hand
15 | 6:4,#right forearm
16 | 8:4,#right upperarm
17 | 9:5,#left foot
18 | 11:5,#left lower leg
19 | 13:5,#left upper leg
20 | 10:6,#right foot
21 | 12:6,#right lower leg
22 | 14:6,#right upper leg
23 | }
24 |
25 | # poseSample = ChalearnLAPSample.PoseSample("Seq01.zip")
26 | # actorid=1
27 | # limbid=2
28 | # cv2.namedWindow("Seqxx",cv2.WINDOW_NORMAL)
29 | # cv2.namedWindow("Torso",cv2.WINDOW_NORMAL)
30 | # for x in range(1, poseSample.getNumFrames()):
31 | # img=poseSample.getRGB(x)
32 | # torso=poseSample.getLimb(x,actorid,6)
33 | # cv2.imshow("Seqxx",img)
34 | # cv2.imshow("Torso",torso)
35 | # cv2.waitKey(1000)
36 | # cv2.destroyAllWindows()
37 |
38 | # loadData takes the dataset's annotations and returns them in the form expected by the TFRecord writer below
39 | def loadData(frame_id,img,poseSample):
40 | H,W = img.shape[0],img.shape[1]
41 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls]
42 | masks_instances = [] #shape: [N,H,W,7]
43 | for actorid in range(1,3): # there are at maximum 2 persons in one image
44 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts
45 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body
46 | for limbid in range(1,15):
47 | part = poseSample.getLimb(frame_id,actorid,limbid) #get part mask
48 | part = cv2.resize(part[...,0]/255,(W,H))
49 | masks_for_person[...,body_parts_dict[limbid]] = np.logical_or(masks_for_person[...,body_parts_dict[limbid]],part) #this is where I combine for example right upper leg and right lower leg
50 | one_mask_person=np.logical_or(one_mask_person,part) # this is where I combine the part mask into the whole body
51 |
52 | masks_for_person[...,0] = one_mask_person
53 | _,contours,hierarchy = cv2.findContours(one_mask_person.astype(np.uint8).copy(), 1, 2) #### from here
54 | if len(contours)==0:
55 | continue
56 | x1=100000
57 | y1=100000
58 | x2=-10000
59 | y2=-10000
60 | for contour in contours:
61 | x,y,w,h = cv2.boundingRect(contour)
62 | xw,yh = x+w,y+h
63 | if x < x1:
64 | x1=x
65 | if y < y1:
66 | y1=y
67 | if xw > x2:
68 | x2=xw
69 | if yh > y2:
70 | y2=yh
71 | gt_boxes.append([x1,y1,x2,y2,1]) #####to here I select the bounding box of the person instance. the mask might be splitted into multiple blobs
72 | masks_instances.append(masks_for_person)
73 |
74 | if len(gt_boxes) ==0:
75 | return False,None,None,None,H,W
76 | masks_instances = np.array(masks_instances,dtype=np.uint8)
77 | gt_boxes = np.array(gt_boxes,dtype=np.float32)
78 | # for h_box in gt_boxes:
79 | # image = cv2.rectangle(img,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2)
80 | # cv2.imshow("img",image)
81 | # cv2.waitKey(100)
82 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard
83 | return True,gt_boxes,masks_instances,mask,H,W
84 |
85 | def _int64_feature(values):
86 | if not isinstance(values, (tuple, list)):
87 | values = [values]
88 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
89 |
90 | def _bytes_feature(values):
91 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
92 |
93 | def _to_tfexample_coco_raw(image_id, image_data, label_data,
94 | height, width,
95 | num_instances, gt_boxes, masks):
96 | """ just write a raw input"""
97 | return tf.train.Example(features=tf.train.Features(feature={
98 | 'image/img_id': _int64_feature(image_id),
99 | 'image/encoded': _bytes_feature(image_data),
100 | 'image/height': _int64_feature(height),
101 | 'image/width': _int64_feature(width),
102 | 'label/num_instances': _int64_feature(num_instances), # N
103 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid)
104 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width)
105 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation
106 | }))
107 |
108 |
109 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
110 | record_filename = "out_human_and_body_parts_chalearn.tfrecord"
111 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer:
112 | for seq,seq_id in zip(["Seq01.zip","Seq02.zip","Seq03.zip","Seq04.zip","Seq06.zip"],range(5)): # 5 movies
113 | #for seq,seq_id in zip(["Seq03.zip"],range(5)):
114 | poseSample = ChalearnLAPSample.PoseSample(seq) #Chalearn API
115 | for x in range(1, poseSample.getNumFrames(),6): # take every 6th frame; consecutive video frames are largely redundant
116 | img=poseSample.getRGB(x)
117 | img_id = seq_id*2000+x
118 | persons_exist,gt_boxes,masks_instances,mask,H,W = loadData(x,img,poseSample)
119 | if not persons_exist:
120 | continue
121 | mask_raw = mask.tostring()
122 | # img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR)
123 | # cv2.imshow("image",img)
124 | # cv2.waitKey(1000)
125 | img_raw = img.tostring()
126 | example = _to_tfexample_coco_raw(
127 | img_id,
128 | img_raw,
129 | mask_raw,
130 | H, W, gt_boxes.shape[0],
131 | gt_boxes.tostring(), masks_instances.tostring())
132 | print(x)
133 |
134 | tfrecord_writer.write(example.SerializeToString())
135 | tfrecord_writer.close()
136 |
137 |
138 |
139 |
--------------------------------------------------------------------------------
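Since every converter in `convert_data/` serializes the arrays with raw `tostring()` bytes, reading an example back is the mirror image of the writer. A minimal sketch, assuming the record written above exists in the working directory (the full reader lives in `visualize_records_human_body_parts.py` further below):

```python
import numpy as np
import tensorflow as tf
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType

# Read the first example back; the reshape shapes mirror the writer above.
options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
it = tf.python_io.tf_record_iterator("out_human_and_body_parts_chalearn.tfrecord", options)
example = tf.train.Example()
example.ParseFromString(next(it))
f = example.features.feature
H = f['image/height'].int64_list.value[0]
W = f['image/width'].int64_list.value[0]
N = f['label/num_instances'].int64_list.value[0]
img = np.fromstring(f['image/encoded'].bytes_list.value[0], np.uint8).reshape(H, W, 3)
boxes = np.fromstring(f['label/gt_boxes'].bytes_list.value[0], np.float32).reshape(N, 5)
masks = np.fromstring(f['label/gt_masks'].bytes_list.value[0], np.uint8).reshape(N, H, W, 7)
```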
/convert_data/convert_VOC_human_body_parts.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
3 | import numpy as np
4 | from PIL import Image
5 | import scipy.io as sio
6 | import cv2
7 | import traceback
8 | import logging
9 |
10 | # body_parts_dict combines multiple labels into a single part (e.g. right upper leg and right lower leg become one class: right leg)
11 | body_parts_dict = {
12 | 'head':1,
13 | 'lear':1,
14 | 'rear':1,
15 | 'mouth':1,
16 | 'hair':1,
17 | 'nose':1,
18 | 'leye':1,
19 | 'reye':1,
20 | 'lebrow':1,
21 | 'rebrow':1,
22 | 'torso':2,
23 | 'neck':2,
24 | 'luarm':3,
25 | 'llarm':3,
26 | 'lhand':3,
27 | 'rlarm':4,
28 | 'ruarm':4,
29 | 'rhand':4,
30 | 'llleg':5,
31 | 'luleg':5,
32 | 'lfoot':5,
33 | 'rlleg':6,
34 | 'ruleg':6,
35 | 'rfoot':6
36 | }
37 |
38 | body_parts_dict = { # NOTE: this second dict overrides the one above; left/right arms and legs are merged into single classes
39 | 'head':1,
40 | 'lear':1,
41 | 'rear':1,
42 | 'mouth':1,
43 | 'hair':1,
44 | 'nose':1,
45 | 'leye':1,
46 | 'reye':1,
47 | 'lebrow':1,
48 | 'rebrow':1,
49 | 'torso':2,
50 | 'neck':2,
51 | 'luarm':3,
52 | 'llarm':3,
53 | 'lhand':3,
54 | 'rlarm':3,
55 | 'ruarm':3,
56 | 'rhand':3,
57 | 'llleg':5,
58 | 'luleg':5,
59 | 'lfoot':5,
60 | 'rlleg':5,
61 | 'ruleg':5,
62 | 'rfoot':5
63 | }
64 | # loadData3 takes the dataset's annotations and returns them in the form expected by the TFRecord writer below
65 | def loadData3(H,W): # NOTE: `annotation` is not passed in; Python resolves it from module scope (it is set in the loop below)
66 |
67 | masks_instances = []#shape: [N,H,W,7]
68 |
69 | persons = [o for o in annotation['anno'][0]['objects'][0][0] if o['class']=='person'] # select all persons from the image
70 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls]
71 | for i in range(len(persons)):
72 | p = persons[i]
73 | pa = p['parts']
74 | parts = pa[0]
75 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts
76 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body
77 |
78 | for part in parts:
79 | part_name = part['part_name'].astype(str)[0]
80 | index = body_parts_dict[part_name]
81 | masks_for_person[...,index] = np.logical_or(masks_for_person[...,index], part['mask'])#this is where I combine for example right upper leg and right lower leg
82 | one_mask_person=np.logical_or(one_mask_person,part['mask']) # this is where I combine the part mask into the whole body
83 |
84 | masks_for_person[...,0]=one_mask_person
85 | kernel = np.ones((5,5),np.uint8)
86 | one_mask_person = np.array(one_mask_person,dtype=np.uint8)#if this line is missing=> error in cv2.dilate
87 | one_mask_person = cv2.dilate(one_mask_person,kernel,iterations = 1)
88 | _,contours,hierarchy = cv2.findContours(one_mask_person, 1, 2) #### from here
89 | if len(contours) ==0:
90 | continue
91 | x1=100000
92 | y1=100000
93 | x2=-10000
94 | y2=-10000
95 | for contour in contours:
96 | x,y,w,h = cv2.boundingRect(contour)
97 | xw,yh = x+w,y+h
98 | if x < x1:
99 | x1=x
100 | if y < y1:
101 | y1=y
102 | if xw > x2:
103 | x2=xw
104 | if yh > y2:
105 | y2=yh
106 | gt_boxes.append([x1,y1,x2,y2,1])
107 |
108 | if True: ######################### Body-only debug switch: overwrite the part channels and keep just the whole-body mask
109 | print ("BODYYYYYYYYYYYYYYYY")
110 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8)
111 | masks_for_person[...,0]=one_mask_person
112 |
113 | masks_instances.append(masks_for_person.copy()) #####to here I select the bounding box of the person instance. the mask might be splitted into multiple blobs
114 | if len(gt_boxes) ==0:
115 | return False,None,None,None
116 |
117 | masks_instances = np.array(masks_instances,dtype=np.uint8)
118 | gt_boxes = np.array(gt_boxes,dtype=np.float32)
119 | mask = masks_instances[0,:,:,1]# this is for drawing the ground truth in the network in tensorboard
120 | return True,gt_boxes,masks_instances,mask
121 |
122 | def _int64_feature(values):
123 | if not isinstance(values, (tuple, list)):
124 | values = [values]
125 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
126 |
127 | def _bytes_feature(values):
128 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
129 |
130 | def _to_tfexample_coco_raw(image_id, image_data, label_data,
131 | height, width,
132 | num_instances, gt_boxes, masks):
133 | """ just write a raw input"""
134 | return tf.train.Example(features=tf.train.Features(feature={
135 | 'image/img_id': _int64_feature(image_id),
136 | 'image/encoded': _bytes_feature(image_data),
137 | 'image/height': _int64_feature(height),
138 | 'image/width': _int64_feature(width),
139 | 'label/num_instances': _int64_feature(num_instances), # N
140 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid)
141 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width)
142 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation
143 | }))
144 |
145 |
146 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
147 | record_filename = "data/out_human_and_body_parts.tfrecord"
148 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer:
149 | for x in range (0,12000):
150 | try:
151 | img_id = x
152 | img_name = '2008_%06d' % (x,)
153 | img = np.array(Image.open('data/JPEGImages/'+img_name+'.jpg'))
154 | annotation = sio.loadmat('data/Annotations_Part/'+img_name+'.mat')
155 | image = cv2.imread('data/JPEGImages/'+img_name+'.jpg')
156 | height, width = img.shape[0],img.shape[1]
157 | img = img.astype(np.uint8)
158 | img_raw = img.tostring()
159 | persons_exist, gt_boxes, masks,mask = loadData3(height, width)
160 | if not persons_exist:
161 | continue
162 | mask_raw = mask.tostring()
163 |
164 | example = _to_tfexample_coco_raw(
165 | img_id,
166 | img_raw,
167 | mask_raw,
168 | height, width, gt_boxes.shape[0],
169 | gt_boxes.tostring(), masks.tostring())
170 | tfrecord_writer.write(example.SerializeToString())
171 | print (x)
172 | except BaseException as error:
173 | print(error)
174 |
175 | tfrecord_writer.close()
176 |
--------------------------------------------------------------------------------
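The `cv2.dilate` call in `loadData3` above exists so that a person whose mask is split into nearby blobs still produces a single connected region before `findContours`. A small self-contained demonstration with toy arrays (not repository data; the `[-2]` indexing keeps it working across OpenCV versions):

```python
import numpy as np
import cv2

# Two 1-pixel blobs separated by a 1-pixel gap.
m = np.zeros((9, 9), np.uint8)
m[2, 2] = 1
m[2, 4] = 1
d = cv2.dilate(m, np.ones((3, 3), np.uint8), iterations=1)
print(len(cv2.findContours(m.copy(), 1, 2)[-2]))  # 2 contours before dilation
print(len(cv2.findContours(d.copy(), 1, 2)[-2]))  # 1 contour after dilation
```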
/convert_data/convert_jhmdb.py:
--------------------------------------------------------------------------------
1 | import os
2 | import scipy.io as sio
3 | import cv2
4 | import numpy as np
5 | import tensorflow as tf
6 | from PIL import Image
7 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
8 |
9 |
10 | # body_parts_dict combines multiple labels into a single part (e.g. right upper leg and right lower leg become one class: right leg)
11 | body_parts_dict = {
12 | 2:1,#head
13 | 1:2,#torso
14 | 4:3,#left upper arm
15 | 8:3,#left lower arm
16 | 3:4,#right upper arm
17 | 7:4,#right lower arm
18 | 6:5,#left upper leg
19 | 5:6,#right upper leg
20 | 9:6,#right lower leg
21 | 10:5,#left lower leg
22 |
23 | }
24 |
25 | body_parts_dict = { # NOTE: this second dict overrides the one above; left/right limbs are merged into single classes
26 | 2:1,#head
27 | 1:2,#torso
28 | 4:3,#left upper arm
29 | 8:3,#left lower arm
30 | 3:3,#right upper arm
31 | 7:3,#right lower arm
32 | 6:5,#left upper leg
33 | 5:5,#right upper leg
34 | 9:5,#right lower leg
35 | 10:5,#left lower leg
36 | }
37 |
38 | # map_value linearly maps x from the range [A, B] to the range [a, b]; used to normalize keypoint coordinates to [-1, 1]
39 | def map_value(x,A,B,a,b):
40 | return (x-A)*(b-a)/(B-A)+a
41 |
42 |
43 | # loadData takes the dataset's annotations and returns them in the form expected by the TFRecord writer below
44 | def loadData(image,instance_mask,parts_mask,keypoints):
45 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls]
46 | masks_instances = [] #shape: [N,H,W,7]
47 | _,contours,hierarchy = cv2.findContours(instance_mask.copy(), 1, 2) ######### from here
48 | x1=100000
49 | y1=100000
50 | x2=-10000
51 | y2=-10000
52 | for contour in contours:
53 | x,y,w,h = cv2.boundingRect(contour)
54 | xw,yh = x+w,y+h
55 | if x < x1:
56 | x1=x
57 | if y < y1:
58 | y1=y
59 | if xw > x2:
60 | x2=xw
61 | if yh > y2:
62 | y2=yh
63 | gt_boxes.append([x1,y1,x2,y2,1]) ######### to here i find the bbox of the person as the mask for the person might contain multiple blobs
64 | H = image.shape[0]
65 | W = image.shape[1]
66 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts
67 | masks_for_person[...,0] = instance_mask.copy()
68 | for x in range(1,11):
69 | part = (parts_mask == x).astype(np.uint8)
70 | masks_for_person[...,body_parts_dict[x]] = np.logical_or(masks_for_person[...,body_parts_dict[x]],part) #this is where I combine for example right upper leg and right lower leg
71 |
72 | for x in range(15): #there are 15 keypoints
73 | # keypoints[0,x] = keypoints[0,x]-x1
74 | # keypoints[1,x] = keypoints[1,x]-y1
75 | keypoints[0,x] = map_value(keypoints[0,x],x1,x2,0.0,112.0) # first map the keypoint into the 112x112 output-mask coordinates, since the keypoint regression branch is attached to the mask branch
76 | keypoints[1,x] = map_value(keypoints[1,x],y1,y2,0.0,112.0)
77 | keypoints[0,x] = map_value(keypoints[0,x],0.0,112.0,-1,1) # then normalize to [-1, 1]; the intermediate 112x112 step is redundant but kept for visualization/debugging
78 | keypoints[1,x] = map_value(keypoints[1,x],0.0,112.0,-1,1)
79 |
80 | if True: #################### Body-only debug switch: keep just the whole-body mask and drop the part channels
81 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts
82 | masks_for_person[...,0] = instance_mask.copy()
83 |
84 | masks_instances.append(masks_for_person)
85 | masks_instances = np.array(masks_instances,dtype=np.uint8)
86 | gt_boxes = np.array(gt_boxes,dtype=np.float32)
87 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard
88 | keypoints = keypoints.astype(np.float32)
89 | return gt_boxes,masks_instances,mask,H,W,keypoints
90 |
91 |
92 | def _int64_feature(values):
93 | if not isinstance(values, (tuple, list)):
94 | values = [values]
95 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values))
96 |
97 | def _bytes_feature(values):
98 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))
99 |
100 | def _to_tfexample_coco_raw(image_id, image_data, label_data,
101 | height, width,
102 | num_instances, gt_boxes, masks,keypoints):
103 | """ just write a raw input"""
104 | return tf.train.Example(features=tf.train.Features(feature={
105 | 'image/img_id': _int64_feature(image_id),
106 | 'image/encoded': _bytes_feature(image_data),
107 | 'image/height': _int64_feature(height),
108 | 'image/width': _int64_feature(width),
109 | 'label/num_instances': _int64_feature(num_instances), # N
110 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid)
111 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width)
112 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation
113 | 'label/keypoints': _bytes_feature(keypoints)
114 | }))
115 |
116 | img_id = 0
117 | scenes = os.listdir('JHMDB_video/ReCompress_Videos')
118 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
119 | record_filename = "out_human_and_body_parts_keypoints_JHMDB.tfrecord"
120 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer:
121 | for s in scenes:
122 | if s=='.DS_Store':
123 | continue
124 | mask_dir = os.listdir('puppet_mask/'+s)
125 | for mask in mask_dir:
126 | mat_file_instance = sio.loadmat('puppet_mask/'+s+'/'+mask+'/puppet_mask.mat')
127 | video_file = cv2.VideoCapture('JHMDB_video/ReCompress_Videos/'+s+'/'+mask+".avi")
128 | mat_file_parts = sio.loadmat('puppet_flow_com/'+s+'/'+mask+'/puppet_flow.mat')
129 | mat_file_keypoints = sio.loadmat('joint_positions/'+s+'/'+mask+'/joint_positions.mat')
130 |
131 | #ret, image = video_file.read()
132 | for x in range(0,mat_file_parts['part_mask'].shape[2]):
133 | ret, image = video_file.read()
134 | parts = mat_file_parts['part_mask'][...,x]
135 | instance = mat_file_instance['part_mask'][...,x]
136 | keypoints = mat_file_keypoints['pos_img'][...,x]
137 | # parts = mat_file_parts['part_mask'][...,0]
138 | # instance = mat_file_instance['part_mask'][...,0]
139 | # keypoints = mat_file_keypoints['pos_img'][...,0]
140 |
141 | gt_boxes,masks_instances,mask,H,W,keypoints = loadData(image,instance,parts,keypoints)
142 | mask_raw = mask.tostring()
143 | img_raw = image.tostring()
144 | example = _to_tfexample_coco_raw(
145 | img_id,
146 | img_raw,
147 | mask_raw,
148 | H, W, gt_boxes.shape[0],
149 | gt_boxes.tostring(), masks_instances.tostring(),keypoints.tostring())
150 | tfrecord_writer.write(example.SerializeToString())
151 |
152 | # cv2.imshow("ar",parts*25)
153 | # cv2.imshow("image",image)
154 | # cv2.imshow("instance",instance*255)
155 | # cv2.waitKey(100)
156 | tfrecord_writer.close()
157 |
158 |
159 |
--------------------------------------------------------------------------------
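To make the two-step keypoint normalization in `loadData` above concrete, here is a small worked example; `map_value` is copied from the file, and the box coordinates are made up:

```python
# map_value is copied from convert_jhmdb.py; the box values are hypothetical.
def map_value(x, A, B, a, b):
    return (x - A) * (b - a) / (B - A) + a

x1, x2 = 40.0, 200.0                      # person box extent along x
kx = 120.0                                # a keypoint x inside that box
m = map_value(kx, x1, x2, 0.0, 112.0)     # -> 56.0, output-mask coordinates
n = map_value(m, 0.0, 112.0, -1.0, 1.0)   # -> 0.0, final normalized value
print(m, n)
```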
/convert_data/download_and_convert_data.sh:
--------------------------------------------------------------------------------
1 | #mkdir data
2 |
3 | ############################################################################################VOC
4 | #wget http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar
5 | #tar -xvzf VOCtrainval_03-May-2010.tar -C data/
6 | #tar -xvf VOCtrainval_03-May-2010.tar -C data/
7 | #mv data/VOCdevkit/VOC2010/JPEGImages/ data/
8 | #wget http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz
9 | #tar -xvzf trainval.tar.gz -C data/
10 | #python convert_VOC_human_body_parts.py
11 | #mv data/out_human_and_body_parts.tfrecord out_human_and_body_parts.tfrecord
12 |
13 | ############################################################################################Chalearn
14 | #mkdir data/chalearn
15 | #mkdir data/chalearn/api_code
16 | #wget https://competitions.codalab.org/my/datasets/download/764962c6-c270-4ee1-8721-e5611a5665f2 --no-check-certificate
17 | #wget https://competitions.codalab.org/my/datasets/download/27f9a04b-5499-4acf-b7b2-8aabb26f283c --no-check-certificate
18 | #mv 27f9a04b-5499-4acf-b7b2-8aabb26f283c dataset.zip
19 | #unzip dataset.zip -d data/chalearn/api_code/
20 | #mv ChalearnLAPEvaluation.py data/chalearn/api_code/ChalearnLAPEvaluation.py
21 | #mv ChalearnLAPSample.py data/chalearn/api_code/ChalearnLAPSample.py
22 | #mv convert_CHALEARN_human_body_parts.py data/chalearn/api_code/convert_CHALEARN_human_body_parts.py
23 | #cd data/chalearn/api_code
24 | #python convert_CHALEARN_human_body_parts.py
25 | #cd ../../..
26 | #mv data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord out_human_and_body_parts_chalearn.tfrecord
27 |
28 | #############################################################################################ADE20K
29 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/ADE20K_2016_07_26.zip
30 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/code.zip
31 | #mkdir data/ade20k
32 | #unzip ADE20K_2016_07_26.zip -d data/ade20k/
33 | #unzip code.zip -d data/ade20k/
34 | #mkdir data/ade20k/output_dir
35 | #mv data/ade20k/ADE20K_2016_07_26/index_ade20k.mat data/ade20k/index_ade20k.mat
36 | #mv human_body_parts.m data/ade20k/human_body_parts.m
37 | #cp data/ade20k/code/loadAde20K.m data/ade20k/loadAde20K.m
38 | #cd data/ade20k/
39 | #octave human_body_parts.m
40 | #cd ../..
41 | #python convert_ADE20k_human_body_parts.py
42 | #mv data/out_human_and_body_parts_ade_20k_max640edge.tfrecord out_human_and_body_parts_ade_20k.tfrecord
43 |
44 | ###############################################################################################JHMDB
45 | #wget http://files.is.tue.mpg.de/jhmdb/JHMDB_video.zip
46 | #wget http://files.is.tue.mpg.de/jhmdb/joint_positions.zip
47 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_mask.zip
48 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_flow_com.zip
49 | #mkdir data/jhmdb
50 | mkdir data/jhmdb/JHMDB_video
51 | #unzip JHMDB_video.zip -d data/jhmdb/
52 | mv data/jhmdb/ReCompress_Videos/ data/jhmdb/JHMDB_video
53 | #unzip joint_positions.zip -d data/jhmdb/
54 | #unzip puppet_mask.zip -d data/jhmdb/
55 | #unzip puppet_flow_com.zip -d data/jhmdb/
56 |
57 | #mv convert_jhmdb.py data/jhmdb/convert_jhmdb.py
58 | #mv read_my_data_keypoints.py data/jhmdb/read_my_data_keypoints.py
59 | #cd data/jhmdb/
60 | #python convert_jhmdb.py
61 | #cd ../..
62 | #mv data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord out_human_and_body_parts_keypoints_JHMDB.tfrecord
63 |
--------------------------------------------------------------------------------
/convert_data/human_body_parts.m:
--------------------------------------------------------------------------------
1 | # human_body_parts: export ADE20K images that contain a person with annotated body parts
2 | load('index_ade20k.mat');
3 |
4 |
5 | strings={'back','head','left arm','left foot','left hand','left leg','left shoulder','neck','right arm','right foot','right hand','right leg','right shoulder','torso'};
6 | N=22210;
7 |
8 | for n = 1:N
9 | filename = fullfile(index.folder{n}, index.filename{n});
10 | [Om, Oi, Pm, Pi, objects, parts] = loadAde20K(filename);
11 |
12 | object_class = objects.class;
13 | r = rows(objects.class);
14 | ok=0;
15 | for i =1:r
16 | if findstr(object_class{i,1},'person')
17 | ok=1;
18 | break
19 | endif
20 | end
21 |
22 | pndx = setdiff(unique(Pm),0);
23 | index_object_names = index.objectnames(pndx);
24 | if ok==0 || isempty(index_object_names)
25 | continue
26 | endif
27 | ok=0;
28 | for i=1:14
29 | if any(ismember(index_object_names,strings{i})) != 0
30 | ok=1;
31 | break
32 | endif
33 | end
34 |
35 | if ok ==1
36 |
37 | #disp('ok');
38 | #figure; imshow(Om, []); title('Object classes');
39 | #colormap(cat(1, [0 0 0], hsv(255)));
40 |
41 | #figure; imshow(Oi, []); title('Object classes');
42 | #colormap(cat(1, [0 0 0], hsv(255)));
43 |
44 | #subplot(round(sqrt(Nlevels)), ceil(sqrt(Nlevels)), 1)
45 | #imshow(Pm(:,:,1), []); title('Part classes')
46 | #colormap(cat(1, [0 0 0], hsv(255)))
47 |
48 | file_Om = sprintf('output_dir/Om%d.mat',n);
49 | file_Oi = sprintf('output_dir/Oi%d.mat',n);
50 | file_Pm = sprintf('output_dir/Pm%d.mat',n);
51 | file_Pi = sprintf('output_dir/Pi%d.mat',n);
52 | file_objects = sprintf('output_dir/objects%d.mat',n);
53 | file_parts = sprintf('output_dir/parts%d.mat',n);
54 | file_name = sprintf('output_dir/file%d.jpg',n);
55 |
56 | save(file_Om, 'Om',"-mat7-binary");
57 | save(file_Oi, 'Oi',"-mat7-binary");
58 | save(file_Pm, 'Pm',"-mat7-binary");
59 | save(file_Pi, 'Pi',"-mat7-binary");
60 | save(file_objects, 'objects',"-mat7-binary");
61 | save(file_parts, 'parts',"-mat7-binary");
62 | copyfile(filename,file_name);
63 |
64 | pndx = setdiff(unique(Pm),0);
65 | disp('Parts present in this image:');
66 | disp(n);
67 | endif
68 | #disp('next');
69 | #fflush(stdout)
70 |
71 |
72 |
73 | #{
74 | wndx = setdiff(unique(Om),0);
75 | disp('Objects present in this image (and their wordnet hierarchy):')
76 | for i = 1:length(wndx)
77 | %disp(sprintf('%60s', index.objectnames{wndx(n)}))
78 | if findstr(index.objectnames{wndx(i)},'person')
79 | disp('ok')
80 | figure; imshow(Om, []); title('Object classes')
81 | colormap(cat(1, [0 0 0], hsv(255)))
82 | endif
83 | end
84 | #}
85 | end
86 |
--------------------------------------------------------------------------------
/convert_data/read_my_data_keypoints.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
3 | import numpy as np
4 | from PIL import Image
5 | import scipy.io as sio
6 | import cv2
7 |
8 | def map_value(x,A,B,a,b): # linearly map x from [A, B] to [a, b]
9 | return (x-A)*(b-a)/(B-A)+a
10 |
11 | random_color =np.random.randint(0,180,(7))
12 | i=0
13 | example = tf.train.Example()
14 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
15 | for record in tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options):
16 |
17 | i = i+1
18 | print(i)
19 | if i % 70 != 0: # visualize every 70th record
20 | continue
21 | example.ParseFromString(record)
22 | f = example.features.feature
23 | img_idnp = f['image/img_id'].int64_list.value[0]
24 | image_np = f['image/encoded'].bytes_list.value[0]
25 | heightnp = f['image/height'].int64_list.value[0]
26 | widthnp = f['image/width'].int64_list.value[0]
27 | num_instancesnp = f['label/num_instances'].int64_list.value[0]
28 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0]
29 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0]
30 | encoded = f['label/encoded'].bytes_list.value[0]
31 | gt_keypoints = f['label/keypoints'].bytes_list.value[0]
32 |
33 | image_np = np.fromstring(image_np, dtype=np.uint8)
34 | image_np = image_np.reshape((heightnp, widthnp, 3))
35 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8)
36 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7))
37 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32)
38 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5))
39 | gt_keypointsnp = np.fromstring(gt_keypoints, dtype=np.float32).reshape((2,15))
40 | cv2.imshow("img",image_np)
41 | cv2.waitKey(100)
42 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV)
43 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp):
44 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2)
45 | for mask_part in range(7):
46 | mask = human_masks[:,:,mask_part]
47 | mask = mask.astype(np.uint8)
48 | S = 255
49 | if mask_part ==0:
50 | S=100
51 | for x in range(int(h_box[0]),int(h_box[2])):
52 | for y in range(int(h_box[1]),int(h_box[3])):
53 | if mask[y,x]==1:
54 | hsv[y,x,0] = random_color[mask_part]
55 | hsv[y,x,1] = S
56 | for x in range(15):
57 | gt_keypointsnp[0,x] = map_value(gt_keypointsnp[0,x],-10.0,10.0,h_box[0],h_box[2])
58 | gt_keypointsnp[1,x] = map_value(gt_keypointsnp[1,x],-10.0,10.0,h_box[1],h_box[3])
59 | hsv = cv2.circle(hsv,(int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x])),2,(255,255,255))
60 | print int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x])
61 | bgrr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
62 | cv2.imshow("img",bgrr)
63 | cv2.waitKey(700)
64 |
65 |
66 |
67 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
68 | cv2.imshow("img",bgr)
69 | cv2.waitKey(700)
70 |
71 |
72 |
--------------------------------------------------------------------------------
/convert_data/visualize_records_human_body_parts.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
3 | import numpy as np
4 | import cv2
5 |
6 | random_color =np.random.randint(0,180,(7))
7 |
8 | example = tf.train.Example()
9 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
10 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_keypoints_to_body_parts_COCO.tfrecord',options):
11 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts_ade_20k_max640edge.tfrecord',options):
12 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts.tfrecord',options):
13 | #for record in tf.python_io.tf_record_iterator('data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord',options):
14 | for record in tf.python_io.tf_record_iterator('data/freiburg/out_human_and_body_parts_Freiburg.tfrecord',options):
15 | #for record in tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options):
16 | example.ParseFromString(record)
17 | f = example.features.feature
18 | img_idnp = f['image/img_id'].int64_list.value[0]
19 | image_np = f['image/encoded'].bytes_list.value[0]
20 | heightnp = f['image/height'].int64_list.value[0]
21 | widthnp = f['image/width'].int64_list.value[0]
22 | num_instancesnp = f['label/num_instances'].int64_list.value[0]
23 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0]
24 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0]
25 | encoded = f['label/encoded'].bytes_list.value[0]
26 | image_np = np.fromstring(image_np, dtype=np.uint8)
27 | image_np = image_np.reshape((heightnp, widthnp, 3))
28 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8)
29 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7))
30 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32)
31 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5))
32 | cv2.imshow("img",image_np)
33 | cv2.waitKey(100)
34 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV)
35 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp):
36 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2)
37 | for mask_part in range(7):
38 | mask = human_masks[:,:,mask_part]
39 | mask = mask.astype(np.uint8)
40 | S = 255
41 | if mask_part ==0:
42 | S=100
43 | for x in range(int(h_box[0]),int(h_box[2])):
44 | for y in range(int(h_box[1]),int(h_box[3])):
45 | if mask[y,x]==1:
46 | hsv[y,x,0] = random_color[mask_part]
47 | hsv[y,x,1] = S
48 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
49 | cv2.imshow("img",bgr)
50 | cv2.waitKey(1000)
51 |
52 |
53 |
--------------------------------------------------------------------------------
/crontab.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | out=$(ps aux | grep '/usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py' | rev | cut -d ' ' -f 1 | rev | wc -l) # grep matches itself too: 2 lines = trainer running, 1 = dead
3 | if [ $out -eq "2" ]; then
4 | echo "2 processes" >> /tmp/testing.txt
5 | else
6 | echo "1 processes" >> /tmp/testing.txt
7 | echo $(date) >> /tmp/testing.txt
8 | export CUDA_VISIBLE_DEVICES=0
9 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/
10 | /usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py &>> /tmp/testing.txt
11 | echo "tried to start" >> /tmp/testing.txt;
12 | fi
13 |
14 |
15 |
--------------------------------------------------------------------------------
/data/README.md:
--------------------------------------------------------------------------------
1 | Place your unzipped COCO data in this directory, like:
2 |
3 | ```buildoutcfg
4 | ./data
5 | ./coco
6 | ./annotations
7 | ./train2014
8 | ./val2014
9 | ```
10 |
--------------------------------------------------------------------------------
/document.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/document.pdf
--------------------------------------------------------------------------------
/draw/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/__init__.py
--------------------------------------------------------------------------------
/draw/draw.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from utils import draw_human_body_parts
3 |
4 | for x in range(1,150):
5 | array = np.load("/home/alex/PycharmProjects/data/array"+str(x)+".npy")
6 | image = array[0]
7 | bbox = array[1]
8 | label =array[2]
9 | prob = array[3]
10 | gt_bbox = array[4]
11 | gt_label = array[5]
12 | final_mask = array[6]
13 | gt_mask = array[7]
14 |
15 | #visualize_mask_gt(bbox,final_mask,gt_mask,label,prob)
16 | #draw_segmentation_parts(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask)
17 | #draw_bbox_better(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) ############this is for voc independent body parts
18 | print (x)
19 | draw_human_body_parts(x,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask)
20 |
21 |
22 |
--------------------------------------------------------------------------------
/draw/metric.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 |
4 | def bbox_overlaps(boxes,query_boxes): # boxes is the predicted boxes and query_boxes is the ground truth boxes
5 | N = boxes.shape[0]
6 | K = query_boxes.shape[0]
7 | overlaps = np.zeros((N, K), dtype=np.float32)
8 | iw, ih, box_area,ua,k, n = 0,0,0,0,0,0
9 | for k in range(K):
10 | box_area = (
11 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
12 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
13 | )
14 | for n in range(N):
15 | iw = (
16 | min(boxes[n, 2], query_boxes[k, 2]) -
17 | max(boxes[n, 0], query_boxes[k, 0]) + 1
18 | )
19 | if iw > 0:
20 | ih = (
21 | min(boxes[n, 3], query_boxes[k, 3]) -
22 | max(boxes[n, 1], query_boxes[k, 1]) + 1
23 | )
24 | if ih > 0:
25 | ua = float(
26 | (boxes[n, 2] - boxes[n, 0] + 1) *
27 | (boxes[n, 3] - boxes[n, 1] + 1) +
28 | box_area - iw * ih
29 | )
30 | overlaps[n, k] = iw * ih / ua
31 | return overlaps
32 |
33 | def IOU_mask(mask,gt_mask):
34 | intersection = np.sum( (mask * gt_mask) > 0 )
35 | union = np.sum((np.logical_or(mask,gt_mask))> 0)
36 | return float(intersection)/float(union+1) # +1 guards against division by zero when the union is empty
37 |
38 | def metric_for_image(bbox=None,gt_bbox=None,label=None, gt_label=None, prob=None,final_mask=None): # NOTE: gt_mask is read from module scope (loaded in the loop below)
39 | #find the overlaps between each predicted box and gt_box
40 | overlaps = bbox_overlaps(np.ascontiguousarray(bbox[:, :4], dtype=np.float),np.ascontiguousarray(gt_bbox[:, :4], dtype=np.float))
41 | gt_assignment = overlaps.argmax(axis=1) #multiple bboxes may have a single GT
42 |
43 | max_overlaps = overlaps[np.arange(bbox.shape[0]), gt_assignment] #select the predicted boxes that are closest to the gt_box
44 |
45 | good = 0
46 | total_boxes = 0
47 | for i,overlap in enumerate(max_overlaps):
48 | box = bbox[i]
49 | width = int(box[2])-int(box[0])
50 | height = int(box[3])-int(box[1])
51 | if prob[i,label[i]] > 0.5 and width*height >1000 and label[i]!=0: # drop detections with score <= 0.5, boxes under 1000 px, or background labels
52 | total_boxes = total_boxes+1 #this will be the denominator
53 | if label[i] == gt_label[i]:
54 | if overlap >0.5: #if overlap of the BOXES is bigger than 0.5
55 | output_mask = (final_mask[i] > 0.6).astype(np.uint8)
56 |
57 | gt_maski = gt_mask[:,int(box[1]):int(box[3]),int(box[0]):int(box[2]),:] #crop from gt_mask given the predicted box
58 | gt_maskii = np.zeros([112,112,7],np.uint8)
59 | for x in range(7):
60 | mask = gt_maski[...,x]
61 | mask = mask[0]
62 | gt_maskii[...,x] = cv2.resize(mask.astype(np.uint8),(112,112))
63 |
64 | if IOU_mask(output_mask,gt_maskii) > 0.5: #if overlap of the MASKS is bigger than 0.5
65 | good = good +1
66 | precision_over_image = float(good)/(float(total_boxes)+np.finfo(np.float32).eps)
67 | return precision_over_image
68 |
69 | metrics = []
70 | for i in range(0,512):
71 | bbox = np.load('data/bbox'+str(i)+'.npy')
72 | gt_bbox = np.load('data/gt_boxes'+str(i)+'.npy')
73 | final_mask = np.load('data/final_mask'+str(i)+'.npy')
74 | gt_label = np.load('data/gt_label'+str(i)+'.npy')
75 | image = np.load('data/image'+str(i)+'.npy')
76 | label = np.load('data/label'+str(i)+'.npy')
77 | prob = np.load('data/prob'+str(i)+'.npy')
78 | gt_mask = np.load('data/gt_mask'+str(i)+'.npy')
79 | metrics.append(metric_for_image(bbox,gt_bbox,label,gt_label,prob,final_mask))
80 |
81 | print(sum(metrics) / len(metrics)) # mean per-image precision
82 |
--------------------------------------------------------------------------------
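A quick hand-check of the two overlap conventions used in `metric.py`, with toy boxes and masks (not repository data):

```python
import numpy as np

# Box IoU with the inclusive-pixel (+1) convention of bbox_overlaps.
box = np.array([0., 0., 10., 10.])       # [x1, y1, x2, y2]
query = np.array([5., 5., 15., 15.])
iw = min(box[2], query[2]) - max(box[0], query[0]) + 1   # 6
ih = min(box[3], query[3]) - max(box[1], query[1]) + 1   # 6
area = lambda b: (b[2] - b[0] + 1) * (b[3] - b[1] + 1)   # 121 for each box
print(iw * ih / (area(box) + area(query) - iw * ih))     # 36/206 ~ 0.175

# Mask IoU as in IOU_mask; the +1 in the denominator avoids division by zero.
a = np.zeros((4, 4), np.uint8); a[:2, :2] = 1
b = np.zeros((4, 4), np.uint8); b[1:3, 1:3] = 1
print(np.sum((a * b) > 0) / float(np.sum(np.logical_or(a, b) > 0) + 1))  # 1/8
```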
/draw/utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance
4 | import scipy.misc
5 | import cv2
6 | import numpy.ma as ma
7 |
8 | FLAGS = tf.app.flags.FLAGS
9 | _DEBUG = False
10 |
11 |
12 | #not used
13 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None):
14 | #print("image")
15 | #print(image)
16 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0)
17 | norm_image = np.uint8(image/0.1*127.0 + 127.0)
18 | #print("norm_image")
19 | #print(norm_image)
20 | source_img = Image.fromarray(norm_image)
21 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG')
22 |
23 |
24 | #label colors
25 | colors = []
26 | colors.append([180,255,255])
27 | colors.append([150,255,255])
28 | colors.append([120,255,255])
29 | colors.append([90,255,255])
30 | colors.append([60,255,255])
31 | colors.append([30,255,255])
32 | colors.append([0,255,255])
33 |
34 |
35 |
36 | def draw_human_body_parts(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
37 | import cv2
38 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
39 | hsv_body = hsv.copy()
40 | if bbox is not None:
41 | dictinary = {} # key: area, value: [box, label, gt_label, prob, mask, color]; sorting by area lets smaller boxes be drawn in front
42 | for i, box in enumerate(bbox):
43 | width = int(box[2])-int(box[0])
44 | height = int(box[3])-int(box[1])
45 | #l=label[i]
46 | #p = prob[i,label[i]]
47 | if (prob[i,label[i]] > 0.5) and width*height >1000 and label[i]!=0: # keep boxes with score > 0.5, area > 1000 px and a non-background label
48 | area = float((box[2]-box[0])*(box[3]-box[1]))
49 | while area in dictinary: # nudge duplicate areas so every box gets a unique key
50 | area+=1
51 |
52 | mask = final_mask[i]
53 | masks = np.zeros((height,width,7))
54 | body_mask = mask[...,0] > 0.6
55 | body_mask2 = np.array(body_mask,np.uint8)
56 | masks[...,0] = scipy.misc.imresize(body_mask2,(height,width))
57 |
58 | # cv2.imshow("body_mask",body_mask.astype(np.uint8)*255)
59 | # cv2.waitKey(3000)
60 | for x in range(1,7):
61 | maska = mask[...,x] > 0.6 # draw a pixel if its part probability exceeds 0.6
62 | # cv2.imshow("maska"+str(x),maska.astype(np.uint8)*255)
63 | # cv2.waitKey(3000)
64 | maska = np.logical_and(maska,body_mask) # clip the parts so they lie inside the body mask; the body is segmented more reliably
65 | maska = ma.masked_array(mask[...,x], mask=np.logical_not(maska))
66 | maska = np.ma.filled(maska, 0)
67 | #maska = maska >0
68 | maska = scipy.misc.imresize(maska,(height,width))
69 |
70 | masks[...,x] = maska
71 | dictinary[round(area,4)]=(box,label[i],gt_label[i],prob[i,label[i]],masks,colors[label[i]])
72 | sorted_keys = sorted(dictinary.keys(),reverse=True)
73 | # cv2.waitKey(6000)
74 | for key,i in zip(sorted_keys,range(len(sorted_keys))):
75 | bo, lab,gt_lab,_,mask,col= dictinary[key] #mask has shape [H,W,7]
76 |
77 | max_indices = np.argmax(mask,axis=2) # where part masks overlap, keep the part with the highest probability
78 | #max_indices is an array with size [H,W] and its values represent the per-pixel label of the parts
79 | for x in range(int(bo[0]),int(bo[2])):
80 | for y in range(int(bo[1]),int(bo[3])):
81 |
82 | xm = x-(int(bo[0]))
83 | ym = y-(int(bo[1]))
84 | if mask[ym,xm,max_indices[ym,xm]] >0: #
85 | hsv[y,x,0] = colors[max_indices[ym,xm]][0]
86 | hsv[y,x,1] = 255
87 |
88 | for x in range(int(bo[0]),int(bo[2])):
89 | for y in range(int(bo[1]),int(bo[3])):
90 |
91 | xm = x-(int(bo[0]))
92 | ym = y-(int(bo[1]))
93 | if(mask[ym,xm,0]==1):
94 | hsv_body[y,x,0] = colors[0][0]
95 | hsv_body[y,x,1] = 150
96 |
97 | hsv = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB)
98 | hsv_body = cv2.cvtColor(hsv_body, cv2.COLOR_HSV2RGB)
99 | i=0
100 | for key in sorted_keys:
101 | bo, lab,gt_lab,_,_,col= dictinary[key]
102 | c = (255,0,0)
104 | text = cat_id_to_cls_name(lab)
105 | i=i+1
106 | hsv = cv2.rectangle(hsv,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3)
107 | hsv = cv2.putText(hsv,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255))
108 | hsv_body = cv2.rectangle(hsv_body,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3)
109 | hsv_body = cv2.putText(hsv_body,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255))
110 | #cv2.imwrite('test_' + name + '_' + str(step) +'.jpg',image)
111 | cv2.imwrite('/home/alex/PycharmProjects/data/test_seg' + name + '_' + str(step) +'.jpg',hsv)
112 | cv2.imwrite('/home/alex/PycharmProjects/data/test_hsv' + name + '_' + str(step) +'.jpg',hsv_body)
113 |
114 | def cat_id_to_cls_name(catId):
115 | cls_name = np.array(['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
116 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
117 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
118 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
119 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
120 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
121 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
122 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
123 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
124 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
125 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
126 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
127 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
128 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'])
129 | return cls_name[catId]
130 |
--------------------------------------------------------------------------------
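The `np.argmax(mask, axis=2)` step in `draw_human_body_parts` resolves overlapping part masks by keeping, per pixel, the part with the highest probability. A tiny illustration with made-up values:

```python
import numpy as np

# Channel 0 is the whole body, channels 1..6 are the parts.
mask = np.zeros((2, 2, 7), np.float32)
mask[0, 0, 1] = 0.9   # "head" wins the overlap at pixel (0, 0)
mask[0, 0, 2] = 0.7   # "torso" loses it
mask[1, 1, 2] = 0.8
print(np.argmax(mask, axis=2))
# [[1 0]
#  [0 2]]
```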
/draw/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/utils.pyc
--------------------------------------------------------------------------------
/libs/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | python setup.py build_ext --inplace
3 | rm -rf build
4 | sh make.sh
--------------------------------------------------------------------------------
/libs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.py
--------------------------------------------------------------------------------
/libs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.pyc
--------------------------------------------------------------------------------
/libs/boxes/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/libs/boxes/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 | from . import cython_nms
8 | from . import cython_bbox
9 | from . import nms
10 | from . import timer
11 | from .anchor import anchors
12 | from .anchor import anchors_plane
13 | from .roi import roi_cropping
15 | from . import cython_anchor
16 | from . import cython_bbox_transform
--------------------------------------------------------------------------------
/libs/boxes/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/__init__.pyc
--------------------------------------------------------------------------------
/libs/boxes/anchor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 | from libs.boxes import cython_anchor
7 |
8 | def anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
9 | """Get a set of anchors at one position """
10 | return generate_anchors(base_size=base, scales=np.asarray(scales, np.int32), ratios=ratios)
11 |
12 | def anchors_plane(height, width, stride = 1.0,
13 | scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16):
14 | """Get a complete set of anchors in a spatial plane,
15 | height, width are plane dimensions
16 | stride is the scale ratio w.r.t. the input image
17 | """
18 | # TODO: implement in C, or pre-compute them, or set to a fixed input-shape
19 | # enum all anchors in a plane
20 | # scales = kwargs.setdefault('scales', [2, 4, 8, 16, 32])
21 | # ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0])
22 | # base = kwargs.setdefault('base', 16)
23 | anc = anchors(scales, ratios, base)
24 | all_anchors = cython_anchor.anchors_plane(height, width, stride, anc)
25 | #print (all_anchors.shape)
26 | return all_anchors
27 |
28 | # Written by Ross Girshick and Sean Bell
29 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
30 | scales=2 ** np.arange(3, 6)):
31 | """
32 | Generate anchor (reference) windows by enumerating aspect ratios X
33 | scales wrt a reference (0, 0, 15, 15) window.
34 | """
35 |
36 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
37 | ratio_anchors = _ratio_enum(base_anchor, ratios)
38 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
39 | for i in range(ratio_anchors.shape[0])])
40 | return anchors
41 |
42 | def _whctrs(anchor):
43 | """
44 | Return width, height, x center, and y center for an anchor (window).
45 | """
46 |
47 | w = anchor[2] - anchor[0] + 1
48 | h = anchor[3] - anchor[1] + 1
49 | x_ctr = anchor[0] + 0.5 * (w - 1)
50 | y_ctr = anchor[1] + 0.5 * (h - 1)
51 | return w, h, x_ctr, y_ctr
52 |
53 |
54 | def _mkanchors(ws, hs, x_ctr, y_ctr):
55 | """
56 | Given a vector of widths (ws) and heights (hs) around a center
57 | (x_ctr, y_ctr), output a set of anchors (windows).
58 | """
59 |
60 | ws = ws[:, np.newaxis]
61 | hs = hs[:, np.newaxis]
62 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
63 | y_ctr - 0.5 * (hs - 1),
64 | x_ctr + 0.5 * (ws - 1),
65 | y_ctr + 0.5 * (hs - 1)))
66 | return anchors
67 |
68 |
69 | def _ratio_enum(anchor, ratios):
70 | """
71 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
72 | """
73 |
74 | w, h, x_ctr, y_ctr = _whctrs(anchor)
75 | size = w * h
76 | size_ratios = size / ratios
77 | ws = np.round(np.sqrt(size_ratios))
78 | hs = np.round(ws * ratios)
79 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
80 | return anchors
81 |
82 |
83 | def _scale_enum(anchor, scales):
84 | """
85 | Enumerate a set of anchors for each scale wrt an anchor.
86 | """
87 |
88 | w, h, x_ctr, y_ctr = _whctrs(anchor)
89 | ws = w * scales
90 | hs = h * scales
91 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
92 | return anchors
93 |
94 | def _unmap(data, count, inds, fill=0):
95 | """ Unmap a subset of item (data) back to the original set of items (of
96 | size count) """
97 | if len(data.shape) == 1:
98 | ret = np.empty((count,), dtype=np.float32)
99 | ret.fill(fill)
100 | ret[inds] = data
101 | else:
102 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
103 | ret.fill(fill)
104 | ret[inds, :] = data
105 | return ret
106 |
107 | if __name__ == '__main__':
108 | import time
109 |
110 | t = time.time()
111 | a = anchors()
112 | num_anchors = 0
113 |
114 | # all_anchors = anchors_plane(200, 250, stride=4, boarder=0)
115 | # num_anchors += all_anchors.shape[0]
116 | for i in range(10):
117 | ancs = anchors()
118 | all_anchors = cython_anchor.anchors_plane(200, 250, 4, ancs)
119 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
120 | all_anchors = cython_anchor.anchors_plane(100, 125, 8, ancs)
121 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
122 | all_anchors = cython_anchor.anchors_plane(50, 63, 16, ancs)
123 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
124 | all_anchors = cython_anchor.anchors_plane(25, 32, 32, ancs)
125 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2]
126 | print('average time: %f' % ((time.time() - t) / 10))
127 | print('anchors: %d' % (num_anchors / 10))
128 | print(a.shape, '\n', a)
129 | print (all_anchors.shape)
130 | # from IPython import embed
131 | # embed()
132 |
--------------------------------------------------------------------------------
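
For orientation, a minimal usage sketch of the two entry points above, assuming the Cython extensions in libs/boxes have been built (e.g. via libs/make.sh) and the repo root is on PYTHONPATH:

import numpy as np
from libs.boxes.anchor import anchors, anchors_plane

base = anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16)
print(base.shape)         # (15, 4): 5 scales x 3 ratios, rows are (x1, y1, x2, y2)

# tile the 15 base anchors over a 50x63 feature map with stride 16
all_anchors = anchors_plane(50, 63, stride=16)
print(all_anchors.shape)  # (50, 63, 15, 4): one anchor set per spatial cell
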
/libs/boxes/anchor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/anchor.pyc
--------------------------------------------------------------------------------
/libs/boxes/bbox.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Sergey Karayev
6 | # --------------------------------------------------------
7 |
8 | cimport cython
9 | import numpy as np
10 | cimport numpy as np
11 |
12 | DTYPE = np.float64  # np.float was a float64 alias; the alias is removed in newer NumPy
13 | ctypedef np.float_t DTYPE_t
14 |
15 | def bbox_overlaps(
16 | np.ndarray[DTYPE_t, ndim=2] boxes,
17 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
18 | """
19 | Parameters
20 | ----------
21 | boxes: (N, 4) ndarray of float
22 | query_boxes: (K, 4) ndarray of float
23 | Returns
24 | -------
25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes
26 | """
27 | cdef unsigned int N = boxes.shape[0]
28 | cdef unsigned int K = query_boxes.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE)
30 | cdef DTYPE_t iw, ih, box_area
31 | cdef DTYPE_t ua
32 | cdef unsigned int k, n
33 | for k in range(K):
34 | box_area = (
35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
37 | )
38 | for n in range(N):
39 | iw = (
40 | min(boxes[n, 2], query_boxes[k, 2]) -
41 | max(boxes[n, 0], query_boxes[k, 0]) + 1
42 | )
43 | if iw > 0:
44 | ih = (
45 | min(boxes[n, 3], query_boxes[k, 3]) -
46 | max(boxes[n, 1], query_boxes[k, 1]) + 1
47 | )
48 | if ih > 0:
49 | ua = float(
50 | (boxes[n, 2] - boxes[n, 0] + 1) *
51 | (boxes[n, 3] - boxes[n, 1] + 1) +
52 | box_area - iw * ih
53 | )
54 | overlaps[n, k] = iw * ih / ua
55 | return overlaps
56 |
57 | def bbox_intersections(
58 | np.ndarray[DTYPE_t, ndim=2] boxes,
59 | np.ndarray[DTYPE_t, ndim=2] query_boxes):
60 | """
61 | For each query box compute the intersection ratio covered by boxes
62 |
63 | Parameters
64 | ----------
65 | boxes: (N, 4) ndarray of float
66 | query_boxes: (K, 4) ndarray of float
67 | Returns
68 | -------
69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes
70 | """
71 | cdef unsigned int N = boxes.shape[0]
72 | cdef unsigned int K = query_boxes.shape[0]
73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE)
74 | cdef DTYPE_t iw, ih, box_area
75 | cdef DTYPE_t ua
76 | cdef unsigned int k, n
77 | for k in range(K):
78 | box_area = (
79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) *
80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1)
81 | )
82 | for n in range(N):
83 | iw = (
84 | min(boxes[n, 2], query_boxes[k, 2]) -
85 | max(boxes[n, 0], query_boxes[k, 0]) + 1
86 | )
87 | if iw > 0:
88 | ih = (
89 | min(boxes[n, 3], query_boxes[k, 3]) -
90 | max(boxes[n, 1], query_boxes[k, 1]) + 1
91 | )
92 | if ih > 0:
93 | intersec[n, k] = iw * ih / box_area
94 | return intersec
--------------------------------------------------------------------------------
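
A vectorized NumPy equivalent of bbox_overlaps can be handy for sanity-checking the compiled version; this sketch follows the same pixel-inclusive (+1) box convention:

import numpy as np

def bbox_overlaps_np(boxes, query_boxes):
    """boxes: (N, 4), query_boxes: (K, 4) -> (N, K) IoU matrix."""
    b = boxes[:, None, :]        # (N, 1, 4)
    q = query_boxes[None, :, :]  # (1, K, 4)
    iw = np.minimum(b[..., 2], q[..., 2]) - np.maximum(b[..., 0], q[..., 0]) + 1
    ih = np.minimum(b[..., 3], q[..., 3]) - np.maximum(b[..., 1], q[..., 1]) + 1
    inter = np.clip(iw, 0, None) * np.clip(ih, 0, None)
    area_b = (b[..., 2] - b[..., 0] + 1) * (b[..., 3] - b[..., 1] + 1)
    area_q = (q[..., 2] - q[..., 0] + 1) * (q[..., 3] - q[..., 1] + 1)
    return inter / (area_b + area_q - inter)
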
/libs/boxes/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import warnings
10 |
11 | def bbox_transform(ex_rois, gt_rois):
12 | """
13 | computes the distance from ground-truth boxes to the given boxes, normed by their size
14 | :param ex_rois: n * 4 numpy array, given boxes
15 | :param gt_rois: n * 4 numpy array, ground-truth boxes
16 |     :return: deltas: n * 4 numpy array, regression targets (dx, dy, dw, dh)
17 | """
18 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
19 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
20 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
21 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
22 |
23 | # assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
24 | # 'Invalid boxes found: {} {}'. \
25 | # format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :])
26 |
27 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
28 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
29 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
30 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
31 |
32 | # warnings.catch_warnings()
33 | # warnings.filterwarnings('error')
34 |     targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths    # targets are scaled up (10x here,
35 |     targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights   # 5x below) so the regression loss
36 |     targets_dw = 5.0 * np.log(gt_widths / ex_widths)         # sees larger magnitudes; the inverse
37 |     targets_dh = 5.0 * np.log(gt_heights / ex_heights)       # scaling is applied in bbox_transform_inv
38 |
39 | targets = np.vstack(
40 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
41 | return targets
42 |
43 | def bbox_transform_inv(boxes, deltas):  # boxes: (R, 4), [x1, y1, x2, y2]; deltas: (R, K*4) (see roi.py, line 116)
44 | if boxes.shape[0] == 0:
45 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
46 |
47 | boxes = boxes.astype(deltas.dtype, copy=False)
48 |
49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0
50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0
51 | ctr_x = boxes[:, 0] + 0.5 * widths
52 | ctr_y = boxes[:, 1] + 0.5 * heights
53 |
54 |     dx = deltas[:, 0::4] * 0.1  # undo the 10x scaling applied at lines 34-35
55 |     dy = deltas[:, 1::4] * 0.1
56 |     dw = deltas[:, 2::4] * 0.2  # undo the 5x scaling applied at lines 36-37
57 |     dh = deltas[:, 3::4] * 0.2
58 |
59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 |     # pred_w = np.exp(dw) * widths[:, np.newaxis]    (direct form, kept for reference)
62 |     # pred_h = np.exp(dh) * heights[:, np.newaxis]
63 |
64 |     pred_w = np.exp(dw + np.log(widths[:, np.newaxis]))   # equivalent log-space form
65 |     pred_h = np.exp(dh + np.log(heights[:, np.newaxis]))
66 |
72 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
73 | # x1
74 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
75 | # y1
76 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
77 | # x2
78 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1
79 | # y2
80 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1
81 |
82 | return pred_boxes
83 |
84 | def clip_boxes(boxes, im_shape):
85 | """
86 | Clip boxes to image boundaries.
87 | """
88 |
89 | # x1 >= 0
90 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
91 | # y1 >= 0
92 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
93 | # x2 < im_shape[1]
94 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
95 | # y2 < im_shape[0]
96 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
97 | return boxes
98 |
--------------------------------------------------------------------------------
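
Because the 10x/5x target scaling above is exactly undone by the 0.1/0.2 factors in bbox_transform_inv, encoding and decoding round-trips back to the original boxes. A small self-check sketch (assuming the repo root is importable):

import numpy as np
from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv

rois = np.array([[0., 0., 15., 15.], [8., 8., 39., 39.]])   # given boxes
gt   = np.array([[2., 3., 20., 18.], [10., 5., 45., 30.]])  # ground-truth boxes

deltas = bbox_transform(rois, gt)            # (2, 4) scaled regression targets
decoded = bbox_transform_inv(rois, deltas)   # (2, 4) recovered boxes
assert np.allclose(decoded, gt)
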
/libs/boxes/bbox_transform.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/bbox_transform.pyc
--------------------------------------------------------------------------------
/libs/boxes/blob.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | """Blob helper functions."""
9 |
10 | import numpy as np
11 | import cv2
12 | from ..fast_rcnn.config import cfg  # NOTE: leftover from py-faster-rcnn; this repo has no fast_rcnn package
13 |
14 | def im_list_to_blob(ims):
15 | """Convert a list of images into a network input.
16 |
17 | Assumes images are already prepared (means subtracted, BGR order, ...).
18 | """
19 | max_shape = np.array([im.shape for im in ims]).max(axis=0)
20 | num_images = len(ims)
21 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3),
22 | dtype=np.float32)
23 |     for i in range(num_images):
24 | im = ims[i]
25 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im
26 |
27 | return blob
28 |
29 | def prep_im_for_blob(im, pixel_means, target_size, max_size):
30 | """Mean subtract and scale an image for use in a blob."""
31 | im = im.astype(np.float32, copy=False)
32 | im -= pixel_means
33 | im_shape = im.shape
34 | im_size_min = np.min(im_shape[0:2])
35 | im_size_max = np.max(im_shape[0:2])
36 | im_scale = float(target_size) / float(im_size_min)
37 | # Prevent the biggest axis from being more than MAX_SIZE
38 | if np.round(im_scale * im_size_max) > max_size:
39 | im_scale = float(max_size) / float(im_size_max)
40 | if cfg.TRAIN.RANDOM_DOWNSAMPLE:
41 | r = 0.6 + np.random.rand() * 0.4
42 | im_scale *= r
43 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale,
44 | interpolation=cv2.INTER_LINEAR)
45 |
46 | return im, im_scale
47 |
--------------------------------------------------------------------------------
/libs/boxes/cython_anchor.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import sys, pkg_resources, imp
6 |
7 | def __bootstrap__():
8 | global __bootstrap__, __loader__, __file__
9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_anchor.so')
10 | __loader__ = None
11 | del __bootstrap__, __loader__
12 | imp.load_dynamic(__name__, __file__)
13 |
14 | __bootstrap__()
--------------------------------------------------------------------------------
/libs/boxes/cython_anchor.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by CharlesShang@github
5 | # --------------------------------------------------------
6 |
7 | cimport cython
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | DTYPE = np.float64  # np.float was a float64 alias; the alias is removed in newer NumPy
12 | ctypedef np.float_t DTYPE_t
13 |
14 | def anchors_plane(
15 | int height, int width, int stride,
16 | np.ndarray[DTYPE_t, ndim=2] anchors_base):
17 | """
18 | Parameters
19 | ----------
20 | height: height of plane
21 | width: width of plane
22 |     stride: stride of the plane with respect to the original image
23 | anchors_base: (A, 4) a base set of anchors
24 | Returns
25 | -------
26 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane
27 | """
28 | cdef unsigned int A = anchors_base.shape[0]
29 | cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE)
30 | cdef unsigned int iw, ih
31 | cdef unsigned int k
32 | cdef unsigned int A4
33 | cdef unsigned int sh
34 | cdef unsigned int sw
35 | A4 = A*4
36 | for iw in range(width):
37 | sw = iw * stride
38 | for ih in range(height):
39 | sh = ih * stride
40 | for k in range(A):
41 | all_anchors[ih, iw, k, 0] = anchors_base[k, 0] + sw
42 | all_anchors[ih, iw, k, 1] = anchors_base[k, 1] + sh
43 | all_anchors[ih, iw, k, 2] = anchors_base[k, 2] + sw
44 | all_anchors[ih, iw, k, 3] = anchors_base[k, 3] + sh
45 | return all_anchors
--------------------------------------------------------------------------------
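
The same tiling can be written in pure NumPy with broadcasting; a reference sketch, useful when the extension is not compiled:

import numpy as np

def anchors_plane_np(height, width, stride, anchors_base):
    """anchors_base: (A, 4) -> (height, width, A, 4) shifted copies."""
    shifts = np.zeros((height, width, 1, 4))
    sx = np.arange(width) * stride   # x offset grows along columns
    sy = np.arange(height) * stride  # y offset grows along rows
    shifts[..., 0, [0, 2]] = sx[None, :, None]  # shift x1 and x2
    shifts[..., 0, [1, 3]] = sy[:, None, None]  # shift y1 and y2
    return anchors_base[None, None, :, :] + shifts
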
/libs/boxes/cython_bbox.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import sys, pkg_resources, imp
6 |
7 | def __bootstrap__():
8 | global __bootstrap__, __loader__, __file__
9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox.so')
10 | __loader__ = None
11 | del __bootstrap__, __loader__
12 | imp.load_dynamic(__name__, __file__)
13 |
14 | __bootstrap__()
--------------------------------------------------------------------------------
/libs/boxes/cython_bbox_transform.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import sys, pkg_resources, imp
6 |
7 | def __bootstrap__():
8 | global __bootstrap__, __loader__, __file__
9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox_transform.so')
10 | __loader__ = None
11 | del __bootstrap__, __loader__
12 | imp.load_dynamic(__name__, __file__)
13 |
14 | __bootstrap__()
--------------------------------------------------------------------------------
/libs/boxes/cython_bbox_transform.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Licensed under The MIT License [see LICENSE for details]
4 | # Written by CharlesShang@github
5 | # --------------------------------------------------------
6 |
7 | cimport cython
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | DTYPE = np.float64  # np.float was a float64 alias; the alias is removed in newer NumPy
12 | ctypedef np.float_t DTYPE_t
13 | # ctypedef float DTYPE_t
14 |
18 | def bbox_transform(
19 | np.ndarray[DTYPE_t, ndim=2] ex_rois,
20 | np.ndarray[DTYPE_t, ndim=2] gt_rois):
21 | """
22 | Parameters
23 | ----------
24 | ex_rois: n * 4 numpy array, given boxes
25 | gt_rois: n * 4 numpy array, ground-truth boxes
26 | Returns
27 | -------
28 | targets: (n, 4) ndarray
29 | """
30 | cdef unsigned int R = ex_rois.shape[0]
31 | cdef np.ndarray[DTYPE_t, ndim=2] targets = np.zeros((R, 4), dtype=DTYPE)
32 | cdef unsigned int i
33 | cdef DTYPE_t gt_w
34 | cdef DTYPE_t gt_h
35 | cdef DTYPE_t gt_cx
36 | cdef DTYPE_t gt_cy
37 | cdef DTYPE_t ex_w
38 | cdef DTYPE_t ex_h
39 | cdef DTYPE_t ex_cx
40 | cdef DTYPE_t ex_cy
41 | for i in range(R):
42 | gt_w = gt_rois[i, 2] - gt_rois[i, 0] + 1.0
43 | gt_h = gt_rois[i, 3] - gt_rois[i, 1] + 1.0
44 | ex_w = ex_rois[i, 2] - ex_rois[i, 0] + 1.0
45 | ex_h = ex_rois[i, 3] - ex_rois[i, 1] + 1.0
46 | gt_cx = gt_rois[i, 0] + gt_w * 0.5
47 | gt_cy = gt_rois[i, 1] + gt_h * 0.5
48 | ex_cx = ex_rois[i, 0] + ex_w * 0.5
49 | ex_cy = ex_rois[i, 1] + ex_h * 0.5
50 | targets[i, 0] = (gt_cx - ex_cx) / ex_w
51 | targets[i, 1] = (gt_cy - ex_cy) / ex_h
52 | targets[i, 2] = np.log(gt_w / ex_w)
53 | targets[i, 3] = np.log(gt_h / ex_h)
54 | return targets
55 |
56 | cdef inline DTYPE_t my_max(DTYPE_t a, DTYPE_t b): return a if a >= b else b
57 | cdef inline DTYPE_t my_min(DTYPE_t a, DTYPE_t b): return a if a <= b else b
58 |
59 | def bbox_transform_inv(
60 | np.ndarray[DTYPE_t, ndim=2] boxes,
61 | np.ndarray[DTYPE_t, ndim=2] deltas):
62 | """
63 | Parameters
64 | ----------
65 | boxes: n * 4 numpy array, given boxes
66 | deltas: (n, kx4) numpy array,
67 | Returns
68 | -------
69 | pred_boxes: (n, kx4) ndarray
70 | """
71 | cdef unsigned int R = boxes.shape[0]
72 | cdef unsigned int k4 = deltas.shape[1]
73 | cdef unsigned int k
74 | k = k4 / 4
75 | cdef np.ndarray[DTYPE_t, ndim=2] pred_boxes = np.zeros((R, k4), dtype=DTYPE)
76 | if R == 0:
77 | return pred_boxes
78 |
79 | cdef unsigned int i
80 | cdef unsigned int j
81 | cdef unsigned int j4
82 | cdef DTYPE_t w
83 | cdef DTYPE_t h
84 | cdef DTYPE_t cx
85 | cdef DTYPE_t cy
86 | cdef DTYPE_t px
87 | cdef DTYPE_t py
88 | cdef DTYPE_t pw
89 | cdef DTYPE_t ph
90 | for i in range(R):
91 | w = boxes[i, 2] - boxes[i, 0] + 1.0
92 | h = boxes[i, 3] - boxes[i, 1] + 1.0
93 | cx = boxes[i, 0] + w * 0.5
94 | cy = boxes[i, 1] + h * 0.5
95 | for j in range(k):
96 | j4 = j * 4
97 | px = deltas[i, j4 ] * w + cx
98 | py = deltas[i, j4 + 1] * h + cy
99 | pw = np.exp(deltas[i, j4 + 2]) * w
100 | ph = np.exp(deltas[i, j4 + 3]) * h
101 | pred_boxes[i, j4 ] = px - 0.5 * pw
102 | pred_boxes[i, j4 + 1] = py - 0.5 * ph
103 | pred_boxes[i, j4 + 2] = px + 0.5 * pw
104 | pred_boxes[i, j4 + 3] = py + 0.5 * ph
105 | return pred_boxes
106 |
107 | def clip_boxes(
108 | np.ndarray[DTYPE_t, ndim=2] boxes,
109 | np.ndarray[DTYPE_t, ndim=1] im_shape):
110 | """
111 | Parameters
112 | ----------
113 | boxes: (n ,kx4) numpy array, given boxes
114 | im_shape:(2,) numpy array, (image_height, image_width)
115 | Returns
116 | -------
117 | clipped: (n, kx4) ndarray
118 | """
119 | cdef unsigned int R = boxes.shape[0]
120 | cdef unsigned int k4 = boxes.shape[1]
121 | cdef unsigned int k = k4 / 4
122 | cdef np.ndarray[DTYPE_t, ndim=2] clipped = np.zeros((R, k4), dtype=DTYPE)
123 | cdef unsigned int i
124 | cdef unsigned int j
125 | cdef unsigned int j4
126 | for i in range(R):
127 | for j in range(k):
128 | j4 = j * 4
129 | clipped[i, j4 ] = my_max(my_min(boxes[i, j4 ], im_shape[1]-1), 0)
130 | clipped[i, j4 + 1] = my_max(my_min(boxes[i, j4 + 1], im_shape[0]-1), 0)
131 | clipped[i, j4 + 2] = my_max(my_min(boxes[i, j4 + 2], im_shape[1]-1), 0)
132 | clipped[i, j4 + 3] = my_max(my_min(boxes[i, j4 + 3], im_shape[0]-1), 0)
133 | return clipped
--------------------------------------------------------------------------------
/libs/boxes/cython_nms.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import sys, pkg_resources, imp
6 |
7 | def __bootstrap__():
8 | global __bootstrap__, __loader__, __file__
9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_nms.so')
10 | __loader__ = None
11 | del __bootstrap__, __loader__
12 | imp.load_dynamic(__name__, __file__)
13 |
14 | __bootstrap__()
--------------------------------------------------------------------------------
/libs/boxes/cython_nms.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/cython_nms.pyc
--------------------------------------------------------------------------------
/libs/boxes/nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def nms(dets, thresh):
11 | x1 = dets[:, 0]
12 | y1 = dets[:, 1]
13 | x2 = dets[:, 2]
14 | y2 = dets[:, 3]
15 | scores = dets[:, 4]
16 |
17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
18 | order = scores.argsort()[::-1]
19 |
20 | keep = []
21 | while order.size > 0:
22 | i = order[0]
23 | keep.append(i)
24 | xx1 = np.maximum(x1[i], x1[order[1:]])
25 | yy1 = np.maximum(y1[i], y1[order[1:]])
26 | xx2 = np.minimum(x2[i], x2[order[1:]])
27 | yy2 = np.minimum(y2[i], y2[order[1:]])
28 |
29 | w = np.maximum(0.0, xx2 - xx1 + 1)
30 | h = np.maximum(0.0, yy2 - yy1 + 1)
31 | inter = w * h
32 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
33 |
34 | inds = np.where(ovr <= thresh)[0]
35 | order = order[inds + 1]
36 |
37 | return keep
38 |
--------------------------------------------------------------------------------
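
A tiny worked example of the greedy suppression above: the second box overlaps the first with IoU of roughly 0.83, so at a 0.5 threshold only the first and third survive:

import numpy as np
from libs.boxes.nms import nms

dets = np.array([
    [10., 10., 50., 50., 0.9],      # kept: highest score
    [12., 12., 52., 52., 0.8],      # suppressed: IoU with box 0 is ~0.83
    [100., 100., 150., 150., 0.7],  # kept: disjoint from box 0
])
print(nms(dets, thresh=0.5))  # -> [0, 2]
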
/libs/boxes/nms.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms.pyc
--------------------------------------------------------------------------------
/libs/boxes/nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 |         np.zeros((ndets), dtype=np.int_)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, float thresh):
71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
76 |
77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
79 |
80 | cdef int ndets = dets.shape[0]
81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
82 |         np.zeros((ndets), dtype=np.int_)
83 |
84 | # nominal indices
85 | cdef int _i, _j
86 | # sorted indices
87 | cdef int i, j
88 | # temp variables for box i's (the box currently under consideration)
89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
90 | # variables for computing overlap with box j (lower scoring box)
91 | cdef np.float32_t xx1, yy1, xx2, yy2
92 | cdef np.float32_t w, h
93 | cdef np.float32_t inter, ovr
94 |
95 | keep = []
96 | for _i in range(ndets):
97 | i = order[_i]
98 | if suppressed[i] == 1:
99 | continue
100 | keep.append(i)
101 | ix1 = x1[i]
102 | iy1 = y1[i]
103 | ix2 = x2[i]
104 | iy2 = y2[i]
105 | iarea = areas[i]
106 | for _j in range(_i + 1, ndets):
107 | j = order[_j]
108 | if suppressed[j] == 1:
109 | continue
110 | xx1 = max(ix1, x1[j])
111 | yy1 = max(iy1, y1[j])
112 | xx2 = min(ix2, x2[j])
113 | yy2 = min(iy2, y2[j])
114 | w = max(0.0, xx2 - xx1 + 1)
115 | h = max(0.0, yy2 - yy1 + 1)
116 | inter = w * h
117 | ovr = inter / (iarea + areas[j] - inter)
118 | ovr1 = inter / iarea
119 | ovr2 = inter / areas[j]
120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95:
121 | suppressed[j] = 1
122 |
123 | return keep
124 |
--------------------------------------------------------------------------------
/libs/boxes/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | import libs.configs.config_v1 as cfg
10 | import libs.nms.gpu_nms as gpu_nms
11 | import libs.nms.cpu_nms as cpu_nms
12 |
13 | def nms(dets, thresh, force_cpu=False):
14 |     """Dispatch to either CPU or GPU NMS implementations."""
15 |     if dets.shape[0] == 0:
16 |         return []
17 |     return cpu_nms.cpu_nms(dets, thresh) if force_cpu \
18 |         else gpu_nms.gpu_nms(dets, thresh, device_id=0)
19 |
20 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None):
21 | """
22 | post-process the results of im_detect
23 | :param boxes: N * (K * 4) numpy
24 | :param scores: N * K numpy
25 | :param class_sets: e.g. CLASSES = ('__background__','person','bike','motorbike','car','bus')
26 | :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]}
27 | """
28 | num_class = scores.shape[1] if class_sets is None else len(class_sets)
29 |     assert num_class * 4 == boxes.shape[1],\
30 |         "Detection scores and boxes don't match: %d vs %d" % (num_class * 4, boxes.shape[1])
31 | class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets
32 |
33 | res = []
34 | for ind, cls in enumerate(class_sets[1:]):
35 | ind += 1 # skip background
36 | cls_boxes = boxes[:, 4*ind : 4*(ind+1)]
37 | cls_scores = scores[:, ind]
38 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32)
39 | keep = nms(dets, thresh=0.3)
40 | dets = dets[keep, :]
41 | dets = dets[np.where(dets[:, 4] > threshold)]
42 | r = {}
43 | if dets.shape[0] > 0:
44 | r['class'], r['dets'] = cls, dets
45 | else:
46 | r['class'], r['dets'] = cls, None
47 | res.append(r)
48 | return res
49 |
50 | if __name__=='__main__':
51 |
52 | score = np.random.rand(10, 21)
53 | boxes = np.random.randint(0, 100, (10, 21, 2))
54 | s = np.random.randint(0, 100, (10, 21, 2))
55 | s = boxes + s
56 | boxes = np.concatenate((boxes, s), axis=2)
57 | boxes = np.reshape(boxes, [boxes.shape[0], -1])
58 | # score = np.reshape(score, [score.shape[0], -1])
59 | res = nms_wrapper(score, boxes)
60 | print (res)
--------------------------------------------------------------------------------
/libs/boxes/nms_wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms_wrapper.pyc
--------------------------------------------------------------------------------
/libs/boxes/profile:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile
--------------------------------------------------------------------------------
/libs/boxes/profile.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile.png
--------------------------------------------------------------------------------
/libs/boxes/roi.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 |
6 | import numpy as np
7 | import tensorflow as tf
8 | import tensorflow.contrib.slim as slim
9 |
10 | def roi_align(feat, boxes):
11 |   """Crop and resize features at the given boxes (unimplemented stub)."""
12 |   return
13 |
14 | def roi_cropping(feat, boxes, clses, anchors, spatial_scale=1.0/16):
15 |   """Compute the final RPN boxes and crop the corresponding areas from
16 |   the incoming features (unimplemented stub).
17 |   """
18 |   return
--------------------------------------------------------------------------------
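
Both functions above are unimplemented stubs (the working crop lives in libs/layers/crop.py). As a rough sketch of the operation roi_align is meant to perform, TF1's built-in crop_and_resize can be used directly; the shapes and box values here are illustrative:

import tensorflow as tf

feat = tf.placeholder(tf.float32, [1, 64, 64, 256])  # a stride-16 feature map
# boxes are normalized [y1, x1, y2, x2]; box_ind maps each box to a batch image
boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])
crops = tf.image.crop_and_resize(feat, boxes, box_ind=tf.constant([0]),
                                 crop_size=[7, 7])   # -> (1, 7, 7, 256)
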
/libs/boxes/roi.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/roi.pyc
--------------------------------------------------------------------------------
/libs/boxes/timer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import time
9 |
10 | class Timer(object):
11 | """A simple timer."""
12 | def __init__(self):
13 | self.total_time = 0.
14 | self.calls = 0
15 | self.start_time = 0.
16 | self.diff = 0.
17 | self.average_time = 0.
18 |
19 | def tic(self):
20 |         # using time.time instead of time.clock because time.clock
21 |         # does not normalize for multithreading
22 | self.start_time = time.time()
23 |
24 | def toc(self, average=True):
25 | self.diff = time.time() - self.start_time
26 | self.total_time += self.diff
27 | self.calls += 1
28 | self.average_time = self.total_time / self.calls
29 | if average:
30 | return self.average_time
31 | else:
32 | return self.diff
33 |
--------------------------------------------------------------------------------
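
A minimal usage sketch of the Timer class:

from libs.boxes.timer import Timer

t = Timer()
for _ in range(5):
    t.tic()
    sum(range(10 ** 6))          # stand-in for the timed work
    last = t.toc(average=False)  # duration of this call only
print('average over calls: %.4fs' % t.average_time)
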
/libs/boxes/timer.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/timer.pyc
--------------------------------------------------------------------------------
/libs/configs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.py
--------------------------------------------------------------------------------
/libs/configs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.pyc
--------------------------------------------------------------------------------
/libs/configs/config_v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/config_v1.pyc
--------------------------------------------------------------------------------
/libs/datasets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.py
--------------------------------------------------------------------------------
/libs/datasets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.pyc
--------------------------------------------------------------------------------
/libs/datasets/coco.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import os
6 | import tensorflow as tf
7 |
8 | import tensorflow.contrib.slim as slim
9 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
10 |
11 | _FILE_PATTERN = 'coco_%s_*.tfrecord'
12 |
13 | SPLITS_TO_SIZES = {'train2014': 82783, 'val2014': 40504}
14 |
15 | _NUM_CLASSES = 81
16 |
17 | _ITEMS_TO_DESCRIPTIONS = {
18 | 'image': 'A color image of varying size.',
19 | 'label': 'An annotation image of varying size. (pixel-level masks)',
20 | 'gt_masks': 'masks of instances in this image. (instance-level masks), of shape (N, image_height, image_width)',
21 |   'gt_boxes': 'bounding boxes and classes of instances in this image, of shape (N, 5), each entry is (x1, y1, x2, y2, class)',
22 | }
23 |
24 |
25 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None):
26 | if split_name not in SPLITS_TO_SIZES:
27 | raise ValueError('split name %s was not recognized.' % split_name)
28 |
29 | if not file_pattern:
30 | file_pattern = _FILE_PATTERN
31 | file_pattern = os.path.join(dataset_dir, 'records', file_pattern % split_name)
32 |
33 | # Allowing None in the signature so that dataset_factory can use the default.
34 | if reader is None:
35 | reader = tf.TFRecordReader
36 |
37 | keys_to_features = {
38 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
39 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'),
40 | 'label/encoded': tf.FixedLenFeature((), tf.string, default_value=''),
41 | 'label/format': tf.FixedLenFeature((), tf.string, default_value='png'),
42 | 'image/height': tf.FixedLenFeature((), tf.int64),
43 | 'image/width': tf.FixedLenFeature((), tf.int64),
44 |
45 | 'label/num_instances': tf.FixedLenFeature((), tf.int64),
46 | 'label/gt_boxes': tf.FixedLenFeature((), tf.string),
47 | 'label/gt_masks': tf.FixedLenFeature((), tf.string),
48 | }
49 |
50 | def _masks_decoder(keys_to_tensors):
51 | masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8)
52 | width = tf.cast(keys_to_tensors['image/width'], tf.int32)
53 | height = tf.cast(keys_to_tensors['image/height'], tf.int32)
54 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32)
55 | mask_shape = tf.stack([instances, height, width])
56 | return tf.reshape(masks, mask_shape)
57 |
58 | def _gt_boxes_decoder(keys_to_tensors):
59 | bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32)
60 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32)
61 | bboxes_shape = tf.stack([instances, 5])
62 | return tf.reshape(bboxes, bboxes_shape)
63 |
64 | def _width_decoder(keys_to_tensors):
65 | width = keys_to_tensors['image/width']
66 | return tf.cast(width, tf.int32)
67 |
68 | def _height_decoder(keys_to_tensors):
69 | height = keys_to_tensors['image/height']
70 | return tf.cast(height, tf.int32)
71 |
72 | items_to_handlers = {
73 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'),
74 | 'label': slim.tfexample_decoder.Image('label/encoded', 'label/format', channels=1),
75 | 'gt_masks': slim.tfexample_decoder.ItemHandlerCallback(
76 | ['label/gt_masks', 'label/num_instances', 'image/width', 'image/height'], _masks_decoder),
77 | 'gt_boxes': slim.tfexample_decoder.ItemHandlerCallback(['label/gt_boxes', 'label/num_instances'], _gt_boxes_decoder),
78 | 'width': slim.tfexample_decoder.ItemHandlerCallback(['image/width'], _width_decoder),
79 | 'height': slim.tfexample_decoder.ItemHandlerCallback(['image/height'], _height_decoder),
80 | }
81 |
82 | decoder = slim.tfexample_decoder.TFExampleDecoder(
83 | keys_to_features, items_to_handlers)
84 |
85 | return slim.dataset.Dataset(
86 | data_sources=file_pattern,
87 | reader=reader,
88 | decoder=decoder,
89 | num_samples=SPLITS_TO_SIZES[split_name],
90 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS,
91 | num_classes=_NUM_CLASSES)
92 |
93 | def read(tfrecords_filename):
94 |
95 | if not isinstance(tfrecords_filename, list):
96 | tfrecords_filename = [tfrecords_filename]
97 | filename_queue = tf.train.string_input_producer(
98 | tfrecords_filename, num_epochs=100)
99 |
100 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
101 | reader = tf.TFRecordReader(options=options)
102 | _, serialized_example = reader.read(filename_queue)
103 | features = tf.parse_single_example(
104 | serialized_example,
105 | features={
106 | 'image/img_id': tf.FixedLenFeature([], tf.int64),
107 | 'image/encoded': tf.FixedLenFeature([], tf.string),
108 | 'image/height': tf.FixedLenFeature([], tf.int64),
109 | 'image/width': tf.FixedLenFeature([], tf.int64),
110 | 'label/num_instances': tf.FixedLenFeature([], tf.int64),
111 | 'label/gt_masks': tf.FixedLenFeature([], tf.string),
112 | 'label/gt_boxes': tf.FixedLenFeature([], tf.string),
113 | 'label/encoded': tf.FixedLenFeature([], tf.string),
114 | })
115 | # image = tf.image.decode_jpeg(features['image/encoded'], channels=3)
116 | img_id = tf.cast(features['image/img_id'], tf.int32)
117 | ih = tf.cast(features['image/height'], tf.int32)
118 | iw = tf.cast(features['image/width'], tf.int32)
119 | num_instances = tf.cast(features['label/num_instances'], tf.int32)
120 | image = tf.decode_raw(features['image/encoded'], tf.uint8)
121 | imsize = tf.size(image)
122 | image = tf.cond(tf.equal(imsize, ih * iw), \
123 | lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \
124 | lambda: tf.reshape(image, (ih, iw, 3)))
125 |
126 | gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32)
127 | gt_boxes = tf.reshape(gt_boxes, [num_instances, 5])
128 | gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8)
129 | gt_masks = tf.cast(gt_masks, tf.int32)
130 |     gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw, 7])
131 |     # NOTE: each instance mask is stored with 7 channels here; this reshape must
132 |     # match the layout written into the records (an earlier revision used a
133 |     # different trailing dimension), so keep both sides consistent.
134 |
135 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id
136 |
--------------------------------------------------------------------------------
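
A minimal TF1-style driver for read() (the file paths are illustrative; the records must have been written with ZLIB compression, matching the reader options above):

import glob
import tensorflow as tf
from libs.datasets import coco

records = glob.glob('data/coco/records/coco_train2014_*.tfrecord')
image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(records)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])  # num_epochs uses a local variable
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, boxes = sess.run([image, gt_boxes])
    print(img.shape, boxes.shape)  # (ih, iw, 3) and (num_instances, 5)
    coord.request_stop()
    coord.join(threads)
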
/libs/datasets/coco.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/coco.pyc
--------------------------------------------------------------------------------
/libs/datasets/dataset_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 | from libs.visualization.summary_utils import visualize_input
7 | import glob
8 | from libs.datasets import coco
9 |
10 | import libs.preprocessings.coco_v1 as coco_preprocess
11 |
12 | def get_dataset(dataset_name, split_name, dataset_dir,
13 | im_batch=1, is_training=False, file_pattern=None, reader=None):
14 |   """Read (image, boxes, masks, ...) examples from the tfrecords under dataset_dir/records and preprocess them."""
15 | if file_pattern is None:
16 | file_pattern = '*.tfrecord'
17 |
18 | tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern)
19 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords)
20 |
21 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training)
22 | #visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3))
23 |
24 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id
25 |
26 |
--------------------------------------------------------------------------------
/libs/datasets/dataset_factory.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/dataset_factory.pyc
--------------------------------------------------------------------------------
/libs/datasets/download_and_convert_coco.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/download_and_convert_coco.pyc
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/Makefile:
--------------------------------------------------------------------------------
1 | all:
2 | # install pycocotools locally
3 | python setup.py build_ext --inplace
4 | rm -rf build
5 |
6 | install:
7 | # install pycocotools to the Python site-packages
8 | python setup.py build_ext install
9 | rm -rf build
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/__init__.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tylin'
2 |
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/__init__.pyc
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/_mask.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/_mask.so
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/coco.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/coco.pyc
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/common/gason.h:
--------------------------------------------------------------------------------
1 | // https://github.com/vivkin/gason - pulled January 10, 2016
2 | #pragma once
3 |
4 | #include <stdint.h>
5 | #include <stddef.h>
6 | #include <assert.h>
7 |
8 | enum JsonTag {
9 | JSON_NUMBER = 0,
10 | JSON_STRING,
11 | JSON_ARRAY,
12 | JSON_OBJECT,
13 | JSON_TRUE,
14 | JSON_FALSE,
15 | JSON_NULL = 0xF
16 | };
17 |
18 | struct JsonNode;
19 |
20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL
21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL
22 | #define JSON_VALUE_TAG_MASK 0xF
23 | #define JSON_VALUE_TAG_SHIFT 47
24 |
25 | union JsonValue {
26 | uint64_t ival;
27 | double fval;
28 |
29 | JsonValue(double x)
30 | : fval(x) {
31 | }
32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) {
33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK);
34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload;
35 | }
36 | bool isDouble() const {
37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK;
38 | }
39 | JsonTag getTag() const {
40 | return isDouble() ? JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK);
41 | }
42 | uint64_t getPayload() const {
43 | assert(!isDouble());
44 | return ival & JSON_VALUE_PAYLOAD_MASK;
45 | }
46 | double toNumber() const {
47 | assert(getTag() == JSON_NUMBER);
48 | return fval;
49 | }
50 | char *toString() const {
51 | assert(getTag() == JSON_STRING);
52 | return (char *)getPayload();
53 | }
54 | JsonNode *toNode() const {
55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT);
56 | return (JsonNode *)getPayload();
57 | }
58 | };
59 |
60 | struct JsonNode {
61 | JsonValue value;
62 | JsonNode *next;
63 | char *key;
64 | };
65 |
66 | struct JsonIterator {
67 | JsonNode *p;
68 |
69 | void operator++() {
70 | p = p->next;
71 | }
72 | bool operator!=(const JsonIterator &x) const {
73 | return p != x.p;
74 | }
75 | JsonNode *operator*() const {
76 | return p;
77 | }
78 | JsonNode *operator->() const {
79 | return p;
80 | }
81 | };
82 |
83 | inline JsonIterator begin(JsonValue o) {
84 | return JsonIterator{o.toNode()};
85 | }
86 | inline JsonIterator end(JsonValue) {
87 | return JsonIterator{nullptr};
88 | }
89 |
90 | #define JSON_ERRNO_MAP(XX) \
91 | XX(OK, "ok") \
92 | XX(BAD_NUMBER, "bad number") \
93 | XX(BAD_STRING, "bad string") \
94 | XX(BAD_IDENTIFIER, "bad identifier") \
95 | XX(STACK_OVERFLOW, "stack overflow") \
96 | XX(STACK_UNDERFLOW, "stack underflow") \
97 | XX(MISMATCH_BRACKET, "mismatch bracket") \
98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \
99 | XX(UNQUOTED_KEY, "unquoted key") \
100 | XX(BREAKING_BAD, "breaking bad") \
101 | XX(ALLOCATION_FAILURE, "allocation failure")
102 |
103 | enum JsonErrno {
104 | #define XX(no, str) JSON_##no,
105 | JSON_ERRNO_MAP(XX)
106 | #undef XX
107 | };
108 |
109 | const char *jsonStrError(int err);
110 |
111 | class JsonAllocator {
112 | struct Zone {
113 | Zone *next;
114 | size_t used;
115 | } *head = nullptr;
116 |
117 | public:
118 | JsonAllocator() = default;
119 | JsonAllocator(const JsonAllocator &) = delete;
120 | JsonAllocator &operator=(const JsonAllocator &) = delete;
121 | JsonAllocator(JsonAllocator &&x) : head(x.head) {
122 | x.head = nullptr;
123 | }
124 | JsonAllocator &operator=(JsonAllocator &&x) {
125 | head = x.head;
126 | x.head = nullptr;
127 | return *this;
128 | }
129 | ~JsonAllocator() {
130 | deallocate();
131 | }
132 | void *allocate(size_t size);
133 | void deallocate();
134 | };
135 |
136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator);
137 |
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/common/maskApi.h:
--------------------------------------------------------------------------------
1 | /**************************************************************************
2 | * Microsoft COCO Toolbox. version 2.0
3 | * Data, paper, and tutorials available at: http://mscoco.org/
4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
5 | * Licensed under the Simplified BSD License [see coco/license.txt]
6 | **************************************************************************/
7 | #pragma once
8 |
9 | typedef unsigned int uint;
10 | typedef unsigned long siz;
11 | typedef unsigned char byte;
12 | typedef double* BB;
13 | typedef struct { siz h, w, m; uint *cnts; } RLE;
14 |
15 | /* Initialize/destroy RLE. */
16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts );
17 | void rleFree( RLE *R );
18 |
19 | /* Initialize/destroy RLE array. */
20 | void rlesInit( RLE **R, siz n );
21 | void rlesFree( RLE **R, siz n );
22 |
23 | /* Encode binary masks using RLE. */
24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n );
25 |
26 | /* Decode binary masks encoded via RLE. */
27 | void rleDecode( const RLE *R, byte *mask, siz n );
28 |
29 | /* Compute union or intersection of encoded masks. */
30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect );
31 |
32 | /* Compute area of encoded masks. */
33 | void rleArea( const RLE *R, siz n, uint *a );
34 |
35 | /* Compute intersection over union between masks. */
36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o );
37 |
38 | /* Compute non-maximum suppression between bounding masks */
39 | void rleNms( RLE *dt, siz n, uint *keep, double thr );
40 |
41 | /* Compute intersection over union between bounding boxes. */
42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o );
43 |
44 | /* Compute non-maximum suppression between bounding boxes */
45 | void bbNms( BB dt, siz n, uint *keep, double thr );
46 |
47 | /* Get bounding boxes surrounding encoded masks. */
48 | void rleToBbox( const RLE *R, BB bb, siz n );
49 |
50 | /* Convert bounding boxes to encoded masks. */
51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n );
52 |
53 | /* Convert polygon to encoded mask. */
54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w );
55 |
56 | /* Get compressed string representation of encoded mask. */
57 | char* rleToString( const RLE *R );
58 |
59 | /* Convert from compressed string representation of encoded mask. */
60 | void rleFrString( RLE *R, char *s, siz h, siz w );
61 |
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/mask.py:
--------------------------------------------------------------------------------
1 | __author__ = 'tsungyi'
2 |
3 | import libs.datasets.pycocotools._mask as _mask
4 |
5 | # Interface for manipulating masks stored in RLE format.
6 | #
7 | # RLE is a simple yet efficient format for storing binary masks. RLE
8 | # first divides a vector (or vectorized image) into a series of piecewise
9 | # constant regions and then for each piece simply stores the length of
10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
12 | # (note that the odd counts are always the numbers of zeros). Instead of
13 | # storing the counts directly, additional compression is achieved with a
14 | # variable bitrate representation based on a common scheme called LEB128.
15 | #
16 | # Compression is greatest given large piecewise constant regions.
17 | # Specifically, the size of the RLE is proportional to the number of
18 | # *boundaries* in M (or for an image the number of boundaries in the y
19 | # direction). Assuming fairly simple shapes, the RLE representation is
20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
21 | # is substantially lower, especially for large simple objects (large n).
22 | #
23 | # Many common operations on masks can be computed directly using the RLE
24 | # (without need for decoding). This includes computations such as area,
25 | # union, intersection, etc. All of these operations are linear in the
26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area
27 | # of the object. Computing these operations on the original mask is O(n).
28 | # Thus, using the RLE can result in substantial computational savings.
29 | #
30 | # The following API functions are defined:
31 | # encode - Encode binary masks using RLE.
32 | # decode - Decode binary masks encoded via RLE.
33 | # merge - Compute union or intersection of encoded masks.
34 | # iou - Compute intersection over union between masks.
35 | # area - Compute area of encoded masks.
36 | # toBbox - Get bounding boxes surrounding encoded masks.
37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
38 | #
39 | # Usage:
40 | # Rs = encode( masks )
41 | # masks = decode( Rs )
42 | # R = merge( Rs, intersect=false )
43 | # o = iou( dt, gt, iscrowd )
44 | # a = area( Rs )
45 | # bbs = toBbox( Rs )
46 | # Rs = frPyObjects( [pyObjects], h, w )
47 | #
48 | # In the API the following formats are used:
49 | # Rs - [dict] Run-length encoding of binary masks
50 | # R - dict Run-length encoding of binary mask
51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
53 | # bbs - [nx4] Bounding box(es) stored as [x y w h]
54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
55 | # dt,gt - May be either bounding boxes or encoded masks
56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
57 | #
58 | # Finally, a note about the intersection over union (iou) computation.
59 | # The standard iou of a ground truth (gt) and detected (dt) object is
60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
61 | # For "crowd" regions, we use a modified criteria. If a gt object is
62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt.
63 | # Choosing gt' in the crowd gt that best matches the dt can be done using
64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
66 | # For crowd gt regions we use this modified criteria above for the iou.
67 | #
68 | # To compile run "python setup.py build_ext --inplace"
69 | # Please do not contact us for help with compiling.
70 | #
71 | # Microsoft COCO Toolbox. version 2.0
72 | # Data, paper, and tutorials available at: http://mscoco.org/
73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
74 | # Licensed under the Simplified BSD License [see coco/license.txt]
75 |
76 | iou = _mask.iou
77 | merge = _mask.merge
78 | frPyObjects = _mask.frPyObjects
79 |
80 | def encode(bimask):
81 | if len(bimask.shape) == 3:
82 | return _mask.encode(bimask)
83 | elif len(bimask.shape) == 2:
84 | h, w = bimask.shape
85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0]
86 |
87 | def decode(rleObjs):
88 | if type(rleObjs) == list:
89 | return _mask.decode(rleObjs)
90 | else:
91 | return _mask.decode([rleObjs])[:,:,0]
92 |
93 | def area(rleObjs):
94 | if type(rleObjs) == list:
95 | return _mask.area(rleObjs)
96 | else:
97 | return _mask.area([rleObjs])[0]
98 |
99 | def toBbox(rleObjs):
100 | if type(rleObjs) == list:
101 | return _mask.toBbox(rleObjs)
102 | else:
103 | return _mask.toBbox([rleObjs])[0]
--------------------------------------------------------------------------------
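
A small round trip through this API, assuming the _mask extension has been compiled (python setup.py build_ext --inplace). Note that encode expects uint8 data in Fortran (column-major) order, as stated above:

import numpy as np
from libs.datasets.pycocotools import mask as mask_util

m = np.zeros((10, 10), dtype=np.uint8, order='F')
m[2:8, 3:7] = 1                    # a 6-row by 4-column rectangle
rle = mask_util.encode(m)          # 2-D input -> a single RLE dict
print(mask_util.area(rle))         # 24, the number of foreground pixels
print(mask_util.toBbox(rle))       # [3. 2. 4. 6.] as [x y w h]
assert (mask_util.decode(rle) == m).all()
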
/libs/datasets/pycocotools/mask.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/mask.pyc
--------------------------------------------------------------------------------
/libs/datasets/pycocotools/setup.py:
--------------------------------------------------------------------------------
1 | from distutils.core import setup
2 | from Cython.Build import cythonize
3 | from distutils.extension import Extension
4 | import numpy as np
5 |
6 | # To compile and install locally run "python setup.py build_ext --inplace"
7 | # To install library to Python site-packages run "python setup.py build_ext install"
8 |
9 | ext_modules = [
10 | Extension(
11 | '_mask',
12 | sources=['./common/maskApi.c', '_mask.pyx'],
13 | include_dirs = [np.get_include(), './common'],
14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'],
15 | )
16 | ]
17 |
18 | setup(name='pycocotools',
19 | packages=['pycocotools'],
20 | package_dir = {'pycocotools': 'pycocotools'},
21 | version='2.0',
22 | ext_modules=
23 | cythonize(ext_modules)
24 | )
--------------------------------------------------------------------------------
/libs/layers/__init__.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Written by CharlesShang@github
4 | # --------------------------------------------------------
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | from .wrapper import anchor_decoder
10 | from .wrapper import anchor_encoder
11 | from .wrapper import roi_decoder
12 | from .wrapper import roi_encoder
13 | from .wrapper import mask_decoder
14 | from .wrapper import mask_encoder
15 | from .wrapper import sample_wrapper as sample_rpn_outputs
16 | from .wrapper import sample_with_gt_wrapper as sample_rpn_outputs_with_gt
17 | from .wrapper import gen_all_anchors
18 | from .wrapper import assign_boxes
19 | from .crop import crop as ROIAlign
20 | from .crop import crop_ as ROIAlign_
21 |
--------------------------------------------------------------------------------
/libs/layers/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/__init__.pyc
--------------------------------------------------------------------------------
/libs/layers/anchor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/anchor.pyc
--------------------------------------------------------------------------------
/libs/layers/assign.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import numpy as np
8 |
9 | import libs.boxes.cython_bbox as cython_bbox
10 | import libs.configs.config_v1 as cfg
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 | from libs.boxes.anchor import anchors_plane
13 | from libs.logs.log import LOG
14 | # FLAGS = tf.app.flags.FLAGS
15 |
16 | _DEBUG = False
17 |
18 | def assign_boxes(gt_boxes, min_k=2, max_k=5):
19 |     """Assign each ground-truth box to a pyramid level according to its area,
20 |     following FPN: k = floor(k0 + log2(sqrt(area) / 224)), clipped to [min_k, max_k]
21 |     Params
22 |     -----
23 |     gt_boxes: of shape (N, 5), each entry is [x1, y1, x2, y2, cls]
24 |     min_k, max_k: the lowest and highest usable pyramid levels
25 |     Returns
26 |     -----
27 |     layer_ids: of shape (N,), each entry is an id of the assigned pyramid level
28 |     """
29 | k0 = 4
30 | if gt_boxes.size > 0:
31 | layer_ids = np.zeros((gt_boxes.shape[0], ), dtype=np.int32)
32 | ws = gt_boxes[:, 2] - gt_boxes[:, 0]
33 | hs = gt_boxes[:, 3] - gt_boxes[:, 1]
34 | areas = ws * hs
35 | k = np.floor(k0 + np.log2(np.sqrt(areas) / 224))
36 | inds = np.where(k < min_k)[0]
37 | k[inds] = min_k
38 | inds = np.where(k > max_k)[0]
39 | k[inds] = max_k
40 | if _DEBUG:
41 | print ("### boxes and layer ids")
42 | print (np.hstack((gt_boxes[:, 0:4], k[:, np.newaxis])))
43 | return k.astype(np.int32)
44 |
45 | else:
46 | return np.asarray([], dtype=np.int32)
47 |
--------------------------------------------------------------------------------
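
To make the level-assignment rule in assign_boxes concrete, here is a small
self-contained sketch in plain numpy; it mirrors the FPN formula from the code
above rather than importing the module (which pulls in the Cython bbox extension):

    import numpy as np

    def fpn_level(boxes, k0=4, min_k=2, max_k=5):
        """k = floor(k0 + log2(sqrt(area) / 224)), clipped to [min_k, max_k]."""
        ws = boxes[:, 2] - boxes[:, 0]
        hs = boxes[:, 3] - boxes[:, 1]
        k = np.floor(k0 + np.log2(np.sqrt(ws * hs) / 224))
        return np.clip(k, min_k, max_k).astype(np.int32)

    boxes = np.array([[0, 0,  32,  32],    # tiny box, clipped up to P2
                      [0, 0, 224, 224],    # canonical 224x224 box, lands on P4
                      [0, 0, 896, 896]],   # huge box, clipped down to P5
                     dtype=np.float32)
    print(fpn_level(boxes))  # [2 4 5]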
/libs/layers/assign.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/assign.pyc
--------------------------------------------------------------------------------
/libs/layers/crop.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 |
7 | def crop(images, boxes, batch_inds, stride = 1, pooled_height = 56, pooled_width = 56, scope='ROIAlign'):
8 | """Cropping areas of features into fixed size
9 | Params:
10 | --------
11 | images: a 4-d Tensor of shape (N, H, W, C)
12 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
13 | batch_inds:
14 |
15 | Returns:
16 | --------
17 | A Tensor of shape (N, pooled_height, pooled_width, C)
18 | """
19 | with tf.name_scope(scope):
20 | #
21 |         boxes = boxes / (stride + 0.0)  # stride is one of 32, 16, 8, 4; this maps image-space rois onto this feature map
22 | boxes = tf.reshape(boxes, [-1, 4])
23 |
24 | # normalize the boxes and swap x y dimensions
25 | shape = tf.shape(images)
26 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
27 | xs = boxes[:, 0]
28 | ys = boxes[:, 1]
29 |         xs = xs / tf.cast(shape[2], tf.float32)  # crop_and_resize expects box coordinates normalized to [0, 1]
30 | ys = ys / tf.cast(shape[1], tf.float32)
31 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
32 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
33 |
34 | # if batch_inds is False:
35 | # num_boxes = tf.shape(boxes)[0]
36 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
37 | # batch_inds = boxes[:, 0] * 0
38 | # batch_inds = tf.cast(batch_inds, tf.int32)
39 |
40 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
41 |         if batch_inds is False:
42 |             # all rois come from a single image, so every batch index is zero
43 |             num_boxes = tf.shape(boxes)[0]
44 |             batch_inds = tf.zeros([num_boxes], dtype=tf.int32,
45 |                                   name='batch_inds')
46 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
47 | with tf.control_dependencies([assert_op, images, batch_inds]):
48 | return tf.image.crop_and_resize(images, boxes, batch_inds,###
49 | [pooled_height, pooled_width],
50 | method='bilinear',
51 | name='Crop')
52 |
53 | def crop_(images, boxes, batch_inds, ih, iw, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'):
54 | """Cropping areas of features into fixed size
55 | Params:
56 | --------
57 | images: a 4-d Tensor of shape (N, H, W, C)
58 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
59 | batch_inds:
60 |
61 | Returns:
62 | --------
63 | A Tensor of shape (N, pooled_height, pooled_width, C)
64 | """
65 | with tf.name_scope(scope):
66 | #
67 |         boxes = boxes / (stride + 0.0)  # stride is one of 32, 16, 8, 4; this maps image-space rois onto this feature map
68 | boxes = tf.reshape(boxes, [-1, 4])
69 |
70 | # normalize the boxes and swap x y dimensions
71 | shape = tf.shape(images)#W/stride, H/stride
72 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
73 | xs = boxes[:, 0]
74 | ys = boxes[:, 1]
75 |         xs = xs / tf.cast(shape[2], tf.float32)  # crop_and_resize expects box coordinates normalized to [0, 1]
76 | ys = ys / tf.cast(shape[1], tf.float32)
77 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
78 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
79 |
80 | # if batch_inds is False:
81 | # num_boxes = tf.shape(boxes)[0]
82 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
83 | # batch_inds = boxes[:, 0] * 0
84 | # batch_inds = tf.cast(batch_inds, tf.int32)
85 |
86 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
87 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
88 | with tf.control_dependencies([assert_op, images, batch_inds]):
89 | return [tf.image.crop_and_resize(images, boxes, batch_inds,
90 | [pooled_height, pooled_width],
91 | method='bilinear',
92 | name='Crop')] + [boxes]
93 |
94 |
--------------------------------------------------------------------------------
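
As a sanity check on the coordinate handling in crop()/crop_(), a minimal
TF 1.x sketch with made-up values: the rois are divided by the feature-map
size and reordered to (y1, x1, y2, x2), exactly what tf.image.crop_and_resize
expects:

    import numpy as np
    import tensorflow as tf  # TF 1.x

    feat = tf.constant(np.arange(64, dtype=np.float32).reshape(1, 8, 8, 1))
    roi = np.array([[2., 2., 6., 6.]], dtype=np.float32)  # [x1, y1, x2, y2]

    h = w = 8.0  # feature-map height and width
    boxes = np.stack([roi[:, 1] / h, roi[:, 0] / w,
                      roi[:, 3] / h, roi[:, 2] / w], axis=1)

    crops = tf.image.crop_and_resize(feat, boxes, box_ind=[0],
                                     crop_size=[4, 4], method='bilinear')
    with tf.Session() as sess:
        print(sess.run(crops).shape)  # (1, 4, 4, 1)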
/libs/layers/crop.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/crop.pyc
--------------------------------------------------------------------------------
/libs/layers/mask.py:
--------------------------------------------------------------------------------
1 | # coding=utf-8
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import cv2
8 | import libs.boxes.cython_bbox as cython_bbox
9 | import libs.configs.config_v1 as cfg
10 | from libs.logs.log import LOG
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 |
13 | _DEBUG = False
14 | def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
15 | """Encode masks groundtruth into learnable targets
16 |     Sample some examples
17 |
18 | Params
19 | ------
20 |     gt_masks: {0, 1} masks of shape (G, imh, imw, 7)
21 |               (modified from the original (G, imh, imw) layout to carry 7 body-part channels)
22 |     gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
23 | rois: the bounding boxes of shape (N, 4),
24 | ## scores: scores of shape (N, 1)
25 |     num_classes: K
26 | mask_height, mask_width: height and width of output masks
27 |
28 | Returns
29 | -------
30 | # rois: boxes sampled for cropping masks, of shape (M, 4)
31 | labels: class-ids of shape (M, 1)
32 | mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) in {0, 1} values
33 |     mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1} indicating which mask is sampled
34 | """
35 | total_masks = rois.shape[0]
36 | if gt_boxes.size > 0:
37 | # B x G
38 | overlaps = cython_bbox.bbox_overlaps(
39 | np.ascontiguousarray(rois[:, 0:4], dtype=np.float),
40 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
41 | gt_assignment = overlaps.argmax(axis=1) # shape is N
42 | max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment] # N
43 | # note: this will assign every rois with a positive label
44 | # labels = gt_boxes[gt_assignment, 4] # N
45 | labels = np.zeros((total_masks, ), np.float32)
46 | labels[:] = -1
47 |
48 | # sample positive rois which intersection is more than 0.5
49 | keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
50 | num_masks = int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
51 | if keep_inds.size > 0 and num_masks < keep_inds.size:
52 | keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
53 | LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
54 | %(num_masks, rois.shape[0], gt_masks.shape[0]))
55 |
56 | labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]
57 |
58 | # rois = rois[inds]
59 | # labels = labels[inds].astype(np.int32)
60 | # gt_assignment = gt_assignment[inds]
61 |
62 | # ignore rois with overlaps between fg_threshold and bg_threshold
63 | # mask are only defined on positive rois
64 | ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0]
65 | labels[ignore_inds] = -1
66 |
67 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
68 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
69 |     rois[rois < 0] = 0  # clip negative coordinates to the image
70 |
71 | # TODO: speed bottleneck?
72 | for i in keep_inds:
73 | roi = rois[i, :4]
74 |
75 | for x in range(7):
76 | cropped = gt_masks[gt_assignment[i], int(roi[1]):int(roi[3])+1, int(roi[0]):int(roi[2])+1,x]
77 | cropped = cv2.resize(cropped, (mask_width, mask_height), interpolation=cv2.INTER_NEAREST)
78 | mask_targets[i, :, :, x] = cropped
79 | mask_inside_weights[i, :, :, x] = 1
80 | else:
81 | # there is no gt
82 | labels = np.zeros((total_masks, ), np.float32)
83 | labels[:] = -1
84 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
85 |         mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
86 | #np.save("/home/czurini/Alex/rois.npy",rois)
87 | # np.save("/home/czurini/Alex/mask_targets.npy",mask_targets)
88 | return labels, mask_targets, mask_inside_weights
89 |
90 | def decode(mask_targets, rois, classes, ih, iw):
91 | """Decode outputs into final masks
92 | Params
93 | ------
94 | mask_targets: of shape (N, h, w, K)
95 | rois: of shape (N, 4) [x1, y1, x2, y2]
96 | classes: of shape (N, 1) the class-id of each roi
97 | height: image height
98 | width: image width
99 |
100 | Returns
101 | ------
102 | M: a painted image with all masks, of shape (height, width), in [0, K]
103 | """
104 | Mask = np.zeros((ih, iw), dtype=np.float32)
105 | assert rois.shape[0] == mask_targets.shape[0], \
106 | '%s rois vs %d masks' %(rois.shape[0], mask_targets.shape[0])
107 | num = rois.shape[0]
108 | rois = clip_boxes(rois, (ih, iw))
109 | for i in np.arange(num):
110 | k = classes[i]
111 | mask = mask_targets[i, :, :, k]
112 |         h, w = int(rois[i, 3] - rois[i, 1] + 1), int(rois[i, 2] - rois[i, 0] + 1)
113 |         x, y = int(rois[i, 0]), int(rois[i, 1])
114 |         mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)  # cv2 needs an int size tuple
115 | mask *= k
116 |
117 | # paint
118 | Mask[y:y+h, x:x+w] = mask
119 |
120 | return Mask
121 |
122 |
123 |
124 | if __name__ == '__main__':
125 |
126 | import time
127 | import matplotlib.pyplot as plt
128 |
129 | t = time.time()
130 |
131 | for i in range(10):
132 | cfg.FLAGS.mask_threshold = 0.2
133 | N = 50
134 | W, H = 200, 200
135 | M = 50
136 |
137 | gt_masks = np.zeros((2, H, W), dtype=np.int32)
138 | gt_masks[0, 50:150, 50:150] = 1
139 | gt_masks[1, 100:150, 50:150] = 1
140 | gt_boxes = np.asarray(
141 | [
142 | [20, 20, 100, 100, 1],
143 | [100, 100, 180, 180, 2]
144 | ])
145 | rois = gt_boxes[:, :4]
146 | print (rois)
147 |         # encode() returns 3 values; note it now expects gt_masks of shape (G, imh, imw, 7)
148 |         labels, mask_targets, mask_inside_weights = encode(gt_masks, gt_boxes, rois, 3, 7, 7)
149 |         Mask = decode(mask_targets, rois, labels.astype(np.int32), H, W)
150 | if True:
151 | plt.figure(1)
152 | plt.imshow(Mask)
153 | plt.show()
154 | time.sleep(2)
155 | print(labels)
156 | print('average time: %f' % ((time.time() - t) / 10.0))
157 |
158 |
--------------------------------------------------------------------------------
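
The core of encode() above is the per-roi crop-and-resize of a ground-truth
mask; a standalone numpy/OpenCV sketch of just that step, with made-up sizes:

    import numpy as np
    import cv2

    gt_mask = np.zeros((200, 200), dtype=np.uint8)
    gt_mask[50:150, 50:150] = 1              # a 100x100 square object

    roi = [40, 40, 160, 160]                 # [x1, y1, x2, y2]
    cropped = gt_mask[roi[1]:roi[3] + 1, roi[0]:roi[2] + 1]

    # nearest-neighbor keeps the target binary, as in encode()
    target = cv2.resize(cropped, (28, 28), interpolation=cv2.INTER_NEAREST)
    print(target.shape, target.min(), target.max())  # (28, 28) 0 1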
/libs/layers/mask.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/mask.pyc
--------------------------------------------------------------------------------
/libs/layers/roi.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 |
7 | import libs.boxes.cython_bbox as cython_bbox
8 | import libs.configs.config_v1 as cfg
9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
10 | from libs.logs.log import LOG
11 |
12 | # FLAGS = tf.app.flags.FLAGS
13 |
14 | _DEBUG = False
15 |
16 | def encode(gt_boxes, rois, num_classes):
17 | """Matching and Encoding groundtruth boxes (gt_boxes) into learning targets to boxes
18 | Sampling
19 | Parameters
20 | ---------
21 | gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class]
22 | rois an array of shape (R x 4), [x1, y1, x2, y2]
23 | num_classes: scalar, number of classes
24 |
25 | Returns
26 | --------
27 | labels: Nx1 array in [0, num_classes)
28 | bbox_targets: of shape (N, Kx4) regression targets
29 | bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned.
30 | """
31 |
32 | all_rois = rois
33 | num_rois = rois.shape[0]
34 | if gt_boxes.size > 0:
35 | # R x G matrix
36 | overlaps = cython_bbox.bbox_overlaps(
37 | np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float),
38 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
39 | gt_assignment = overlaps.argmax(axis=1) # R
40 | # max_overlaps = overlaps.max(axis=1) # R
41 | max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
42 | # note: this will assign every rois with a positive label
43 | # labels = gt_boxes[gt_assignment, 4]
44 | labels = np.zeros([num_rois], dtype=np.float32)
45 | labels[:] = -1
46 |
47 | # if _DEBUG:
48 | # print ('gt_assignment')
49 | # print (gt_assignment)
50 |
51 | # sample rois as to 1:3
52 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
53 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction))#rois_per_image =256,fg_roi_fraction=0.25
54 | if fg_inds.size > 0 and fg_rois < fg_inds.size:
55 | fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
56 | labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]
57 |
58 | # TODO: sampling strategy
59 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
60 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64)#rois_per_image =256
61 | if bg_inds.size > 0 and bg_rois < bg_inds.size:
62 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
63 | labels[bg_inds] = 0
64 |
65 | # ignore rois with overlaps between fg_threshold and bg_threshold
66 | ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\
67 | (max_overlaps < cfg.FLAGS.fg_threshold)))[0]
68 | labels[ignore_inds] = -1
69 |
70 | keep_inds = np.append(fg_inds, bg_inds)
71 | if _DEBUG:
72 | print ('keep_inds')
73 | print (keep_inds)
74 | print ('fg_inds')
75 | print (fg_inds)
76 | print ('bg_inds')
77 | print (bg_inds)
78 | print ('bg_rois:', bg_rois)
79 | print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
80 | # print (max_overlaps)
81 |
82 | LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds)))
83 |
84 | bbox_targets, bbox_inside_weights = _compute_targets(
85 | rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], labels[keep_inds], num_classes)
86 | bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
87 | bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
88 |
89 | else:
90 | # there is no gt
91 | labels = np.zeros((num_rois, ), np.float32)
92 | bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
93 | bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
94 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
95 | if bg_rois < num_rois:
96 | bg_inds = np.arange(num_rois)
97 | ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
98 | labels[ignore_inds] = -1
99 |
100 | return labels, bbox_targets, bbox_inside_weights
101 |
102 | def decode(boxes, scores, rois, ih, iw):
103 |     """Decode prediction targets into boxes, keeping only the highest-scoring box for each roi
104 | Parameters
105 | ---------
106 |     boxes: an array of shape (R, Kx4), K sets of [x1, y1, x2, y2] regression deltas per row
107 | scores: an array of shape (R, K),
108 | rois: an array of shape (R, 4), [x1, y1, x2, y2]
109 |
110 | Returns
111 | --------
112 | final_boxes: of shape (R x 4)
113 | classes: of shape (R) in {0,1,2,3... K-1}
114 | scores: of shape (R) in [0 ~ 1]
115 | """
116 | boxes = bbox_transform_inv(rois, deltas=boxes)
117 | classes = np.argmax(scores, axis=1)
118 | classes = classes.astype(np.int32)
119 | scores = np.max(scores, axis=1)
120 | final_boxes = np.zeros((boxes.shape[0], 4), dtype=np.float32)
121 | for i in np.arange(0, boxes.shape[0]):
122 | ind = classes[i]*4
123 | final_boxes[i, 0:4] = boxes[i, ind:ind+4]
124 | final_boxes = clip_boxes(final_boxes, (ih, iw))
125 | return final_boxes, classes, scores
126 |
127 | def _compute_targets(ex_rois, gt_rois, labels, num_classes):
128 | """
129 | This function expands those targets into the 4-of-4*K representation used
130 | by the network (i.e. only one class has non-zero targets).
131 |
132 | Returns:
133 | bbox_target (ndarray): N x 4K blob of regression targets
134 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
135 | """
136 |
137 | assert ex_rois.shape[0] == gt_rois.shape[0]
138 | assert ex_rois.shape[1] == 4
139 | assert gt_rois.shape[1] == 4
140 |
141 | targets = bbox_transform(ex_rois, gt_rois)
142 |
143 | clss = labels
144 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
145 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
146 | inds = np.where(clss > 0)[0]
147 | for ind in inds:
148 | cls = int(clss[ind])
149 | start = 4 * cls
150 | end = start + 4
151 | bbox_targets[ind, start:end] = targets[ind, 0:4]
152 | bbox_inside_weights[ind, start:end] = 1
153 | return bbox_targets, bbox_inside_weights
154 |
155 | def _unmap(data, count, inds, fill=0):
156 | """ Unmap a subset of item (data) back to the original set of items (of
157 | size count) """
158 | if len(data.shape) == 1:
159 | ret = np.empty((count,), dtype=np.float32)
160 | ret.fill(fill)
161 | ret[inds] = data
162 | else:
163 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
164 | ret.fill(fill)
165 | ret[inds, :] = data
166 | return ret
167 |
168 | if __name__ == '__main__':
169 | cfg.FLAGS.fg_threshold = 0.1
170 | classes = np.random.randint(0, 3, (10, 1))
171 | boxes = np.random.randint(10, 50, (10, 2))
172 | s = np.random.randint(10, 20, (10, 2))
173 | s = boxes + s
174 | boxes = np.concatenate((boxes, s), axis=1)
175 | gt_boxes = np.hstack((boxes, classes))
176 | noise = np.random.randint(-3, 3, (10, 4))
177 | rois = gt_boxes[:, :4] + noise
178 |     labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, rois, num_classes=3)
179 | print (labels)
180 | print (bbox_inside_weights)
181 |
182 | ls = np.zeros((labels.shape[0], 3))
183 | for i in range(labels.shape[0]):
184 |         ls[i, int(labels[i])] = 1
185 | final_boxes, classes, scores = decode(bbox_targets, ls, rois, 100, 100)
186 | print('gt_boxes:\n', gt_boxes)
187 | print ('final boxes:\n', np.hstack((final_boxes, np.expand_dims(classes, axis=1))).astype(np.int32))
188 | # print (final_boxes.astype(np.int32))
189 |
--------------------------------------------------------------------------------
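
The 4-of-4K layout built by _compute_targets is easy to misread; a tiny numpy
sketch of the expansion for K = 3 classes (independent of the Cython extension,
with random deltas standing in for bbox_transform output):

    import numpy as np

    num_classes = 3
    labels = np.array([2, 0, 1])             # class 0 is background
    deltas = np.random.randn(3, 4).astype(np.float32)

    bbox_targets = np.zeros((3, 4 * num_classes), np.float32)
    bbox_inside_weights = np.zeros_like(bbox_targets)
    for ind in np.where(labels > 0)[0]:      # background rows stay all-zero
        start = 4 * labels[ind]
        bbox_targets[ind, start:start + 4] = deltas[ind]
        bbox_inside_weights[ind, start:start + 4] = 1

    print(bbox_inside_weights)
    # row 0 activates columns 8..11 (class 2), row 1 none, row 2 columns 4..7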
/libs/layers/roi.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/roi.pyc
--------------------------------------------------------------------------------
/libs/layers/sample.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/sample.pyc
--------------------------------------------------------------------------------
/libs/layers/wrapper.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Written by CharlesShang@github
4 | # --------------------------------------------------------
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import tensorflow as tf
10 | from . import anchor
11 | from . import roi
12 | from . import mask
13 | from . import sample
14 | from . import assign
15 | from libs.boxes.anchor import anchors_plane
16 |
17 | def anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder'):
18 |
19 | with tf.name_scope(scope) as sc:
20 | labels, bbox_targets, bbox_inside_weights = \
21 | tf.py_func(anchor.encode,
22 | [gt_boxes, all_anchors, height, width, stride],
23 | [tf.float32, tf.float32, tf.float32])
24 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels')
25 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets')
26 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights')
27 | labels = tf.reshape(labels, (1, height, width, -1))
28 | bbox_targets = tf.reshape(bbox_targets, (1, height, width, -1))
29 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (1, height, width, -1))
30 |
31 | return labels, bbox_targets, bbox_inside_weights
32 |
33 |
34 | def anchor_decoder(boxes, scores, all_anchors, ih, iw, scope='AnchorDecoder'):
35 |
36 | with tf.name_scope(scope) as sc:
37 | final_boxes, classes, scores = \
38 | tf.py_func(anchor.decode,
39 | [boxes, scores, all_anchors, ih, iw],
40 | [tf.float32, tf.int32, tf.float32])
41 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes')
42 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes')
43 | scores = tf.convert_to_tensor(scores, name='scores')
44 | final_boxes = tf.reshape(final_boxes, (-1, 4))
45 | classes = tf.reshape(classes, (-1, ))
46 | scores = tf.reshape(scores, (-1, ))
47 |
48 | return final_boxes, classes, scores
49 |
50 |
51 | def roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder'):
52 |
53 | with tf.name_scope(scope) as sc:
54 | labels, bbox_targets, bbox_inside_weights = \
55 | tf.py_func(roi.encode,
56 | [gt_boxes, rois, num_classes],
57 | [tf.float32, tf.float32, tf.float32])
58 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels')
59 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets')
60 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights')
61 | labels = tf.reshape(labels, (-1, ))
62 | bbox_targets = tf.reshape(bbox_targets, (-1, num_classes * 4))
63 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (-1, num_classes * 4))
64 |
65 | return labels, bbox_targets, bbox_inside_weights
66 |
67 |
68 | def roi_decoder(boxes, scores, rois, ih, iw, scope='ROIDecoder'):
69 |
70 | with tf.name_scope(scope) as sc:
71 | final_boxes, classes, scores = \
72 | tf.py_func(roi.decode,
73 | [boxes, scores, rois, ih, iw],
74 | [tf.float32, tf.int32, tf.float32])
75 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes')
76 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes')
77 | scores = tf.convert_to_tensor(scores, name='scores')
78 | final_boxes = tf.reshape(final_boxes, (-1, 4))
79 |
80 | return final_boxes, classes, scores
81 |
82 | def mask_encoder(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, scope='MaskEncoder'):
83 |
84 | with tf.name_scope(scope) as sc:
85 | labels, mask_targets, mask_inside_weights = \
86 | tf.py_func(mask.encode,
87 | [gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width],
88 | [tf.float32, tf.int32, tf.float32])
89 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='classes')
90 | mask_targets = tf.convert_to_tensor(mask_targets, name='mask_targets')
91 | mask_inside_weights = tf.convert_to_tensor(mask_inside_weights, name='mask_inside_weights')
92 | labels = tf.reshape(labels, (-1,))
93 | mask_targets = tf.reshape(mask_targets, (-1, mask_height, mask_width, num_classes))
94 | mask_inside_weights = tf.reshape(mask_inside_weights, (-1, mask_height, mask_width, num_classes))
95 |
96 | return labels, mask_targets, mask_inside_weights
97 |
98 | def mask_decoder(mask_targets, rois, classes, ih, iw, scope='MaskDecoder'):
99 |
100 | with tf.name_scope(scope) as sc:
101 | Mask = \
102 | tf.py_func(mask.decode,
103 | [mask_targets, rois, classes, ih, iw,],
104 | [tf.float32])
105 | Mask = tf.convert_to_tensor(Mask, name='MaskImage')
106 | Mask = tf.reshape(Mask, (ih, iw))
107 |
108 | return Mask
109 |
110 |
111 | def sample_wrapper(boxes, scores, is_training=True, scope='SampleBoxes'):
112 |
113 | with tf.name_scope(scope) as sc:
114 | boxes, scores, batch_inds = \
115 | tf.py_func(sample.sample_rpn_outputs,
116 | [boxes, scores, is_training],
117 | [tf.float32, tf.float32, tf.int32])
118 | boxes = tf.convert_to_tensor(boxes, name='Boxes')
119 | scores = tf.convert_to_tensor(scores, name='Scores')
120 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds')
121 | boxes = tf.reshape(boxes, (-1, 4))
122 | batch_inds = tf.reshape(batch_inds, [-1])
123 |
124 | return boxes, scores, batch_inds
125 |
126 | def sample_with_gt_wrapper(boxes, scores, gt_boxes, is_training=True, scope='SampleBoxesWithGT'):
127 |
128 | with tf.name_scope(scope) as sc:
129 | boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds = \
130 | tf.py_func(sample.sample_rpn_outputs_wrt_gt_boxes,
131 | [boxes, scores, gt_boxes, is_training],
132 | [tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, tf.int32])
133 | boxes = tf.convert_to_tensor(boxes, name='Boxes')
134 | scores = tf.convert_to_tensor(scores, name='Scores')
135 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds')
136 |
137 | mask_boxes = tf.convert_to_tensor(mask_boxes, name='MaskBoxes')
138 | mask_scores = tf.convert_to_tensor(mask_scores, name='MaskScores')
139 | mask_batch_inds = tf.convert_to_tensor(mask_batch_inds, name='MaskBatchInds')
140 |
141 | return boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds
142 |
143 | def gen_all_anchors(height, width, stride, scales, scope='GenAnchors'):
144 |
145 | with tf.name_scope(scope) as sc:
146 | all_anchors = \
147 | tf.py_func(anchors_plane,
148 | [height, width, stride, scales],
149 | [tf.float64]
150 | )
151 | all_anchors = tf.convert_to_tensor(tf.cast(all_anchors, tf.float32), name='AllAnchors')
152 | all_anchors = tf.reshape(all_anchors, (height, width, -1))
153 |
154 | return all_anchors
155 |
156 | def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'):
157 |
158 | with tf.name_scope(scope) as sc:
159 | min_k = layers[0]
160 | max_k = layers[-1]
161 | assigned_layers = \
162 | tf.py_func(assign.assign_boxes,
163 | [ gt_boxes, min_k, max_k ],
164 | tf.int32)
165 | assigned_layers = tf.reshape(assigned_layers, [-1])
166 |
167 | assigned_tensors = []
168 | for t in tensors:
169 | split_tensors = []
170 | for l in layers:
171 |                 l = tf.cast(l, tf.int32)
172 | inds = tf.where(tf.equal(assigned_layers, l))
173 | inds = tf.reshape(inds, [-1])
174 | split_tensors.append(tf.gather(t, inds))
175 | assigned_tensors.append(split_tensors)
176 |
177 | return assigned_tensors + [assigned_layers]
--------------------------------------------------------------------------------
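
Every wrapper above follows the same pattern: run numpy logic through
tf.py_func, then re-impose the static shape that py_func loses. A minimal
TF 1.x sketch of that pattern, with a hypothetical clip function as the payload:

    import numpy as np
    import tensorflow as tf  # TF 1.x

    def _np_clip_boxes(boxes):
        return np.maximum(boxes, 0).astype(np.float32)

    def clip_boxes_wrapper(boxes, scope='ClipBoxes'):
        with tf.name_scope(scope):
            out = tf.py_func(_np_clip_boxes, [boxes], tf.float32)
            out = tf.reshape(out, (-1, 4))  # py_func output has unknown shape
            return out

    boxes = tf.constant([[-3., 2., 10., 12.]])
    with tf.Session() as sess:
        print(sess.run(clip_boxes_wrapper(boxes)))  # [[ 0.  2. 10. 12.]]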
/libs/layers/wrapper.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/wrapper.pyc
--------------------------------------------------------------------------------
/libs/logs/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.py
--------------------------------------------------------------------------------
/libs/logs/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.pyc
--------------------------------------------------------------------------------
/libs/logs/log.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import logging
6 | import libs.configs.config_v1 as cfg
7 |
8 | def LOG(mssg):
9 |     # logging.basicConfig only configures the root logger on the first call; later calls are no-ops
10 |     logging.basicConfig(filename=cfg.FLAGS.train_dir + '/maskrcnn.log',
11 |                         level=logging.INFO,
12 |                         datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s %(message)s')
13 |     logging.info(mssg)
--------------------------------------------------------------------------------
/libs/logs/log.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/log.pyc
--------------------------------------------------------------------------------
/libs/make.sh:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | # build pycocotools
4 | cd datasets/pycocotools
5 | make
6 | cd -
7 |
--------------------------------------------------------------------------------
/libs/nets/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.py
--------------------------------------------------------------------------------
/libs/nets/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.pyc
--------------------------------------------------------------------------------
/libs/nets/nets_factory.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 | import functools
5 |
6 | import tensorflow as tf
7 |
8 | from . import resnet_v1
9 | from .resnet_v1 import resnet_v1_50 as resnet50
10 | from .resnet_utils import resnet_arg_scope
11 | from .resnet_v1 import resnet_v1_101 as resnet101
12 |
13 | slim = tf.contrib.slim
14 |
15 | pyramid_maps = {
16 | 'resnet50': {'C1':'resnet_v1_50/conv1/Relu:0',
17 | 'C2':'resnet_v1_50/block1/unit_2/bottleneck_v1',
18 | 'C3':'resnet_v1_50/block2/unit_3/bottleneck_v1',
19 | 'C4':'resnet_v1_50/block3/unit_5/bottleneck_v1',
20 | 'C5':'resnet_v1_50/block4/unit_3/bottleneck_v1',
21 | },
22 | 'resnet101': {'C1': '', 'C2': '',
23 | 'C3': '', 'C4': '',
24 | 'C5': '',
25 | }
26 | }
27 |
28 | def get_network(name, image, weight_decay=0.000005, is_training=False):
29 |
30 | if name == 'resnet50':
31 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
32 | logits, end_points = resnet50(image, 1000, is_training=is_training)
33 |
34 |     if name == 'resnet101':
35 |         with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
36 |             logits, end_points = resnet101(image, 1000, is_training=is_training)
37 | 
38 |     if name == 'resnext50':
39 |         raise NotImplementedError('resnext50 is not supported yet')
40 |
41 | end_points['input'] = image
42 | return logits, end_points, pyramid_maps[name]
43 |
--------------------------------------------------------------------------------
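
The pyramid_maps values are slim end_points keys ('C2'..'C5') plus one raw
tensor name ('C1'). A hedged sketch of how a caller might gather the feature
maps after get_network has built the graph (collect_pyramid is a hypothetical
helper, not part of this repository):

    import tensorflow as tf  # TF 1.x

    def collect_pyramid(end_points, pyramid_map):
        """Gather C1..C5 feature maps for FPN construction."""
        pyramid = {}
        for level in ('C2', 'C3', 'C4', 'C5'):
            pyramid[level] = end_points[pyramid_map[level]]
        # 'C1' is stored as a tensor name rather than an end_points key
        pyramid['C1'] = tf.get_default_graph().get_tensor_by_name(pyramid_map['C1'])
        return pyramid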
/libs/nets/nets_factory.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/nets_factory.pyc
--------------------------------------------------------------------------------
/libs/nets/pyramid_network.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/pyramid_network.pyc
--------------------------------------------------------------------------------
/libs/nets/resnet_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_utils.pyc
--------------------------------------------------------------------------------
/libs/nets/resnet_v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_v1.pyc
--------------------------------------------------------------------------------
/libs/nets/train_utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import collections
6 | import tensorflow as tf
7 | import libs.configs.config_v1 as cfg
8 |
9 | slim = tf.contrib.slim
10 | FLAGS = tf.app.flags.FLAGS
11 |
12 | def _configure_optimizer(learning_rate):
13 | """Configures the optimizer used for training.
14 |
15 | Args:
16 | learning_rate: A scalar or `Tensor` learning rate.
17 |
18 | Returns:
19 | An instance of an optimizer.
20 |
21 | Raises:
22 | ValueError: if FLAGS.optimizer is not recognized.
23 | """
24 | if FLAGS.optimizer == 'adadelta':
25 | optimizer = tf.train.AdadeltaOptimizer(
26 | learning_rate,
27 | rho=FLAGS.adadelta_rho,
28 | epsilon=FLAGS.opt_epsilon)
29 | elif FLAGS.optimizer == 'adagrad':
30 | optimizer = tf.train.AdagradOptimizer(
31 | learning_rate,
32 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
33 | elif FLAGS.optimizer == 'adam':
34 | optimizer = tf.train.AdamOptimizer(
35 | learning_rate,
36 | beta1=FLAGS.adam_beta1,
37 | beta2=FLAGS.adam_beta2,
38 | epsilon=FLAGS.opt_epsilon)
39 | elif FLAGS.optimizer == 'ftrl':
40 | optimizer = tf.train.FtrlOptimizer(
41 | learning_rate,
42 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
43 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
44 | l1_regularization_strength=FLAGS.ftrl_l1,
45 | l2_regularization_strength=FLAGS.ftrl_l2)
46 | elif FLAGS.optimizer == 'momentum':
47 | optimizer = tf.train.MomentumOptimizer(
48 | learning_rate,
49 | momentum=FLAGS.momentum,
50 | name='Momentum')
51 | elif FLAGS.optimizer == 'rmsprop':
52 | optimizer = tf.train.RMSPropOptimizer(
53 | learning_rate,
54 | decay=FLAGS.rmsprop_decay,
55 | momentum=FLAGS.rmsprop_momentum,
56 | epsilon=FLAGS.opt_epsilon)
57 | elif FLAGS.optimizer == 'sgd':
58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
59 | else:
60 |     raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
61 | return optimizer
62 |
63 | def _configure_learning_rate(num_samples_per_epoch, global_step):
64 | """Configures the learning rate.
65 |
66 | Args:
67 | num_samples_per_epoch: The number of samples in each epoch of training.
68 | global_step: The global_step tensor.
69 |
70 | Returns:
71 | A `Tensor` representing the learning rate.
72 |
73 | Raises:
74 |     ValueError: if FLAGS.learning_rate_decay_type is not recognized.
75 | """
76 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
77 | FLAGS.num_epochs_per_decay)
78 | if FLAGS.sync_replicas:
79 | decay_steps /= FLAGS.replicas_to_aggregate
80 |
81 | if FLAGS.learning_rate_decay_type == 'exponential':
82 | return tf.train.exponential_decay(FLAGS.learning_rate,
83 | global_step,
84 | decay_steps,
85 | FLAGS.learning_rate_decay_factor,
86 | staircase=True,
87 | name='exponential_decay_learning_rate')
88 | elif FLAGS.learning_rate_decay_type == 'fixed':
89 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
90 | elif FLAGS.learning_rate_decay_type == 'polynomial':
91 | return tf.train.polynomial_decay(FLAGS.learning_rate,
92 | global_step,
93 | decay_steps,
94 | FLAGS.end_learning_rate,
95 | power=0.9,
96 | cycle=False,
97 | name='polynomial_decay_learning_rate')
98 | else:
99 |     raise ValueError('learning_rate_decay_type [%s] was not recognized' %
100 |                      FLAGS.learning_rate_decay_type)
101 |
102 | def _get_variables_to_train():
103 | """Returns a list of variables to train.
104 |
105 | Returns:
106 | A list of variables to train by the optimizer.
107 | """
108 | if FLAGS.trainable_scopes is None:
109 | return tf.trainable_variables()
110 | else:
111 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
112 |
113 | variables_to_train = []
114 | for scope in scopes:
115 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
116 | variables_to_train.extend(variables)
117 | return variables_to_train
118 |
119 | def _get_init_fn():
120 | """Returns a function run by the chief worker to warm-start the training.
121 |
122 | Note that the init_fn is only run when initializing the model during the very
123 | first global step.
124 |
125 | Returns:
126 | An init function run by the supervisor.
127 | """
128 | if FLAGS.checkpoint_path is None:
129 | return None
130 |
131 | # Warn the user if a checkpoint exists in the train_dir. Then we'll be
132 | # ignoring the checkpoint anyway.
133 | if tf.train.latest_checkpoint(FLAGS.train_dir):
134 | tf.logging.info(
135 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s'
136 | % FLAGS.train_dir)
137 | return None
138 |
139 | exclusions = []
140 | if FLAGS.checkpoint_exclude_scopes:
141 | exclusions = [scope.strip()
142 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')]
143 |
144 | # TODO(sguada) variables.filter_variables()
145 | variables_to_restore = []
146 | for var in slim.get_model_variables():
147 | excluded = False
148 | for exclusion in exclusions:
149 | if var.op.name.startswith(exclusion):
150 | excluded = True
151 | break
152 | if not excluded:
153 | variables_to_restore.append(var)
154 |
155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
157 | else:
158 | checkpoint_path = FLAGS.checkpoint_path
159 |
160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path)
161 |
162 | return slim.assign_from_checkpoint_fn(
163 | checkpoint_path,
164 | variables_to_restore,
165 | ignore_missing_vars=FLAGS.ignore_missing_vars)
166 |
167 | def get_var_list_to_restore():
168 | """Choosing which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """
169 |
170 | variables_to_restore = []
171 | if FLAGS.checkpoint_exclude_scopes is not None:
172 | exclusions = [scope.strip()
173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')]
174 |
175 | # build restore list
176 | for var in tf.model_variables():
177 | excluded = False
178 | for exclusion in exclusions:
179 | if var.name.startswith(exclusion):
180 | excluded = True
181 | break
182 | if not excluded:
183 | variables_to_restore.append(var)
184 | else:
185 | variables_to_restore = tf.model_variables()
186 |
187 | variables_to_restore_final = []
188 | if FLAGS.checkpoint_include_scopes is not None:
189 | includes = [
190 | scope.strip()
191 | for scope in FLAGS.checkpoint_include_scopes.split(',')
192 | ]
193 | for var in variables_to_restore:
194 | included = False
195 | for include in includes:
196 | if var.name.startswith(include):
197 | included = True
198 | break
199 | if included:
200 | variables_to_restore_final.append(var)
201 | else:
202 | variables_to_restore_final = variables_to_restore
203 |
204 | return variables_to_restore_final
205 |
--------------------------------------------------------------------------------
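
A quick numeric check of the exponential schedule configured above: with
staircase=True the rate drops by learning_rate_decay_factor once per
decay_steps. Plain-Python sketch, with illustrative values rather than the
repo's flag defaults:

    def staircase_lr(base_lr, global_step, decay_steps, decay_factor):
        """Mirrors tf.train.exponential_decay with staircase=True."""
        return base_lr * decay_factor ** (global_step // decay_steps)

    # e.g. 80000 samples per epoch, batch size 1, decay every 2 epochs
    decay_steps = int(80000 / 1 * 2)
    for step in (0, 100000, 200000, 400000):
        print(step, staircase_lr(0.001, step, decay_steps, 0.1))
    # 0 0.001 / 100000 0.001 / 200000 0.0001 / 400000 1e-05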
/libs/nms/.gitignore:
--------------------------------------------------------------------------------
1 | *.c
2 | *.cpp
3 | *.so
4 |
--------------------------------------------------------------------------------
/libs/nms/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.py
--------------------------------------------------------------------------------
/libs/nms/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.pyc
--------------------------------------------------------------------------------
/libs/nms/cpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b):
12 | return a if a >= b else b
13 |
14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b):
15 | return a if a <= b else b
16 |
17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh):
18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0]
19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1]
20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2]
21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3]
22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4]
23 |
24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1)
25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1]
26 |
27 | cdef int ndets = dets.shape[0]
28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \
29 | np.zeros((ndets), dtype=np.int)
30 |
31 | # nominal indices
32 | cdef int _i, _j
33 | # sorted indices
34 | cdef int i, j
35 | # temp variables for box i's (the box currently under consideration)
36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea
37 | # variables for computing overlap with box j (lower scoring box)
38 | cdef np.float32_t xx1, yy1, xx2, yy2
39 | cdef np.float32_t w, h
40 | cdef np.float32_t inter, ovr
41 |
42 | keep = []
43 | for _i in range(ndets):
44 | i = order[_i]
45 | if suppressed[i] == 1:
46 | continue
47 | keep.append(i)
48 | ix1 = x1[i]
49 | iy1 = y1[i]
50 | ix2 = x2[i]
51 | iy2 = y2[i]
52 | iarea = areas[i]
53 | for _j in range(_i + 1, ndets):
54 | j = order[_j]
55 | if suppressed[j] == 1:
56 | continue
57 | xx1 = max(ix1, x1[j])
58 | yy1 = max(iy1, y1[j])
59 | xx2 = min(ix2, x2[j])
60 | yy2 = min(iy2, y2[j])
61 | w = max(0.0, xx2 - xx1 + 1)
62 | h = max(0.0, yy2 - yy1 + 1)
63 | inter = w * h
64 | ovr = inter / (iarea + areas[j] - inter)
65 | if ovr >= thresh:
66 | suppressed[j] = 1
67 |
68 | return keep
69 |
--------------------------------------------------------------------------------
/libs/nms/gpu_nms.hpp:
--------------------------------------------------------------------------------
1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
2 | int boxes_dim, float nms_overlap_thresh, int device_id);
3 |
--------------------------------------------------------------------------------
/libs/nms/gpu_nms.pyx:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 | cimport numpy as np
10 |
11 | assert sizeof(int) == sizeof(np.int32_t)
12 |
13 | cdef extern from "gpu_nms.hpp":
14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int)
15 |
16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh,
17 | np.int32_t device_id=0):
18 | cdef int boxes_num = dets.shape[0]
19 | cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 |
--------------------------------------------------------------------------------
/libs/nms/nms_kernel.cu:
--------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 |   CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 |   nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 |   std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 |   std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 |
--------------------------------------------------------------------------------
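
The kernel writes, for every box, one 64-bit word per column block whose set
bits mark the lower-scoring boxes it suppresses; the host loop at the end of
_nms then walks boxes in score order. The same reduction in plain Python, to
clarify the bit bookkeeping (a sketch of the logic, not a binding of the CUDA
code):

    def reduce_mask(mask_host, boxes_num, threads_per_block=64):
        """mask_host[i][b]: bitmask of boxes in column block b suppressed by box i."""
        col_blocks = (boxes_num + threads_per_block - 1) // threads_per_block
        remv = [0] * col_blocks
        keep = []
        for i in range(boxes_num):  # boxes are pre-sorted by score
            nblock, inblock = divmod(i, threads_per_block)
            if not (remv[nblock] >> inblock) & 1:
                keep.append(i)
                for j in range(nblock, col_blocks):
                    remv[j] |= mask_host[i][j]
        return keep

    # toy case: box 0 suppresses box 1; box 2 survives
    print(reduce_mask([[0b010], [0b000], [0b000]], 3))  # [0, 2]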
/libs/nms/py_cpu_nms.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 |
--------------------------------------------------------------------------------
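
A quick usage example of the pure-Python baseline above (rows of dets are
[x1, y1, x2, y2, score]):

    import numpy as np
    from libs.nms.py_cpu_nms import py_cpu_nms

    dets = np.array([[ 10,  10,  50,  50, 0.9],
                     [ 12,  12,  52,  52, 0.8],   # heavy overlap with box 0
                     [100, 100, 150, 150, 0.7]], dtype=np.float32)

    print(py_cpu_nms(dets, thresh=0.5))  # [0, 2]: the overlapping box is dropped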
/libs/preprocessings/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.py
--------------------------------------------------------------------------------
/libs/preprocessings/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.pyc
--------------------------------------------------------------------------------
/libs/preprocessings/coco_v1.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import time
9 | import tensorflow as tf
10 | import libs.configs.config_v1 as cfg
11 | from . import utils as preprocess_utils
12 |
13 | FLAGS = tf.app.flags.FLAGS
14 |
15 | def preprocess_image(image, gt_boxes, gt_masks, is_training=False):
16 | """preprocess image for coco
17 | 1. random flipping
18 | 2. min size resizing
19 | 3. zero mean
20 | 4. ...
21 | """
22 | if is_training:
23 | return preprocess_for_training(image, gt_boxes, gt_masks)
24 | else:
25 | return preprocess_for_test(image, gt_boxes, gt_masks)
26 |
27 |
28 | def preprocess_for_training(image, gt_boxes, gt_masks):
29 |
30 | ih, iw = tf.shape(image)[0], tf.shape(image)[1]
31 | ## random flipping
32 | coin = tf.to_float(tf.random_uniform([1]))[0]
33 | image, gt_boxes, gt_masks =\
34 | tf.cond(tf.greater_equal(coin, 0.5),
35 | lambda: (preprocess_utils.flip_image(image),
36 | preprocess_utils.flip_gt_boxes(gt_boxes, ih, iw),
37 | preprocess_utils.flip_gt_masks(gt_masks)),
38 | lambda: (image, gt_boxes, gt_masks))
39 |
40 | ## min size resizing
41 |     new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size)  # FLAGS.image_min_size = 640 (shorter edge)
42 | image = tf.expand_dims(image, 0)
43 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False)
44 | image = tf.squeeze(image, axis=[0])
45 |
46 |     # gt_masks = tf.expand_dims(gt_masks, -1)
47 |     # uncommenting the line above raises: ValueError: Shape must be rank 4 but is rank 5 for 'ResizeNearestNeighbor' (op: 'ResizeNearestNeighbor') with input shapes: [?,?,?,7,1], [2].
48 | gt_masks = tf.cast(gt_masks, tf.float32)
49 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False)
50 | gt_masks = tf.cast(gt_masks, tf.int32)
51 | #gt_masks = tf.squeeze(gt_masks, axis=[-1])
52 |
53 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih)
54 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio)
55 |
56 | ## random flip image
57 | # val_lr = tf.to_float(tf.random_uniform([1]))[0]
58 | # image = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_image(image), lambda: image)
59 | # gt_masks = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_masks(gt_masks), lambda: gt_masks)
60 | # gt_boxes = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_boxes(gt_boxes, new_ih, new_iw), lambda: gt_boxes)
61 |
62 |     ## zero-mean the image (scale to [-1, 1])
63 | image = tf.cast(image, tf.float32)
64 | image = image / 256.0
65 | image = (image - 0.5) * 2.0
66 | image = tf.expand_dims(image, axis=0)
67 |
68 | ## rgb to bgr
69 | image = tf.reverse(image, axis=[-1])
70 |
71 | return image, gt_boxes, gt_masks
72 |
73 | def preprocess_for_test(image, gt_boxes, gt_masks):
74 |
75 |
76 | ih, iw = tf.shape(image)[0], tf.shape(image)[1]
77 |
78 | ## min size resizing
79 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size)
80 | image = tf.expand_dims(image, 0)
81 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False)
82 | image = tf.squeeze(image, axis=[0])
83 |
84 | gt_masks = tf.expand_dims(gt_masks, -1)
85 | gt_masks = tf.cast(gt_masks, tf.float32)
86 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False)
87 | gt_masks = tf.cast(gt_masks, tf.int32)
88 | gt_masks = tf.squeeze(gt_masks, axis=[-1])
89 |
90 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih)
91 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio)
92 |
93 |     ## zero-mean the image (scale to [-1, 1])
94 | image = tf.cast(image, tf.float32)
95 | image = image / 256.0
96 | image = (image - 0.5) * 2.0
97 | image = tf.expand_dims(image, axis=0)
98 |
99 | ## rgb to bgr
100 | image = tf.reverse(image, axis=[-1])
101 |
102 | return image, gt_boxes, gt_masks
103 |
--------------------------------------------------------------------------------
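The net effect of preprocess_for_training/preprocess_for_test above: the shorter image side is scaled up to cfg.FLAGS.image_min_size, gt_boxes are scaled by the same ratio, pixels are mapped to roughly [-1, 1], channels are reversed to BGR, and a batch dimension is added. The resize arithmetic, for a hypothetical 480x640 input with image_min_size = 640:

    ih, iw, min_size = 480, 640, 640                   # height < width, so height is the shorter side
    scale = min_size / float(min(ih, iw))              # 640 / 480 = 1.333...
    new_ih, new_iw = int(ih * scale), int(iw * scale)  # 640, 853
    box = [30.0, 60.0, 120.0, 200.0]                   # [x1, y1, x2, y2] in the original image
    box_resized = [c * scale for c in box]             # box coordinates follow the image

--------------------------------------------------------------------------------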
/libs/preprocessings/coco_v1.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/coco_v1.pyc
--------------------------------------------------------------------------------
/libs/preprocessings/utils.py:
--------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 |
7 | from tensorflow.python.ops import control_flow_ops
8 | from tensorflow.contrib import slim
9 |
10 |
11 | def _crop(image, offset_height, offset_width, crop_height, crop_width):
12 | original_shape = tf.shape(image)
13 |
14 | rank_assertion = tf.Assert(
15 | tf.equal(tf.rank(image), 3),
16 | ['Rank of image must be equal to 3.'])
17 | cropped_shape = control_flow_ops.with_dependencies(
18 | [rank_assertion],
19 | tf.stack([crop_height, crop_width, original_shape[2]]))
20 |
21 | size_assertion = tf.Assert(
22 | tf.logical_and(
23 | tf.greater_equal(original_shape[0], crop_height),
24 | tf.greater_equal(original_shape[1], crop_width)),
25 | ['Crop size greater than the image size.'])
26 |
27 | offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
28 |
29 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
30 | # define the crop size.
31 | image = control_flow_ops.with_dependencies(
32 | [size_assertion],
33 | tf.slice(image, offsets, cropped_shape))
34 | return tf.reshape(image, cropped_shape)
35 |
36 |
37 | def _random_crop(image_list, label_list, crop_height, crop_width):
38 | if not image_list:
39 | raise ValueError('Empty image_list.')
40 |
41 | # Compute the rank assertions.
42 | rank_assertions = []
43 | for i in range(len(image_list)):
44 | image_rank = tf.rank(image_list[i])
45 | rank_assert = tf.Assert(
46 | tf.equal(image_rank, 3),
47 | ['Wrong rank for tensor %s [expected] [actual]',
48 | image_list[i].name, 3, image_rank])
49 | rank_assertions.append(rank_assert)
50 |
51 | image_shape = control_flow_ops.with_dependencies(
52 | [rank_assertions[0]],
53 | tf.shape(image_list[0]))
54 | image_height = image_shape[0]
55 | image_width = image_shape[1]
56 | crop_size_assert = tf.Assert(
57 | tf.logical_and(
58 | tf.greater_equal(image_height, crop_height),
59 | tf.greater_equal(image_width, crop_width)),
60 | ['Crop size greater than the image size.', image_height, image_width, crop_height, crop_width])
61 |
62 | asserts = [rank_assertions[0], crop_size_assert]
63 |
64 | for i in range(1, len(image_list)):
65 | image = image_list[i]
66 | asserts.append(rank_assertions[i])
67 | shape = control_flow_ops.with_dependencies([rank_assertions[i]],
68 | tf.shape(image))
69 | height = shape[0]
70 | width = shape[1]
71 |
72 | height_assert = tf.Assert(
73 | tf.equal(height, image_height),
74 | ['Wrong height for tensor %s [expected][actual]',
75 | image.name, height, image_height])
76 | width_assert = tf.Assert(
77 | tf.equal(width, image_width),
78 | ['Wrong width for tensor %s [expected][actual]',
79 | image.name, width, image_width])
80 | asserts.extend([height_assert, width_assert])
81 |
82 | # Create a random bounding box.
83 | #
84 | # Use tf.random_uniform and not numpy.random.rand as doing the former would
85 | # generate random numbers at graph eval time, unlike the latter which
86 | # generates random numbers at graph definition time.
87 | max_offset_height = control_flow_ops.with_dependencies(
88 | asserts, tf.reshape(image_height - crop_height + 1, []))
89 | max_offset_width = control_flow_ops.with_dependencies(
90 | asserts, tf.reshape(image_width - crop_width + 1, []))
91 | offset_height = tf.random_uniform(
92 | [], maxval=max_offset_height, dtype=tf.int32)
93 | offset_width = tf.random_uniform(
94 | [], maxval=max_offset_width, dtype=tf.int32)
95 |
96 | cropped_images = [_crop(image, offset_height, offset_width,
97 | crop_height, crop_width) for image in image_list]
98 | cropped_labels = [_crop(label, offset_height, offset_width,
99 | crop_height, crop_width) for label in label_list]
100 | return cropped_images, cropped_labels
101 |
102 |
103 | def _central_crop(image_list, label_list, crop_height, crop_width):
104 | output_images = []
105 | output_labels = []
106 | for image, label in zip(image_list, label_list):
107 | image_height = tf.shape(image)[0]
108 | image_width = tf.shape(image)[1]
109 |
110 |         offset_height = (image_height - crop_height) // 2  # floor division keeps integer offsets
111 |         offset_width = (image_width - crop_width) // 2
112 |
113 | output_images.append(_crop(image, offset_height, offset_width,
114 | crop_height, crop_width))
115 | output_labels.append(_crop(label, offset_height, offset_width,
116 | crop_height, crop_width))
117 | return output_images, output_labels
118 |
119 |
120 | def _smallest_size_at_least(height, width, smallest_side):
121 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
122 |
123 | height = tf.to_float(height)
124 | width = tf.to_float(width)
125 | smallest_side = tf.to_float(smallest_side)
126 |
127 | scale = tf.cond(tf.greater(height, width),
128 | lambda: smallest_side / width,
129 | lambda: smallest_side / height)
130 | new_height = tf.to_int32(height * scale)
131 | new_width = tf.to_int32(width * scale)
132 | return new_height, new_width
133 |
134 | def _aspect_preserving_resize(image, label, smallest_side):
135 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32)
136 |
137 | shape = tf.shape(image)
138 | height = shape[0]
139 | width = shape[1]
140 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side)
141 |
142 | image = tf.expand_dims(image, 0)
143 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width],
144 | align_corners=False)
145 | resized_image = tf.squeeze(resized_image, axis=[0])
146 | resized_image.set_shape([None, None, 3])
147 |
148 | label = tf.expand_dims(label, 0)
149 | resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width],
150 | align_corners=False)
151 | resized_label = tf.squeeze(resized_label, axis=[0])
152 | resized_label.set_shape([None, None, 1])
153 | return resized_image, resized_label
154 |
155 | def flip_gt_boxes(gt_boxes, ih, iw):
156 | x1s, y1s, x2s, y2s, cls = \
157 | gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4]
158 | x1s = tf.to_float(iw) - x1s
159 | x2s = tf.to_float(iw) - x2s
160 | return tf.concat(values=(x2s[:, tf.newaxis],
161 | y1s[:, tf.newaxis],
162 | x1s[:, tf.newaxis],
163 | y2s[:, tf.newaxis],
164 | cls[:, tf.newaxis]), axis=1)
165 |
166 | def flip_gt_masks(gt_masks):
167 | return tf.reverse(gt_masks, axis=[2])
168 |
169 | def flip_image(image):
170 | return tf.reverse(image, axis=[1])
171 |
172 | def resize_gt_boxes(gt_boxes, scale_ratio):
173 | xys, cls = \
174 | gt_boxes[:, 0:4], gt_boxes[:, 4]
175 | xys = xys * scale_ratio
176 | return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1)
177 |
178 |
--------------------------------------------------------------------------------
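One detail worth noting in flip_gt_boxes above: mirroring x-coordinates reverses their order, so the returned tensor swaps the x1/x2 columns to keep boxes well-formed (x1 < x2). With a 100-pixel-wide image (illustrative numbers):

    iw = 100
    x1, y1, x2, y2 = 20.0, 10.0, 60.0, 50.0
    flipped = (iw - x2, y1, iw - x1, y2)   # (40.0, 10.0, 80.0, 50.0)

--------------------------------------------------------------------------------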
/libs/preprocessings/utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/utils.pyc
--------------------------------------------------------------------------------
/libs/setup.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 | "Find a file in a search path"
17 |     # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 | for dir in path.split(os.pathsep):
19 | binpath = pjoin(dir, name)
20 | if os.path.exists(binpath):
21 | return os.path.abspath(binpath)
22 | return None
23 |
24 | def locate_cuda():
25 | """Locate the CUDA environment on the system
26 |
27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 | and values giving the absolute path to each directory.
29 |
30 | Starts by looking for the CUDAHOME env variable. If not found, everything
31 | is based on finding 'nvcc' in the PATH.
32 | """
33 |
34 | # first check if the CUDAHOME env variable is in use
35 | if 'CUDAHOME' in os.environ:
36 | home = os.environ['CUDAHOME']
37 | nvcc = pjoin(home, 'bin', 'nvcc')
38 | else:
39 | # otherwise, search the PATH for NVCC
40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 | if nvcc is None:
43 | raise EnvironmentError('The nvcc binary could not be '
44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 | home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 | cudaconfig = {'home':home, 'nvcc':nvcc,
48 | 'include': pjoin(home, 'include'),
49 | 'lib64': pjoin(home, 'lib64')}
50 |     for k, v in cudaconfig.items():  # items() works under both Python 2 and 3
51 | if not os.path.exists(v):
52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 | return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 | numpy_include = np.get_include()
60 | except AttributeError:
61 | numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 | """inject deep into distutils to customize how the dispatch
65 | to gcc/nvcc works.
66 |
67 | If you subclass UnixCCompiler, it's not trivial to get your subclass
68 | injected in, and still have the right customizations (i.e.
69 | distutils.sysconfig.customize_compiler) run on it. So instead of going
70 |     the OO route, I have this. Note, it's kind of like a weird functional
71 | subclassing going on."""
72 |
73 |     # tell the compiler it can process .cu files
74 | self.src_extensions.append('.cu')
75 |
76 |     # save references to the default compiler_so and _compile methods
77 | default_compiler_so = self.compiler_so
78 | super = self._compile
79 |
80 | # now redefine the _compile method. This gets executed for each
81 | # object but distutils doesn't have the ability to change compilers
82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 |         print(extra_postargs)
85 | if os.path.splitext(src)[1] == '.cu':
86 | # use the cuda for .cu files
87 | self.set_executable('compiler_so', CUDA['nvcc'])
88 | # use only a subset of the extra_postargs, which are 1-1 translated
89 | # from the extra_compile_args in the Extension class
90 | postargs = extra_postargs['nvcc']
91 | else:
92 | postargs = extra_postargs['gcc']
93 |
94 | super(obj, src, ext, cc_args, postargs, pp_opts)
95 | # reset the default compiler_so, which we might have changed for cuda
96 | self.compiler_so = default_compiler_so
97 |
98 | # inject our redefined _compile method into the class
99 | self._compile = _compile
100 |
101 |
102 | # run the customize_compiler
103 | class custom_build_ext(build_ext):
104 | def build_extensions(self):
105 | customize_compiler_for_nvcc(self.compiler)
106 | build_ext.build_extensions(self)
107 |
108 | ext_modules = [
109 | Extension(
110 | "boxes.cython_bbox",
111 | ["boxes/bbox.pyx"],
112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
113 | include_dirs = [numpy_include]
114 | ),
115 | Extension(
116 | "boxes.cython_anchor",
117 | ["boxes/cython_anchor.pyx"],
118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 | include_dirs = [numpy_include]
120 | ),
121 | Extension(
122 | "boxes.cython_bbox_transform",
123 | ["boxes/cython_bbox_transform.pyx"],
124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 | include_dirs=[numpy_include]
126 | ),
127 | Extension(
128 | "boxes.cython_nms",
129 | ["boxes/nms.pyx"],
130 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
131 | include_dirs = [numpy_include]
132 | ),
133 | Extension(
134 | "nms.cpu_nms",
135 | ["nms/cpu_nms.pyx"],
136 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
137 | include_dirs = [numpy_include]
138 | ),
139 | Extension(
140 | 'nms.gpu_nms',
141 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
142 | library_dirs=[CUDA['lib64']],
143 | libraries=['cudart'],
144 | language='c++',
145 | runtime_library_dirs=[CUDA['lib64']],
146 | # this syntax is specific to this build system
147 | # we're only going to use certain compiler args with nvcc and not with gcc
148 | # the implementation of this trick is in customize_compiler() below
149 | extra_compile_args={'gcc': ["-Wno-unused-function"],
150 | 'nvcc': ['-arch=sm_52',
151 | '--ptxas-options=-v',
152 | '-c',
153 | '--compiler-options',
154 | "'-fPIC'"]},
155 | include_dirs = [numpy_include, CUDA['include']]
156 | ),
157 | ]
158 |
159 | setup(
160 | name='fast_rcnn',
161 | ext_modules=ext_modules,
162 | # inject our custom trigger
163 | cmdclass={'build_ext': custom_build_ext},
164 | )
165 |
--------------------------------------------------------------------------------
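locate_cuda above resolves the toolkit in two steps: it prefers $CUDAHOME, and otherwise derives the install root from wherever nvcc is found on $PATH. A sketch of the resulting dict for a hypothetical CUDAHOME=/usr/local/cuda-8.0 (the path is an example, not a repo requirement):

    expected = {
        'home':    '/usr/local/cuda-8.0',
        'nvcc':    '/usr/local/cuda-8.0/bin/nvcc',
        'include': '/usr/local/cuda-8.0/include',
        'lib64':   '/usr/local/cuda-8.0/lib64',
    }

Note also that '-arch=sm_52' in the gpu_nms extension pins the CUDA compute architecture; GPUs of a different generation may need a different -arch value when rebuilding.

--------------------------------------------------------------------------------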
/libs/visualization/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.py
--------------------------------------------------------------------------------
/libs/visualization/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.pyc
--------------------------------------------------------------------------------
/libs/visualization/pil_utils.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance
4 | import scipy.misc
5 |
6 | FLAGS = tf.app.flags.FLAGS
7 | _DEBUG = False
8 |
9 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None):
10 | #print("image")
11 | #print(image)
12 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0)
13 | norm_image = np.uint8(image/0.1*127.0 + 127.0)
14 | #print("norm_image")
15 | #print(norm_image)
16 | source_img = Image.fromarray(norm_image)
17 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG')
18 |
19 | colors = np.random.randint(5, size=(80, 3))
20 |
21 |
22 | def draw_bbox_better(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
23 | import cv2
24 | #source_img = Image.fromarray(image)
25 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
26 | #b, g, r = source_img.split()
27 | #source_img = Image.merge("RGB", (r, g, b))
28 | #draw = ImageDraw.Draw(source_img)
29 | #color = '#0000ff'
30 | if bbox is not None:
31 |         dictionary = {}
32 |
33 | for i, box in enumerate(bbox):
34 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
35 |                 area = round(float((box[2]-box[0])*(box[3]-box[1])), 4)  # round once, so the collision check and the insert below use the same key
36 |                 while area in dictionary:
37 |                     area += 1
38 | width = int(box[2])-int(box[0])
39 | height = int(box[3])-int(box[1])
40 | mask = final_mask[i]
41 | mask = mask[...,label[i]]
42 | mask = scipy.misc.imresize(mask,(height,width))
43 |
44 |                 dictionary[area] = (box, label[i], gt_label[i], prob[i, label[i]], mask, colors[label[i], :])
45 |         sorted_keys = sorted(dictionary.keys(), reverse=True)  # largest boxes first
46 |
47 | big_mask = np.zeros((image.shape[0],image.shape[1],len(bbox)),dtype=np.float32)
48 |
49 | i=0
50 | for key in sorted_keys:
51 |             bo, _, _, _, msk, _ = dictionary[key]
52 | big_mask[int(bo[1]):int(bo[3]),int(bo[0]):int(bo[2]),i] = msk
53 | i=i+1
54 |
55 | max_indices = np.argmax(big_mask,axis=2)
56 | for key in sorted_keys:
57 |             bo, lab, gt_lab, _, _, col = dictionary[key]
58 | for x in range(int(bo[0]),int(bo[2])):
59 | for y in range(int(bo[1]),int(bo[3])):
60 |                     # big_mask channels were filled in sorted_keys order, so look the
61 |                     # color up via sorted_keys (dict value order is not guaranteed)
62 |                     _, _, _, _, _, col = dictionary[sorted_keys[max_indices[y, x]]]
63 |                     image[y, x, ...] = col
64 | #hsv[y,x,0]=color[0]
65 | #hsv[y,x,1]=hsv[y,x,1]*0.9
66 | text = cat_id_to_cls_name(lab)
67 | image = cv2.putText(image,text,(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX, 4,(255,255,255),2)
68 | if lab != gt_lab:
69 | c = (255,0,0)
70 | else:
71 | c = (0,0,255)
72 | image = cv2.rectangle(image,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3)
73 | cv2.imwrite('output/est_imgs/test_' + name + '_' + str(step) +'.jpg',image)
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | def draw_bbox(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
82 | #print(prob[:,label])
83 | source_img = Image.fromarray(image)
84 | b, g, r = source_img.split()
85 | source_img = Image.merge("RGB", (r, g, b))
86 | draw = ImageDraw.Draw(source_img)
87 | color = '#0000ff'
88 | if bbox is not None:
89 | for i, box in enumerate(bbox):
90 | if label is not None:
91 | if prob is not None:
92 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
93 | if gt_label is not None:
94 | text = cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i])
95 | if label[i] != gt_label[i]:
96 | color = '#ff0000'#draw.text((2+bbox[i,0], 2+bbox[i,1]), cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]), fill='#ff0000')
97 | else:
98 | color = '#0000ff'
99 | else:
100 | text = cat_id_to_cls_name(label[i])
101 | #############################DRAW SEGMENTATION
102 |                         width = int(box[2]) - int(box[0])   # imresize needs integer sizes
103 |                         height = int(box[3]) - int(box[1])
104 | #print (final_mask.shape)
105 | mask = final_mask[i]
106 | mask = mask[...,label[i]]
107 | mask = scipy.misc.imresize(mask,(height,width))
108 | mask_pil = Image.fromarray(mask)
109 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
110 | #draw.bitmap((int(box[0]),int(box[1])),mask_pil,fill='#00ffff')
111 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
112 | if _DEBUG is True:
113 | print("plot",label[i], prob[i,label[i]])
114 | draw.rectangle(box,fill=None,outline=color)
115 |
116 | else:
117 | if _DEBUG is True:
118 | print("skip",label[i], prob[i,label[i]])
119 | else:
120 | #############################DRAW GT SEGMENTATION
121 | if final_mask is not None:
122 | mask = final_mask[i]
123 | mask_pil = Image.fromarray(mask)
124 | mask_pil = mask_pil.crop([int(box[0]),int(box[1]),int(box[2]),int(box[3])])
125 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
126 | text = cat_id_to_cls_name(label[i])
127 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
128 | draw.rectangle(box,fill=None,outline=color)
129 |
130 |
131 | return source_img.save(FLAGS.train_dir + 'est_imgs/test_' + name + '_' + str(step) +'.jpg', 'JPEG')
132 |
133 | def cat_id_to_cls_name(catId):
134 | cls_name = np.array([ 'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
135 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
136 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
137 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
138 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
139 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
140 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
141 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
142 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
143 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
144 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
145 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
146 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
147 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush'])
148 | return cls_name[catId]
149 |
--------------------------------------------------------------------------------
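cat_id_to_cls_name above maps a contiguous label index straight into the COCO class-name table (index 0 = background), for example:

    print(cat_id_to_cls_name(1))    # 'person'
    print(cat_id_to_cls_name(17))   # 'dog'

--------------------------------------------------------------------------------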
/libs/visualization/pil_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/pil_utils.pyc
--------------------------------------------------------------------------------
/libs/visualization/summary_utils.py:
--------------------------------------------------------------------------------
1 | import tensorflow as tf
2 |
3 |
4 | def visualize_input(boxes, image, masks):
5 | image_sum_sample = image[:1]
6 | visualize_masks(masks, "input_image_gt_mask")
7 | visualize_bb(image, boxes, "input_image_gt_bb")
8 | visualize_input_image(image_sum_sample)
9 |
10 |
11 | def visualize_rpn_predictions(boxes, image):
12 | image_sum_sample = image[:1]
13 | visualize_bb(image_sum_sample, boxes, "rpn_pred_bb")
14 |
15 | # TODO: Present all masks in different colors
16 | def visualize_masks(masks, name):
17 | masks = tf.cast(masks, tf.float32)
18 | tf.summary.image(name=name, tensor=masks, max_outputs=1)
19 |
20 |
21 | def visualize_bb(image, boxes, name):
22 | image_sum_sample_shape = tf.shape(image)[1:]
23 | gt_x_min = boxes[:, 0] / tf.cast(image_sum_sample_shape[1], tf.float32)
24 | gt_y_min = boxes[:, 1] / tf.cast(image_sum_sample_shape[0], tf.float32)
25 | gt_x_max = boxes[:, 2] / tf.cast(image_sum_sample_shape[1], tf.float32)
26 | gt_y_max = boxes[:, 3] / tf.cast(image_sum_sample_shape[0], tf.float32)
27 | bb = tf.stack([gt_y_min, gt_x_min, gt_y_max, gt_x_max], axis=1)
28 | tf.summary.image(name=name,
29 | tensor=tf.image.draw_bounding_boxes(image, tf.expand_dims(bb, 0), name=None),
30 | max_outputs=1)
31 |
32 |
33 | def visualize_input_image(image):
34 | tf.summary.image(name="input_image", tensor=image, max_outputs=1)
35 |
36 |
37 | def visualize_final_predictions(boxes, image, masks):
38 | visualize_masks(masks, "pred_mask")
39 | visualize_bb(image, boxes, "final_bb_pred")
40 |
--------------------------------------------------------------------------------
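visualize_bb above converts pixel-space [x1, y1, x2, y2] boxes into the normalized, y-first [y_min, x_min, y_max, x_max] layout that tf.image.draw_bounding_boxes expects. For a 400x600 (h x w) image (illustrative numbers):

    h, w = 400.0, 600.0
    x1, y1, x2, y2 = 60.0, 40.0, 300.0, 200.0
    bb = (y1 / h, x1 / w, y2 / h, x2 / w)   # (0.1, 0.1, 0.5, 0.5)

--------------------------------------------------------------------------------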
/libs/visualization/summary_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/summary_utils.pyc
--------------------------------------------------------------------------------
/media/file.txt:
--------------------------------------------------------------------------------
1 |
2 |
--------------------------------------------------------------------------------
/media/testseg122_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg122_1.jpg
--------------------------------------------------------------------------------
/media/testseg226_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg226_1.jpg
--------------------------------------------------------------------------------
/media/testseg255_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg255_1.jpg
--------------------------------------------------------------------------------
/media/testseg293_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg293_1.jpg
--------------------------------------------------------------------------------
/media/testseg296_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg296_1.jpg
--------------------------------------------------------------------------------
/media/testseg305_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg305_1.jpg
--------------------------------------------------------------------------------
/media/testseg35_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg35_1.jpg
--------------------------------------------------------------------------------
/media/testseg57_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg57_1.jpg
--------------------------------------------------------------------------------
/media/testseg70_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg70_1.jpg
--------------------------------------------------------------------------------
/train/__init__.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from . import train_utils
5 |
--------------------------------------------------------------------------------
/train/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/__init__.pyc
--------------------------------------------------------------------------------
/train/train_utils.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import collections
8 | import tensorflow as tf
9 | import libs.configs.config_v1 as cfg
10 |
11 | slim = tf.contrib.slim
12 | FLAGS = tf.app.flags.FLAGS
13 |
14 | def _configure_optimizer(learning_rate):
15 | """Configures the optimizer used for training.
16 |
17 | Args:
18 | learning_rate: A scalar or `Tensor` learning rate.
19 |
20 | Returns:
21 | An instance of an optimizer.
22 |
23 | Raises:
24 | ValueError: if FLAGS.optimizer is not recognized.
25 | """
26 | if FLAGS.optimizer == 'adadelta':
27 | optimizer = tf.train.AdadeltaOptimizer(
28 | learning_rate,
29 | rho=FLAGS.adadelta_rho,
30 | epsilon=FLAGS.opt_epsilon)
31 | elif FLAGS.optimizer == 'adagrad':
32 | optimizer = tf.train.AdagradOptimizer(
33 | learning_rate,
34 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
35 | elif FLAGS.optimizer == 'adam':
36 | optimizer = tf.train.AdamOptimizer(
37 | learning_rate,
38 | beta1=FLAGS.adam_beta1,
39 | beta2=FLAGS.adam_beta2,
40 | epsilon=FLAGS.opt_epsilon)
41 | elif FLAGS.optimizer == 'ftrl':
42 | optimizer = tf.train.FtrlOptimizer(
43 | learning_rate,
44 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
45 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
46 | l1_regularization_strength=FLAGS.ftrl_l1,
47 | l2_regularization_strength=FLAGS.ftrl_l2)
48 |   elif FLAGS.optimizer == 'momentum':  # the branch taken in this setup
49 | optimizer = tf.train.MomentumOptimizer(
50 | learning_rate,
51 | momentum=FLAGS.momentum,
52 | name='Momentum')
53 | elif FLAGS.optimizer == 'rmsprop':
54 | optimizer = tf.train.RMSPropOptimizer(
55 | learning_rate,
56 | decay=FLAGS.rmsprop_decay,
57 | momentum=FLAGS.rmsprop_momentum,
58 | epsilon=FLAGS.opt_epsilon)
59 | elif FLAGS.optimizer == 'sgd':
60 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
61 | else:
62 |     raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
63 | return optimizer
64 |
65 | def _configure_learning_rate(num_samples_per_epoch, global_step):
66 | """Configures the learning rate.
67 |
68 | Args:
69 | num_samples_per_epoch: The number of samples in each epoch of training.
70 | global_step: The global_step tensor.
71 |
72 | Returns:
73 | A `Tensor` representing the learning rate.
74 |
75 | Raises:
76 | ValueError: if
77 |     ValueError: if FLAGS.learning_rate_decay_type is not recognized.
78 |   decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *  # batch_size is 1; num_epochs_per_decay is 2
79 |                     FLAGS.num_epochs_per_decay)  # 165566 for num_samples_per_epoch=82783
80 | if FLAGS.sync_replicas:
81 | decay_steps /= FLAGS.replicas_to_aggregate
82 |
83 |   if FLAGS.learning_rate_decay_type == 'exponential':  # the branch taken in this setup
84 |     return tf.train.exponential_decay(FLAGS.learning_rate,  # 0.0002 in this setup
85 |                                       global_step,
86 |                                       decay_steps,  # 165566 for num_samples_per_epoch=82783
87 |                                       FLAGS.learning_rate_decay_factor,  # 0.94
88 |                                       staircase=True,
89 |                                       name='exponential_decay_learning_rate')
90 |     # without staircase rounding: 0.0002 * 0.94**(130000/165566) ≈ 0.000190516
91 |     # and 0.0002 * 0.94**(1/165566) ≈ 0.0002
92 |
93 | elif FLAGS.learning_rate_decay_type == 'fixed':
94 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
95 | elif FLAGS.learning_rate_decay_type == 'polynomial':
96 | return tf.train.polynomial_decay(FLAGS.learning_rate,
97 | global_step,
98 | decay_steps,
99 | FLAGS.end_learning_rate,
100 | power=0.9,
101 | cycle=False,
102 | name='polynomial_decay_learning_rate')
103 | else:
104 |     raise ValueError('learning_rate_decay_type [%s] was not recognized' %
105 |                      FLAGS.learning_rate_decay_type)
106 |
107 | def _get_variables_to_train():
108 | """Returns a list of variables to train.
109 |
110 | Returns:
111 | A list of variables to train by the optimizer.
112 | """
113 | if FLAGS.trainable_scopes is None:
114 | return tf.trainable_variables()
115 | else:
116 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
117 |
118 | variables_to_train = []
119 | for scope in scopes:
120 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
121 | variables_to_train.extend(variables)
122 | return variables_to_train
123 |
124 | def _get_init_fn():
125 | """Returns a function run by the chief worker to warm-start the training.
126 |
127 | Note that the init_fn is only run when initializing the model during the very
128 | first global step.
129 |
130 | Returns:
131 | An init function run by the supervisor.
132 | """
133 | if FLAGS.checkpoint_path is None:
134 | return None
135 |
136 | # Warn the user if a checkpoint exists in the train_dir. Then we'll
137 | # ignore the checkpoint anyway.
138 | if tf.train.latest_checkpoint(FLAGS.train_dir):
139 | tf.logging.info(
140 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s'
141 | % FLAGS.train_dir)
142 | return None
143 |
144 | exclusions = []
145 | if FLAGS.checkpoint_exclude_scopes:
146 | exclusions = [scope.strip()
147 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')]
148 |
149 | # TODO(sguada) variables.filter_variables()
150 | variables_to_restore = []
151 | for var in slim.get_model_variables():
152 | for exclusion in exclusions:
153 | if var.op.name.startswith(exclusion):
154 | break
155 | else:
156 | variables_to_restore.append(var)
157 |
158 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path):
159 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
160 | else:
161 | checkpoint_path = FLAGS.checkpoint_path
162 |
163 | tf.logging.info('Fine-tuning from %s' % checkpoint_path)
164 |
165 | return slim.assign_from_checkpoint_fn(
166 | checkpoint_path,
167 | variables_to_restore,
168 | ignore_missing_vars=FLAGS.ignore_missing_vars)
169 |
170 | def get_var_list_to_restore():
171 | """Choose which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """
172 |
173 | variables_to_restore = []
174 | if FLAGS.checkpoint_exclude_scopes is not None:
175 | exclusions = [scope.strip()
176 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')]
177 |
178 | # build restore list
179 | for var in tf.model_variables():
180 | for exclusion in exclusions:
181 | if var.name.startswith(exclusion):
182 | break
183 | else:
184 | variables_to_restore.append(var)
185 | else:
186 | variables_to_restore = tf.model_variables()
187 |
188 | variables_to_restore_final = []
189 | if FLAGS.checkpoint_include_scopes is not None:
190 | includes = [
191 | scope.strip()
192 | for scope in FLAGS.checkpoint_include_scopes.split(',')
193 | ]
194 | for var in variables_to_restore:
195 | for include in includes:
196 | if var.name.startswith(include):
197 | variables_to_restore_final.append(var)
198 | break
199 | else:
200 | variables_to_restore_final = variables_to_restore
201 |
202 | return variables_to_restore_final
203 |
--------------------------------------------------------------------------------
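Plugging the values from the comments in _configure_learning_rate into the exponential schedule (learning_rate 0.0002, decay factor 0.94, decay_steps 165566): because staircase=True, the rate only drops once a full decay period has elapsed, which is why step 130000 still trains at the base rate. A small check, assuming those flag values:

    base_lr, decay_factor, decay_steps = 0.0002, 0.94, 165566

    def staircase_lr(step):
        return base_lr * decay_factor ** (step // decay_steps)

    print(staircase_lr(130000))   # 0.0002    (still inside the first decay period)
    print(staircase_lr(200000))   # 0.000188  (one decay applied)

--------------------------------------------------------------------------------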
/train/train_utils.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/train_utils.pyc
--------------------------------------------------------------------------------
/unit_test/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/unit_test/__init__.py
--------------------------------------------------------------------------------
/unit_test/data_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 | import functools
7 |
8 | import sys
9 | import os
10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
11 | import numpy as np
12 | import PIL.Image as Image
13 | from PIL import ImageDraw
14 | import tensorflow as tf
15 | import tensorflow.contrib.slim as slim
16 | from libs.logs.log import LOG
17 | import libs.configs.config_v1 as cfg
18 | import libs.nets.resnet_v1 as resnet_v1
19 | import libs.datasets.dataset_factory as dataset_factory
20 | import libs.datasets.coco as coco
21 | import libs.preprocessings.coco_v1 as preprocess_coco
22 | from libs.layers import ROIAlign
23 |
24 | resnet50 = resnet_v1.resnet_v1_50
25 | FLAGS = tf.app.flags.FLAGS
26 |
27 | with tf.Graph().as_default():
28 |
29 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \
30 | coco.read('./data/coco/records/coco_train2014_00001-of-00033.tfrecord')
31 |
32 | image, gt_boxes, gt_masks = \
33 | preprocess_coco.preprocess_image(image, gt_boxes, gt_masks)
34 |
35 |
36 |
37 | sess = tf.Session()
38 | init_op = tf.group(tf.global_variables_initializer(),
39 | tf.local_variables_initializer())
40 | # init_op = tf.initialize_all_variables()
41 |
42 | boxes = [[100, 100, 200, 200],
43 | [50, 50, 100, 100],
44 | [100, 100, 750, 750],
45 | [50, 50, 60, 60]]
46 | # boxes = np.zeros((0, 4))
47 | boxes = tf.constant(boxes, tf.float32)
48 | feat = ROIAlign(image, boxes, False, 16, 7, 7)
49 | sess.run(init_op)
50 |
51 | tf.train.start_queue_runners(sess=sess)
52 | with sess.as_default():
53 | for i in range(20000):
54 | image_np, ih_np, iw_np, gt_boxes_np, gt_masks_np, num_instances_np, img_id_np, \
55 | feat_np = \
56 | sess.run([image, ih, iw, gt_boxes, gt_masks, num_instances, img_id,
57 | feat])
58 | # print (image_np.shape, gt_boxes_np.shape, gt_masks_np.shape)
59 |
60 | if i % 1 == 0:
61 | print ('%d, image_id: %s, instances: %d'% (i, str(img_id_np), num_instances_np))
62 | image_np = 256 * (image_np * 0.5 + 0.5)
63 | image_np = image_np.astype(np.uint8)
64 | image_np = np.squeeze(image_np)
65 | print (image_np.shape, ih_np, iw_np)
66 | print (feat_np.shape)
67 | im = Image.fromarray(image_np)
68 | imd = ImageDraw.Draw(im)
69 |                 for j in range(gt_boxes_np.shape[0]):  # use j: i is the outer step counter
70 |                     imd.rectangle(gt_boxes_np[j, :])
71 | im.save(str(img_id_np) + '.png')
72 | mask = np.sum(gt_masks_np, axis=0, dtype='uint8')
73 | white_pos = np.where(mask > 0)
74 | mask[white_pos] = 255
75 | mask_img = Image.fromarray(mask)
76 | mask_img.save('mask_' + str(img_id_np) + '.png')
77 | # print (gt_boxes_np)
78 | sess.close()
79 |
--------------------------------------------------------------------------------
/unit_test/preprocessing_test.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | import numpy as np
5 | import sys
6 | import os
7 | import tensorflow as tf
8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..'))
9 |
10 | import libs.preprocessings.coco_v1 as coco_preprocess
11 | import libs.configs.config_v1 as cfg
12 |
13 | ih, iw, ic = 400,500, 3
14 | N = 3
15 | image = np.random.randint(0, 255, (ih, iw, ic)).astype(np.uint8)
16 | gt_masks = np.zeros((N, ih, iw)).astype(np.int32)
17 | xy = np.random.randint(0, min(iw, ih)-100, (N, 2)).astype(np.float32)
18 | wh = np.random.randint(20, 40, (N, 2)).astype(np.float32)
19 | cls = np.random.randint(1, 6, (N, 1)).astype(np.float32)
20 | gt_boxes = np.hstack((xy, xy + wh, cls)).astype(np.float32)
21 | gt_boxes_np = gt_boxes
22 | image_np = image
23 | gt_masks_np = gt_masks
24 |
25 | for i in range(N):
26 | box = gt_boxes[i, 0:4]
27 | gt_masks[i, int(box[1]):int(box[3]),
28 | int(box[0]):int(box[2])] = 1
29 | image = tf.constant(image)
30 | gt_boxes = tf.constant(gt_boxes)
31 | gt_masks = tf.constant(gt_masks)
32 |
33 | image, gt_boxes, gt_masks = \
34 | coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True)
35 |
36 | with tf.Session() as sess:
37 | # print(image.eval())
38 | image_tf, gt_boxes_tf, gt_masks_tf = \
39 | sess.run([image, gt_boxes, gt_masks])
40 | print ('#######################')
41 | print ('DATA PREPROCESSING TEST')
42 | print ('#######################')
43 | print ('gt_boxes shape:', gt_boxes_tf.shape)
44 | print('mask shape:', gt_masks_tf.shape)
45 | print(gt_boxes_tf)
46 | for i in range(N):
47 | box = np.round(gt_boxes_tf[i, 0:4])
48 | box = box.astype(np.int32)
49 | m = gt_masks_tf[i, box[1]:box[3], box[0]:box[2]]
50 | print ('after:', box)
51 | print (np.sum(m)/ (0.0 + m.size))
52 | print (m)
53 | box = np.round(gt_boxes_np[i, 0:4])
54 | box = box.astype(np.int32)
55 | m = gt_masks_np[i, box[1]:box[3], box[0]:box[2]]
56 | print ('ori box:', box)
57 | print (np.sum(m)/ (0.0 + m.size))
58 |
--------------------------------------------------------------------------------