├── .gitignore
├── .idea
│   ├── MaskRCNN_body.iml
│   ├── inspectionProfiles
│   │   └── Project_Default.xml
│   ├── misc.xml
│   ├── modules.xml
│   ├── vcs.xml
│   └── workspace.xml
├── Documentation.odt
├── README.md
├── convert_data
│   ├── ChalearnLAPEvaluation.py
│   ├── ChalearnLAPSample.py
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── convert_ADE20k_human_body_parts.py
│   ├── convert_CHALEARN_human_body_parts.py
│   ├── convert_VOC_human_body_parts.py
│   ├── convert_jhmdb.py
│   ├── download_and_convert_data.sh
│   ├── human_body_parts.m
│   ├── read_my_data_keypoints.py
│   └── visualize_records_human_body_parts.py
├── crontab.sh
├── data
│   └── README.md
├── document.pdf
├── draw
│   ├── __init__.py
│   ├── draw.py
│   ├── metric.py
│   ├── utils.py
│   └── utils.pyc
├── libs
│   ├── Makefile
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── boxes
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── bbox.pyx
│   │   ├── bbox_transform.py
│   │   ├── bbox_transform.pyc
│   │   ├── blob.py
│   │   ├── cython_anchor.py
│   │   ├── cython_anchor.pyx
│   │   ├── cython_bbox.py
│   │   ├── cython_bbox_transform.py
│   │   ├── cython_bbox_transform.pyx
│   │   ├── cython_nms.py
│   │   ├── cython_nms.pyc
│   │   ├── gprof2dot.py
│   │   ├── nms.py
│   │   ├── nms.pyc
│   │   ├── nms.pyx
│   │   ├── nms_wrapper.py
│   │   ├── nms_wrapper.pyc
│   │   ├── profile
│   │   ├── profile.png
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── timer.py
│   │   └── timer.pyc
│   ├── configs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── config_v1.py
│   │   └── config_v1.pyc
│   ├── datasets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco.py
│   │   ├── coco.pyc
│   │   ├── dataset_factory.py
│   │   ├── dataset_factory.pyc
│   │   ├── download_and_convert_coco.py
│   │   ├── download_and_convert_coco.pyc
│   │   └── pycocotools
│   │       ├── Makefile
│   │       ├── __init__.py
│   │       ├── __init__.pyc
│   │       ├── _mask.c
│   │       ├── _mask.pyx
│   │       ├── _mask.so
│   │       ├── coco.py
│   │       ├── coco.pyc
│   │       ├── cocoeval.py
│   │       ├── common
│   │       │   ├── gason.cpp
│   │       │   ├── gason.h
│   │       │   ├── maskApi.c
│   │       │   └── maskApi.h
│   │       ├── mask.py
│   │       ├── mask.pyc
│   │       └── setup.py
│   ├── layers
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── anchor.py
│   │   ├── anchor.pyc
│   │   ├── assign.py
│   │   ├── assign.pyc
│   │   ├── crop.py
│   │   ├── crop.pyc
│   │   ├── mask.py
│   │   ├── mask.pyc
│   │   ├── roi.py
│   │   ├── roi.pyc
│   │   ├── sample.py
│   │   ├── sample.pyc
│   │   ├── wrapper.py
│   │   └── wrapper.pyc
│   ├── logs
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── log.py
│   │   └── log.pyc
│   ├── make.sh
│   ├── memory_util.py
│   ├── nets
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── nets_factory.py
│   │   ├── nets_factory.pyc
│   │   ├── pyramid_network.py
│   │   ├── pyramid_network.pyc
│   │   ├── resnet_utils.py
│   │   ├── resnet_utils.pyc
│   │   ├── resnet_v1.py
│   │   ├── resnet_v1.pyc
│   │   └── train_utils.py
│   ├── nms
│   │   ├── .gitignore
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── cpu_nms.pyx
│   │   ├── gpu_nms.hpp
│   │   ├── gpu_nms.pyx
│   │   ├── nms_kernel.cu
│   │   └── py_cpu_nms.py
│   ├── preprocessings
│   │   ├── __init__.py
│   │   ├── __init__.pyc
│   │   ├── coco_v1.py
│   │   ├── coco_v1.pyc
│   │   ├── utils.py
│   │   └── utils.pyc
│   ├── setup.py
│   └── visualization
│       ├── __init__.py
│       ├── __init__.pyc
│       ├── pil_utils.py
│       ├── pil_utils.pyc
│       ├── summary_utils.py
│       └── summary_utils.pyc
├── mask_rcnn_final.xml
├── media
│   ├── file.txt
│   ├── testseg122_1.jpg
│   ├── testseg226_1.jpg
│   ├── testseg255_1.jpg
│   ├── testseg293_1.jpg
│   ├── testseg296_1.jpg
│   ├── testseg305_1.jpg
│   ├── testseg35_1.jpg
│   ├── testseg57_1.jpg
│   └── testseg70_1.jpg
├── train
│   ├── __init__.py
│   ├── __init__.pyc
│   ├── train.py
│   ├── train_utils.py
│   └── train_utils.pyc
└── unit_test
    ├── __init__.py
    ├── data_test.py
    ├── layer_test.py
    ├── preprocessing_test.py
    └── resnet50_test.py

-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | data/pretrained_models/ 2 | data/coco/ 3 | output/mask_rcnn/ 4 |
convert_data/data/ 5 | draw/data/ 6 | draw/output_seg/ 7 | 8 | 9 | -------------------------------------------------------------------------------- /.idea/MaskRCNN_body.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 11 | -------------------------------------------------------------------------------- /.idea/inspectionProfiles/Project_Default.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 61 | -------------------------------------------------------------------------------- /.idea/misc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | -------------------------------------------------------------------------------- /.idea/modules.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /.idea/vcs.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /Documentation.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/Documentation.odt -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Detecting human body parts and Building Skeleton Models using Deep Convolutional Neural Networks 2 | 3 | This repository contains an extension to the amazing work done by [CharlesShang](https://github.com/CharlesShang/FastMaskRCNN). 4 | This is a neural network model that performs object detection, classification and segmentation. 5 | The idea is to detect all persons in an image and segment their body parts. The next step is to add keypoint regression. The model runs at about 200 ms/frame (roughly 5 fps) on a Titan X GPU. 6 | A practical use for this kind of model would be a fashion application that reads your body shape and renders different clothes onto it, so the user can preview how they would look. 7 | Here are some results from the training set. 8 | 9 | ![demo](media/testseg57_1.jpg) 10 | ![demo](media/testseg122_1.jpg) 11 | ![demo](media/testseg226_1.jpg) 12 | ![demo](media/testseg255_1.jpg) 13 | ![demo](media/testseg293_1.jpg) 14 | ![demo](media/testseg296_1.jpg) 15 | ![demo](media/testseg305_1.jpg) 16 | 17 | The following videos show the model doing whole-body segmentation (the individual parts are not drawn). 18 | The model output has been modified to have 2 classes for detection (human, non-human) and 7 segmentation classes (full body, head, torso, right hand, left hand, right leg, left leg).
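Concretely, every detected person carries one seven-channel mask tensor. Below is a sketch of the channel layout, assuming the ordering used by the `body_parts_dict` comments in the `convert_data/` scripts (the converters emit `[H, W, 7]` per-person masks; the mask branch works at `112x112`):

```python
# channel layout of a per-person mask tensor, as built by the convert_data scripts
MASK_CHANNELS = {
    0: 'full body',  # union of all parts
    1: 'head',
    2: 'torso',
    3: 'left arm',   # hand + forearm + upper arm merged into one class
    4: 'right arm',
    5: 'left leg',   # foot + lower leg + upper leg merged into one class
    6: 'right leg',
}
```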
19 | 20 | IMAGE ALT TEXT HERE 21 | IMAGE ALT TEXT HERE 22 | 23 | The following video shows the results of the same model, but this time the body parts are drawn as well. 24 | 25 | IMAGE ALT TEXT HERE 26 | 27 | # Installation 28 | ``` 29 | git clone https://github.com/Iftimie/MaskRCNN_body.git 30 | cd MaskRCNN_body 31 | mkdir data/coco 32 | mkdir data/coco/records 33 | cd data/coco/records/ 34 | 35 | wget https://www.dropbox.com/s/43ihvomchvwtpns/checkpoint 36 | wget https://www.dropbox.com/s/v6084wee6pjlfk4/coco_resnet50_model.ckpt-248000.data-00000-of-00001 37 | wget https://www.dropbox.com/s/0gqxnbsjzpuz0tz/coco_resnet50_model.ckpt-248000.index 38 | wget https://www.dropbox.com/s/3uildv0wlh79oad/coco_resnet50_model.ckpt-248000.meta 39 | #modify the checkpoint file with your path 40 | cd ../../.. 41 | git checkout test 42 | #modify line 180 in train/test.py with your ip address 43 | #modify line 36 in train/client.py with the respective ip address 44 | 45 | #in one terminal 46 | CUDA_VISIBLE_DEVICES=0 python train/test.py 47 | 48 | #in another terminal. Make sure to have a webcam connected 49 | python train/client.py 50 | ``` 51 | -------------------------------------------------------------------------------- /convert_data/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.py -------------------------------------------------------------------------------- /convert_data/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/convert_data/__init__.pyc -------------------------------------------------------------------------------- /convert_data/convert_CHALEARN_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import ChalearnLAPSample 3 | import numpy as np 4 | import tensorflow as tf 5 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 6 | 7 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 8 | body_parts_dict={ 9 | 1:1,#head 10 | 2:2,#torso 11 | 3:3,#left hand 12 | 5:3,#left forearm (lower) 13 | 7:3,#left upper arm 14 | 4:4,#right hand 15 | 6:4,#right forearm 16 | 8:4,#right upper arm 17 | 9:5,#left foot 18 | 11:5,#left lower leg 19 | 13:5,#left upper leg 20 | 10:6,#right foot 21 | 12:6,#right lower leg 22 | 14:6,#right upper leg 23 | }
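# (illustration, not part of the original script) the dict collapses CHALEARN's
# 14 limb labels into 6 part classes, e.g. hand, forearm and upper arm share one id:
# body_parts_dict[3] == body_parts_dict[5] == body_parts_dict[7] == 3    # -> left arm
# body_parts_dict[9] == body_parts_dict[11] == body_parts_dict[13] == 5  # -> left leg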
24 | 25 | # poseSample = ChalearnLAPSample.PoseSample("Seq01.zip") 26 | # actorid=1 27 | # limbid=2 28 | # cv2.namedWindow("Seqxx",cv2.WINDOW_NORMAL) 29 | # cv2.namedWindow("Torso",cv2.WINDOW_NORMAL) 30 | # for x in range(1, poseSample.getNumFrames()): 31 | # img=poseSample.getRGB(x) 32 | # torso=poseSample.getLimb(x,actorid,6) 33 | # cv2.imshow("Seqxx",img) 34 | # cv2.imshow("Torso",torso) 35 | # cv2.waitKey(1000) 36 | # cv2.destroyAllWindows() 37 | 38 | #loadData takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 39 | def loadData(frame_id,img,poseSample): 40 | H,W = img.shape[0],img.shape[1] 41 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 42 | masks_instances = [] #shape: [N,H,W,7] 43 | for actorid in range(1,3): # there are at most 2 persons in one image 44 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 45 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body 46 | for limbid in range(1,15): 47 | part = poseSample.getLimb(frame_id,actorid,limbid) #get part mask 48 | part = cv2.resize(part[...,0]/255,(W,H)) 49 | masks_for_person[...,body_parts_dict[limbid]] = np.logical_or(masks_for_person[...,body_parts_dict[limbid]],part) #this is where I combine for example right upper leg and right lower leg 50 | one_mask_person=np.logical_or(one_mask_person,part) # this is where I merge the part mask into the whole body 51 | 52 | masks_for_person[...,0] = one_mask_person 53 | _,contours,hierarchy = cv2.findContours(one_mask_person.astype(np.uint8).copy(), 1, 2) #### from here 54 | if len(contours)==0: 55 | continue 56 | x1=100000 57 | y1=100000 58 | x2=-10000 59 | y2=-10000 60 | for contour in contours: 61 | x,y,w,h = cv2.boundingRect(contour) 62 | xw,yh = x+w,y+h 63 | if x < x1: 64 | x1=x 65 | if y < y1: 66 | y1=y 67 | if xw > x2: 68 | x2=xw 69 | if yh > y2: 70 | y2=yh 71 | gt_boxes.append([x1,y1,x2,y2,1]) #####to here I select the bounding box of the person instance; the mask might be split into multiple blobs 72 | masks_instances.append(masks_for_person) 73 | 74 | if len(gt_boxes) ==0: 75 | return False,None,None,None,H,W 76 | masks_instances = np.array(masks_instances,dtype=np.uint8) 77 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 78 | # for h_box in gt_boxes: 79 | # image = cv2.rectangle(img,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 80 | # cv2.imshow("img",image) 81 | # cv2.waitKey(100) 82 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard 83 | return True,gt_boxes,masks_instances,mask,H,W
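# (added note) contract of loadData when at least one actor is visible:
#   gt_boxes: float32 array of shape [N, 5] -- [x1, y1, x2, y2, class_id] per person
#   masks_instances: uint8 array of shape [N, H, W, 7] -- channel 0 = whole body, 1-6 = parts
#   mask: a single [H, W] part mask, only used for the tensorboard summary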
84 | 85 | def _int64_feature(values): 86 | if not isinstance(values, (tuple, list)): 87 | values = [values] 88 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 89 | 90 | def _bytes_feature(values): 91 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 92 | 93 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 94 | height, width, 95 | num_instances, gt_boxes, masks): 96 | """ just write a raw input""" 97 | return tf.train.Example(features=tf.train.Features(feature={ 98 | 'image/img_id': _int64_feature(image_id), 99 | 'image/encoded': _bytes_feature(image_data), 100 | 'image/height': _int64_feature(height), 101 | 'image/width': _int64_feature(width), 102 | 'label/num_instances': _int64_feature(num_instances), # N 103 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 104 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 105 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 106 | })) 107 | 108 | 109 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 110 | record_filename = "out_human_and_body_parts_chalearn.tfrecord" 111 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 112 | for seq,seq_id in zip(["Seq01.zip","Seq02.zip","Seq03.zip","Seq04.zip","Seq06.zip"],range(5)): # 5 movies 113 | #for seq,seq_id in zip(["Seq03.zip"],range(5)): 114 | poseSample = ChalearnLAPSample.PoseSample(seq) #Chalearn API 115 | for x in range(1, poseSample.getNumFrames(),6): # take every 6th frame only; consecutive video frames are largely redundant 116 | img=poseSample.getRGB(x) 117 | img_id = seq_id*2000+x 118 | persons_exist,gt_boxes,masks_instances,mask,H,W = loadData(x,img,poseSample) 119 | if not persons_exist: 120 | continue 121 | mask_raw = mask.tostring() 122 | # img = cv2.cvtColor(img,cv2.COLOR_RGB2BGR) 123 | # cv2.imshow("image",img) 124 | # cv2.waitKey(1000) 125 | img_raw = img.tostring() 126 | example = _to_tfexample_coco_raw( 127 | img_id, 128 | img_raw, 129 | mask_raw, 130 | H, W, gt_boxes.shape[0], 131 | gt_boxes.tostring(), masks_instances.tostring()) 132 | print x 133 | 134 | tfrecord_writer.write(example.SerializeToString()) 135 | tfrecord_writer.close() 136 | 137 | 138 | 139 |
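# (sketch, not part of the original script) quick sanity check that the record
# file above was written and can be iterated back with the same ZLIB options:
import tensorflow as tf
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
opts = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
n = sum(1 for _ in tf.python_io.tf_record_iterator("out_human_and_body_parts_chalearn.tfrecord", opts))
print("records written: %d" % n)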
-------------------------------------------------------------------------------- /convert_data/convert_VOC_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io as sio 6 | import cv2 7 | import traceback 8 | import logging 9 | 10 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 11 | body_parts_dict = { 12 | 'head':1, 13 | 'lear':1, 14 | 'rear':1, 15 | 'mouth':1, 16 | 'hair':1, 17 | 'nose':1, 18 | 'leye':1, 19 | 'reye':1, 20 | 'lebrow':1, 21 | 'rebrow':1, 22 | 'torso':2, 23 | 'neck':2, 24 | 'luarm':3, 25 | 'llarm':3, 26 | 'lhand':3, 27 | 'rlarm':4, 28 | 'ruarm':4, 29 | 'rhand':4, 30 | 'llleg':5, 31 | 'luleg':5, 32 | 'lfoot':5, 33 | 'rlleg':6, 34 | 'ruleg':6, 35 | 'rfoot':6 36 | } 37 | 38 | body_parts_dict = { #NOTE: this second definition overwrites the one above; here left/right arm share class 3 and left/right leg share class 5 39 | 'head':1, 40 | 'lear':1, 41 | 'rear':1, 42 | 'mouth':1, 43 | 'hair':1, 44 | 'nose':1, 45 | 'leye':1, 46 | 'reye':1, 47 | 'lebrow':1, 48 | 'rebrow':1, 49 | 'torso':2, 50 | 'neck':2, 51 | 'luarm':3, 52 | 'llarm':3, 53 | 'lhand':3, 54 | 'rlarm':3, 55 | 'ruarm':3, 56 | 'rhand':3, 57 | 'llleg':5, 58 | 'luleg':5, 59 | 'lfoot':5, 60 | 'rlleg':5, 61 | 'ruleg':5, 62 | 'rfoot':5 63 | } 64 | #loadData3 takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 65 | def loadData3(H,W): #### NOTE: img and annotation are not passed in as arguments; Python resolves them from the module scope, where they are set in the loop below 66 | 67 | masks_instances = []#shape: [N,H,W,7] 68 | 69 | persons = [o for o in annotation['anno'][0]['objects'][0][0] if o['class']=='person'] # select all persons from the image 70 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 71 | for i in range(len(persons)): 72 | p = persons[i] 73 | pa = p['parts'] 74 | parts = pa[0] 75 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 76 | one_mask_person = np.zeros((H,W),dtype=np.uint8) # whole body 77 | 78 | for part in parts: 79 | part_name = part['part_name'].astype(str)[0] 80 | index = body_parts_dict[part_name] 81 | masks_for_person[...,index] = np.logical_or(masks_for_person[...,index], part['mask'])#this is where I combine for example right upper leg and right lower leg 82 | one_mask_person=np.logical_or(one_mask_person,part['mask']) # this is where I merge the part mask into the whole body 83 | 84 | masks_for_person[...,0]=one_mask_person 85 | kernel = np.ones((5,5),np.uint8) 86 | one_mask_person = np.array(one_mask_person,dtype=np.uint8)#without this cast cv2.dilate raises an error 87 | one_mask_person = cv2.dilate(one_mask_person,kernel,iterations = 1) 88 | _,contours,hierarchy = cv2.findContours(one_mask_person, 1, 2) #### from here 89 | if len(contours) ==0: 90 | continue 91 | x1=100000 92 | y1=100000 93 | x2=-10000 94 | y2=-10000 95 | for contour in contours: 96 | x,y,w,h = cv2.boundingRect(contour) 97 | xw,yh = x+w,y+h 98 | if x < x1: 99 | x1=x 100 | if y < y1: 101 | y1=y 102 | if xw > x2: 103 | x2=xw 104 | if yh > y2: 105 | y2=yh 106 | gt_boxes.append([x1,y1,x2,y2,1]) 107 | 108 | if True:#########################Body (keeps only the whole-body channel, discarding the part masks) 109 | print ("BODYYYYYYYYYYYYYYYY") 110 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) 111 | masks_for_person[...,0]=one_mask_person 112 | 113 | masks_instances.append(masks_for_person.copy()) #####to here I select the bounding box of the person instance; the mask might be split into multiple blobs 114 | if len(gt_boxes) ==0: 115 | return False,None,None,None 116 | 117 | masks_instances = np.array(masks_instances,dtype=np.uint8) 118 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 119 | mask = masks_instances[0,:,:,1]# this is for drawing the ground truth in the network in tensorboard 120 | return True,gt_boxes,masks_instances,mask
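# (sketch, not used by the repo) the contour-to-box accumulation above, written
# more compactly with numpy; assumes an OpenCV 3.x three-value findContours:
def person_bbox(binary_mask):
    _, contours, _ = cv2.findContours(binary_mask.astype(np.uint8).copy(), 1, 2)
    if not contours:
        return None
    rects = np.array([cv2.boundingRect(c) for c in contours])   # one (x, y, w, h) per blob
    x1, y1 = rects[:, 0].min(), rects[:, 1].min()               # tightest corner over all blobs
    x2, y2 = (rects[:, 0] + rects[:, 2]).max(), (rects[:, 1] + rects[:, 3]).max()
    return [x1, y1, x2, y2, 1]                                  # class 1 = person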
121 | 122 | def _int64_feature(values): 123 | if not isinstance(values, (tuple, list)): 124 | values = [values] 125 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 126 | 127 | def _bytes_feature(values): 128 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 129 | 130 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 131 | height, width, 132 | num_instances, gt_boxes, masks): 133 | """ just write a raw input""" 134 | return tf.train.Example(features=tf.train.Features(feature={ 135 | 'image/img_id': _int64_feature(image_id), 136 | 'image/encoded': _bytes_feature(image_data), 137 | 'image/height': _int64_feature(height), 138 | 'image/width': _int64_feature(width), 139 | 'label/num_instances': _int64_feature(num_instances), # N 140 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 141 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 142 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 143 | })) 144 | 145 | 146 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 147 | record_filename = "data/out_human_and_body_parts.tfrecord" 148 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 149 | for x in range (0,12000): 150 | try: 151 | img_id = x 152 | img_name = '2008_%06d' % (x,) 153 | img = np.array(Image.open('data/JPEGImages/'+img_name+'.jpg')) 154 | annotation = sio.loadmat('data/Annotations_Part/'+img_name+'.mat') 155 | image = cv2.imread('data/JPEGImages/'+img_name+'.jpg') 156 | height, width = img.shape[0],img.shape[1] 157 | img = img.astype(np.uint8) 158 | img_raw = img.tostring() 159 | persons_exist, gt_boxes, masks,mask = loadData3(height, width) 160 | if not persons_exist: 161 | continue 162 | mask_raw = mask.tostring() 163 | 164 | example = _to_tfexample_coco_raw( 165 | img_id, 166 | img_raw, 167 | mask_raw, 168 | height, width, gt_boxes.shape[0], 169 | gt_boxes.tostring(), masks.tostring()) 170 | tfrecord_writer.write(example.SerializeToString()) 171 | print (x) 172 | except BaseException as error: 173 | print error 174 | 175 | tfrecord_writer.close()
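# (sketch, not part of the original script) decode the first example back and
# recover the array shapes, mirroring visualize_records_human_body_parts.py:
import numpy as np
import tensorflow as tf
from tensorflow.python.lib.io.tf_record import TFRecordCompressionType
opts = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB)
ex = tf.train.Example()
ex.ParseFromString(next(tf.python_io.tf_record_iterator("data/out_human_and_body_parts.tfrecord", opts)))
f = ex.features.feature
h, w = f['image/height'].int64_list.value[0], f['image/width'].int64_list.value[0]
n = f['label/num_instances'].int64_list.value[0]
boxes = np.fromstring(f['label/gt_boxes'].bytes_list.value[0], dtype=np.float32).reshape((n, 5))
masks = np.fromstring(f['label/gt_masks'].bytes_list.value[0], dtype=np.uint8).reshape((n, h, w, 7))
print(boxes.shape, masks.shape)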
-------------------------------------------------------------------------------- /convert_data/convert_jhmdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import scipy.io as sio 3 | import cv2 4 | import numpy as np 5 | import tensorflow as tf 6 | from PIL import Image 7 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 8 | 9 | 10 | #the body_parts_dict is used for combining multiple labels into a single part (e.g. right upper leg and right lower leg both map to a single class, right leg) 11 | body_parts_dict = { 12 | 2:1,#head 13 | 1:2,#torso 14 | 4:3,#left upper arm 15 | 8:3,#left lower arm 16 | 3:4,#right upper arm 17 | 7:4,#right lower arm 18 | 6:5,#left upper leg 19 | 5:6,#right upper leg 20 | 9:6,#right lower leg 21 | 10:5,#left lower leg 22 | 23 | } 24 | 25 | body_parts_dict = { #NOTE: this second definition overwrites the one above; here left/right arm share class 3 and left/right leg share class 5 26 | 2:1,#head 27 | 1:2,#torso 28 | 4:3,#left upper arm 29 | 8:3,#left lower arm 30 | 3:3,#right upper arm 31 | 7:3,#right lower arm 32 | 6:5,#left upper leg 33 | 5:5,#right upper leg 34 | 9:5,#right lower leg 35 | 10:5,#left lower leg 36 | } 37 | 38 | # this is used to normalize the x,y of a keypoint to the range [-1, 1] 39 | def map_value(x,A,B,a,b): 40 | return (x-A)*(b-a)/(B-A)+a 41 | 42 | 43 | #loadData takes the dataset's own annotation format and returns the annotations in the form expected by the tfrecords 44 | def loadData(image,instance_mask,parts_mask,keypoints): 45 | gt_boxes = [] #will have shape: [N,x1,y1,x2,y2,cls] 46 | masks_instances = [] #shape: [N,H,W,7] 47 | _,contours,hierarchy = cv2.findContours(instance_mask.copy(), 1, 2) ######### from here 48 | x1=100000 49 | y1=100000 50 | x2=-10000 51 | y2=-10000 52 | for contour in contours: 53 | x,y,w,h = cv2.boundingRect(contour) 54 | xw,yh = x+w,y+h 55 | if x < x1: 56 | x1=x 57 | if y < y1: 58 | y1=y 59 | if xw > x2: 60 | x2=xw 61 | if yh > y2: 62 | y2=yh 63 | gt_boxes.append([x1,y1,x2,y2,1]) ######### to here I find the bbox of the person, since the person's mask might contain multiple blobs 64 | H = image.shape[0] 65 | W = image.shape[1] 66 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 67 | masks_for_person[...,0] = instance_mask.copy() 68 | for x in range(1,11): 69 | part = (parts_mask == x).astype(np.uint8) 70 | masks_for_person[...,body_parts_dict[x]] = np.logical_or(masks_for_person[...,body_parts_dict[x]],part) #this is where I combine for example right upper leg and right lower leg 71 | 72 | for x in range(15): #there are 15 keypoints 73 | # keypoints[0,x] = keypoints[0,x]-x1 74 | # keypoints[1,x] = keypoints[1,x]-y1 75 | keypoints[0,x] = map_value(keypoints[0,x],x1,x2,0.0,112.0) #first normalize the keypoint to the size of the output mask (112x112), because the keypoint regression branch is attached to the mask branch 76 | keypoints[1,x] = map_value(keypoints[1,x],y1,y2,0.0,112.0) 77 | keypoints[0,x] = map_value(keypoints[0,x],0.0,112.0,-1,1) #then normalize it to [-1, 1]; the two steps could be collapsed into one, but are kept separate for visualization/debugging 78 | keypoints[1,x] = map_value(keypoints[1,x],0.0,112.0,-1,1) 79 | 80 | if True:####################BODYYY (keeps only the whole-body channel, discarding the part masks) 81 | masks_for_person = np.zeros((H,W,7),dtype=np.uint8) # whole body + 6 parts 82 | masks_for_person[...,0] = instance_mask.copy() 83 | 84 | masks_instances.append(masks_for_person) 85 | masks_instances = np.array(masks_instances,dtype=np.uint8) 86 | gt_boxes = np.array(gt_boxes,dtype=np.float32) 87 | mask = masks_instances[0,:,:,1] # this mask is used for visualization in tensorboard 88 | keypoints = keypoints.astype(np.float32) 89 | return gt_boxes,masks_instances,mask,H,W,keypoints 90 | 91 | 92 | def _int64_feature(values): 93 | if not isinstance(values, (tuple, list)): 94 | values = [values] 95 | return tf.train.Feature(int64_list=tf.train.Int64List(value=values)) 96 | 97 | def _bytes_feature(values): 98 | return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values])) 99 | 100 | def _to_tfexample_coco_raw(image_id, image_data, label_data, 101 | height, width, 102 | num_instances, gt_boxes, masks,keypoints): 103 | """ just write a
raw input""" 104 | return tf.train.Example(features=tf.train.Features(feature={ 105 | 'image/img_id': _int64_feature(image_id), 106 | 'image/encoded': _bytes_feature(image_data), 107 | 'image/height': _int64_feature(height), 108 | 'image/width': _int64_feature(width), 109 | 'label/num_instances': _int64_feature(num_instances), # N 110 | 'label/gt_boxes': _bytes_feature(gt_boxes), # of shape (N, 5), (x1, y1, x2, y2, classid) 111 | 'label/gt_masks': _bytes_feature(masks), # of shape (N, height, width) 112 | 'label/encoded': _bytes_feature(label_data), # deprecated, this is used for pixel-level segmentation 113 | 'label/keypoints': _bytes_feature(keypoints) 114 | })) 115 | 116 | img_id = 0 117 | scenes = os.listdir('JHMDB_video/ReCompress_Videos') 118 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 119 | record_filename = "out_human_and_body_parts_keypoints_JHMDB.tfrecord" 120 | with tf.python_io.TFRecordWriter(record_filename, options=options) as tfrecord_writer: 121 | for s in scenes: 122 | if s=='.DS_Store': 123 | continue 124 | mask_dir = os.listdir('puppet_mask/'+s) 125 | for mask in mask_dir: 126 | mat_file_instance = sio.loadmat('puppet_mask/'+s+'/'+mask+'/puppet_mask.mat') 127 | video_file = cv2.VideoCapture('JHMDB_video/ReCompress_Videos/'+s+'/'+mask+".avi") 128 | mat_file_parts = sio.loadmat('puppet_flow_com/'+s+'/'+mask+'/puppet_flow.mat') 129 | mat_file_keypoints = sio.loadmat('joint_positions/'+s+'/'+mask+'/joint_positions.mat') 130 | 131 | #ret, image = video_file.read() 132 | for x in range(0,mat_file_parts['part_mask'].shape[2]): 133 | ret, image = video_file.read() 134 | parts = mat_file_parts['part_mask'][...,x] 135 | instance = mat_file_instance['part_mask'][...,x] 136 | keypoints = mat_file_keypoints['pos_img'][...,x] 137 | # parts = mat_file_parts['part_mask'][...,0] 138 | # instance = mat_file_instance['part_mask'][...,0] 139 | # keypoints = mat_file_keypoints['pos_img'][...,0] 140 | 141 | gt_boxes,masks_instances,mask,H,W,keypoints = loadData(image,instance,parts,keypoints) 142 | mask_raw = mask.tostring() 143 | img_raw = image.tostring() 144 | example = _to_tfexample_coco_raw( 145 | img_id, 146 | img_raw, 147 | mask_raw, 148 | H, W, gt_boxes.shape[0], 149 | gt_boxes.tostring(), masks_instances.tostring(),keypoints.tostring()) 150 | tfrecord_writer.write(example.SerializeToString()) 151 | 152 | # cv2.imshow("ar",parts*25) 153 | # cv2.imshow("image",image) 154 | # cv2.imshow("instance",instance*255) 155 | # cv2.waitKey(100) 156 | tfrecord_writer.close() 157 | 158 | 159 | -------------------------------------------------------------------------------- /convert_data/download_and_convert_data.sh: -------------------------------------------------------------------------------- 1 | #mkdir data 2 | 3 | ############################################################################################VOC 4 | #wget http://host.robots.ox.ac.uk/pascal/VOC/voc2010/VOCtrainval_03-May-2010.tar 5 | #tar -xvzf VOCtrainval_03-May-2010.tar -C data/ 6 | #tar -xvf VOCtrainval_03-May-2010.tar -C data/ 7 | #mv data/VOCdevkit/VOC2010/JPEGImages/ data/ 8 | #wget http://www.stat.ucla.edu/~xianjie.chen/pascal_part_dataset/trainval.tar.gz 9 | #tar -xvzf trainval.tar.gz -C data/ 10 | #python convert_VOC_human_body_parts.py 11 | #mv data/out_human_and_body_parts.tfrecord out_human_and_body_parts.tfrecord 12 | 13 | ############################################################################################Chalearn 14 | #mkdir data/chalearn 15 | #mkdir data/chalearn/api_code 16 | 
#wget https://competitions.codalab.org/my/datasets/download/764962c6-c270-4ee1-8721-e5611a5665f2 --no-check-certificate 17 | #wget https://competitions.codalab.org/my/datasets/download/27f9a04b-5499-4acf-b7b2-8aabb26f283c --no-check-certificate 18 | #mv 27f9a04b-5499-4acf-b7b2-8aabb26f283c dataset.zip 19 | #unzip dataset.zip -d data/chalearn/api_code/ 20 | #mv ChalearnLAPEvaluation.py data/chalearn/api_code/ChalearnLAPEvaluation.py 21 | #mv ChalearnLAPSample.py data/chalearn/api_code/ChalearnLAPSample.py 22 | #mv convert_CHALEARN_human_body_parts.py data/chalearn/api_code/convert_CHALEARN_human_body_parts.py 23 | #cd data/chalearn/api_code 24 | #python convert_CHALEARN_human_body_parts.py 25 | #cd ../../.. 26 | #mv data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord out_human_and_body_parts_chalearn.tfrecord 27 | 28 | #############################################################################################ADE20K 29 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/ADE20K_2016_07_26.zip 30 | #wget http://groups.csail.mit.edu/vision/datasets/ADE20K/code.zip 31 | #mkdir data/ade20k 32 | #unzip ADE20K_2016_07_26.zip -d data/ade20k/ 33 | #unzip code.zip -d data/ade20k/ 34 | #mkdir data/ade20k/output_dir 35 | #mv data/ade20k/ADE20K_2016_07_26/index_ade20k.mat data/ade20k/index_ade20k.mat 36 | #mv human_body_parts.m data/ade20k/human_body_parts.m 37 | #cp data/ade20k/code/loadAde20K.m data/ade20k/loadAde20K.m 38 | #cd data/ade20k/ 39 | #octave human_body_parts.m 40 | #cd ../.. 41 | #python convert_ADE20k_human_body_parts.py 42 | #mv data/out_human_and_body_parts_ade_20k_max640edge.tfrecord out_human_and_body_parts_ade_20k.tfrecord 43 | 44 | ###############################################################################################JHMDB 45 | #wget http://files.is.tue.mpg.de/jhmdb/JHMDB_video.zip 46 | #wget http://files.is.tue.mpg.de/jhmdb/joint_positions.zip 47 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_mask.zip 48 | #wget http://files.is.tue.mpg.de/jhmdb/puppet_flow_com.zip 49 | #mkdir data/jhmdb 50 | mkdir data/jhmdb/JHMDB_video 51 | #unzip JHMDB_video.zip -d data/jhmdb/ 52 | mv data/jhmdb/ReCompress_Videos/ data/jhmdb/JHMDB_video 53 | #unzip joint_positions.zip -d data/jhmdb/ 54 | #unzip puppet_mask.zip -d data/jhmdb/ 55 | #unzip puppet_flow_com.zip -d data/jhmdb/ 56 | 57 | #mv convert_jhmdb.py data/jhmdb/convert_jhmdb.py 58 | #mv read_my_data_keypoints.py data/jhmdb/read_my_data_keypoints.py 59 | #cd data/jhmdb/ 60 | #python convert_jhmdb.py 61 | #cd ../.. 
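# (added note) the two uncommented lines above -- mkdir data/jhmdb/JHMDB_video and
# the mv of ReCompress_Videos -- assume JHMDB_video.zip has already been unzipped
# into data/jhmdb/; for a fresh setup, uncomment the wget/unzip lines of the block too.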
62 | #mv data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord out_human_and_body_parts_keypoints_JHMDB.tfrecord -------------------------------------------------------------------------------- /convert_data/human_body_parts.m: -------------------------------------------------------------------------------- 1 | #human_body_parts 2 | load('index_ade20k.mat'); 3 | 4 | 5 | strings={'back','head','left arm','left foot','left hand','left leg','left shoulder','neck','right arm','right foot','right hand','right leg','right shoulder','torso'}; 6 | N=22210; 7 | 8 | for n = 1:N 9 | filename = fullfile(index.folder{n}, index.filename{n}); 10 | [Om, Oi, Pm, Pi, objects, parts] = loadAde20K(filename); 11 | 12 | object_class = objects.class; 13 | r = rows(objects.class); 14 | ok=0; 15 | for i =1:r 16 | if findstr(object_class{i,1},'person') 17 | ok=1; 18 | break 19 | endif 20 | end 21 | 22 | pndx = setdiff(unique(Pm),0); 23 | index_object_names = index.objectnames(pndx); 24 | if ok==0 || isempty(index_object_names) 25 | continue 26 | endif 27 | ok=0; 28 | for i=1:14 29 | if any(ismember(index_object_names,strings{i})) != 0 30 | ok=1; 31 | break 32 | endif 33 | end 34 | 35 | if ok ==1 36 | 37 | #disp('ok'); 38 | #figure; imshow(Om, []); title('Object classes'); 39 | #colormap(cat(1, [0 0 0], hsv(255))); 40 | 41 | #figure; imshow(Oi, []); title('Object classes'); 42 | #colormap(cat(1, [0 0 0], hsv(255))); 43 | 44 | #subplot(round(sqrt(Nlevels)), ceil(sqrt(Nlevels)), 1) 45 | #imshow(Pm(:,:,1), []); title('Part classes') 46 | #colormap(cat(1, [0 0 0], hsv(255))) 47 | 48 | file_Om = sprintf('output_dir/Om%d.mat',n); 49 | file_Oi = sprintf('output_dir/Oi%d.mat',n); 50 | file_Pm = sprintf('output_dir/Pm%d.mat',n); 51 | file_Pi = sprintf('output_dir/Pi%d.mat',n); 52 | file_objects = sprintf('output_dir/objects%d.mat',n); 53 | file_parts = sprintf('output_dir/parts%d.mat',n); 54 | file_name = sprintf('output_dir/file%d.jpg',n); 55 | 56 | save(file_Om, 'Om',"-mat7-binary"); 57 | save(file_Oi, 'Oi',"-mat7-binary"); 58 | save(file_Pm, 'Pm',"-mat7-binary"); 59 | save(file_Pi, 'Pi',"-mat7-binary"); 60 | save(file_objects, 'objects',"-mat7-binary"); 61 | save(file_parts, 'parts',"-mat7-binary"); 62 | copyfile(filename,file_name); 63 | 64 | pndx = setdiff(unique(Pm),0); 65 | disp('Parts present in this image:'); 66 | disp(n); 67 | endif 68 | #disp('next'); 69 | #fflush(stdout) 70 | 71 | 72 | 73 | #{ 74 | wndx = setdiff(unique(Om),0); 75 | disp('Objects present in this image (and their wordnet hierarchy):') 76 | for i = 1:length(wndx) 77 | %disp(sprintf('%60s', index.objectnames{wndx(n)})) 78 | if findstr(index.objectnames{wndx(i)},'person') 79 | disp('ok') 80 | figure; imshow(Om, []); title('Object classes') 81 | colormap(cat(1, [0 0 0], hsv(255))) 82 | endif 83 | end 84 | #} 85 | end 86 | -------------------------------------------------------------------------------- /convert_data/read_my_data_keypoints.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | from PIL import Image 5 | import scipy.io as sio 6 | import cv2 7 | 8 | def map_value(x,A,B,a,b): 9 | return (x-A)*(b-a)/(B-A)+a 10 | 11 | random_color =np.random.randint(0,180,(7)) 12 | i=0 13 | example = tf.train.Example() 14 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 15 | for record in
tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options): 16 | 17 | i = i+1 18 | print i 19 | if i %70 !=0: 20 | continue 21 | example.ParseFromString(record) 22 | f = example.features.feature 23 | img_idnp = f['image/img_id'].int64_list.value[0] 24 | image_np = f['image/encoded'].bytes_list.value[0] 25 | heightnp = f['image/height'].int64_list.value[0] 26 | widthnp = f['image/width'].int64_list.value[0] 27 | num_instancesnp = f['label/num_instances'].int64_list.value[0] 28 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0] 29 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0] 30 | encoded = f['label/encoded'].bytes_list.value[0] 31 | gt_keypoints = f['label/keypoints'].bytes_list.value[0] 32 | 33 | image_np = np.fromstring(image_np, dtype=np.uint8) 34 | image_np = image_np.reshape((heightnp, widthnp, 3)) 35 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8) 36 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7)) 37 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32) 38 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5)) 39 | gt_keypointsnp = np.fromstring(gt_keypoints, dtype=np.float32).reshape((2,15)) 40 | cv2.imshow("img",image_np) 41 | cv2.waitKey(100) 42 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV) 43 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp): 44 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 45 | for mask_part in range(7): 46 | mask = human_masks[:,:,mask_part] 47 | mask = mask.astype(np.uint8) 48 | S = 255 49 | if mask_part ==0: 50 | S=100 51 | for x in range(int(h_box[0]),int(h_box[2])): 52 | for y in range(int(h_box[1]),int(h_box[3])): 53 | if mask[y,x]==1: 54 | hsv[y,x,0] = random_color[mask_part] 55 | hsv[y,x,1] = S 56 | for x in range(15): 57 | gt_keypointsnp[0,x] = map_value(gt_keypointsnp[0,x],-10.0,10.0,h_box[0],h_box[2]) 58 | gt_keypointsnp[1,x] = map_value(gt_keypointsnp[1,x],-10.0,10.0,h_box[1],h_box[3]) 59 | hsv = cv2.circle(hsv,(int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x])),2,(255,255,255)) 60 | print int(gt_keypointsnp[0,x]),int(gt_keypointsnp[1,x]) 61 | bgrr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 62 | cv2.imshow("img",bgrr) 63 | cv2.waitKey(700) 64 | 65 | 66 | 67 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 68 | cv2.imshow("img",bgr) 69 | cv2.waitKey(700) 70 | 71 | 72 | -------------------------------------------------------------------------------- /convert_data/visualize_records_human_body_parts.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 3 | import numpy as np 4 | import cv2 5 | 6 | random_color =np.random.randint(0,180,(7)) 7 | 8 | example = tf.train.Example() 9 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 10 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_keypoints_to_body_parts_COCO.tfrecord',options): 11 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts_ade_20k_max640edge.tfrecord',options): 12 | #for record in tf.python_io.tf_record_iterator('data/out_human_and_body_parts.tfrecord',options): 13 | #for record in tf.python_io.tf_record_iterator('data/chalearn/api_code/out_human_and_body_parts_chalearn.tfrecord',options): 14 | for record in tf.python_io.tf_record_iterator('data/freiburg/out_human_and_body_parts_Freiburg.tfrecord',options): 15 | #for record in 
tf.python_io.tf_record_iterator('data/jhmdb/out_human_and_body_parts_keypoints_JHMDB.tfrecord',options): 16 | example.ParseFromString(record) 17 | f = example.features.feature 18 | img_idnp = f['image/img_id'].int64_list.value[0] 19 | image_np = f['image/encoded'].bytes_list.value[0] 20 | heightnp = f['image/height'].int64_list.value[0] 21 | widthnp = f['image/width'].int64_list.value[0] 22 | num_instancesnp = f['label/num_instances'].int64_list.value[0] 23 | gt_masksnp = f['label/gt_masks'].bytes_list.value[0] 24 | gt_boxesnp = f['label/gt_boxes'].bytes_list.value[0] 25 | encoded = f['label/encoded'].bytes_list.value[0] 26 | image_np = np.fromstring(image_np, dtype=np.uint8) 27 | image_np = image_np.reshape((heightnp, widthnp, 3)) 28 | gt_masksnp = np.fromstring(gt_masksnp, dtype=np.uint8) 29 | gt_masksnp = gt_masksnp.reshape((num_instancesnp, heightnp, widthnp,7)) 30 | gt_boxesnp = np.fromstring(gt_boxesnp, dtype=np.float32) 31 | gt_boxesnp = gt_boxesnp.reshape((num_instancesnp,5)) 32 | cv2.imshow("img",image_np) 33 | cv2.waitKey(100) 34 | hsv = cv2.cvtColor(image_np,cv2.COLOR_BGR2HSV) 35 | for h_box,human_masks in zip(gt_boxesnp,gt_masksnp): 36 | hsv = cv2.rectangle(hsv,(h_box[0],h_box[1]),(h_box[2],h_box[3]),(255,255,255),2) 37 | for mask_part in range(7): 38 | mask = human_masks[:,:,mask_part] 39 | mask = mask.astype(np.uint8) 40 | S = 255 41 | if mask_part ==0: 42 | S=100 43 | for x in range(int(h_box[0]),int(h_box[2])): 44 | for y in range(int(h_box[1]),int(h_box[3])): 45 | if mask[y,x]==1: 46 | hsv[y,x,0] = random_color[mask_part] 47 | hsv[y,x,1] = S 48 | bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR) 49 | cv2.imshow("img",bgr) 50 | cv2.waitKey(1000) 51 | 52 | 53 | -------------------------------------------------------------------------------- /crontab.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | out=$(ps aux | grep '/usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py' | rev | cut -d ' ' -f 1 | rev | wc -l) # grep matches its own process too, so 2 lines mean train.py is already running 3 | if [ $out -eq "2" ];then 4 | echo "2 processes" >> /tmp/testing.txt 5 | else 6 | echo "1 process" >> /tmp/testing.txt 7 | echo $(date) >> /tmp/testing.txt 8 | export CUDA_VISIBLE_DEVICES=0 9 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/ 10 | `(/usr/bin/python /hdd1/Alex/testMaskRCNN_human_bodyparts/MaskRCNN_body/train/train.py &>> /tmp/testing.txt)` 11 | echo "tried to start" >> /tmp/testing.txt; 12 | fi 13 | 14 | 15 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | Place and unzip your coco in this dir, like 2 | 3 | ```buildoutcfg 4 | ./data 5 | ./coco 6 | ./annotations 7 | ./train2014 8 | ./val2014 9 | ``` 10 | -------------------------------------------------------------------------------- /document.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/document.pdf -------------------------------------------------------------------------------- /draw/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/__init__.py
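# (sketch, not in the repo) draw.py below replays network dumps saved with numpy;
# the slot order matches its unpacking, while every shape here is an illustrative
# assumption rather than the network's real dimensions:
import numpy as np
image = np.zeros((480, 640, 3), np.uint8)
bbox = np.array([[50., 40., 200., 300.]], np.float32)        # one detection: x1, y1, x2, y2
label = np.array([1]); gt_label = np.array([1])              # class ids (1 = person)
prob = np.full((1, 81), 0.01, np.float32); prob[0, 1] = 0.9  # per-class scores
gt_bbox = bbox.copy()
final_mask = np.zeros((1, 112, 112, 7), np.float32)          # per-roi body/part masks
gt_mask = np.zeros((1, 480, 640, 7), np.uint8)
np.save("/home/alex/PycharmProjects/data/array1.npy",
        np.array([image, bbox, label, prob, gt_bbox, gt_label, final_mask, gt_mask], dtype=object))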
-------------------------------------------------------------------------------- /draw/draw.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils import draw_human_body_parts 3 | 4 | for x in range(1,150): 5 | array = np.load("/home/alex/PycharmProjects/data/array"+str(x)+".npy") 6 | image = array[0] 7 | bbox = array[1] 8 | label =array[2] 9 | prob = array[3] 10 | gt_bbox = array[4] 11 | gt_label = array[5] 12 | final_mask = array[6] 13 | gt_mask = array[7] 14 | 15 | #visualize_mask_gt(bbox,final_mask,gt_mask,label,prob) 16 | #draw_segmentation_parts(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) 17 | #draw_bbox_better(1,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) ############this is for voc independent body parts 18 | print (x) 19 | draw_human_body_parts(x,image,name="seg"+str(x),bbox=bbox,label=label,gt_label=gt_label,prob=prob,final_mask=final_mask) 20 | 21 | 22 | -------------------------------------------------------------------------------- /draw/metric.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | 4 | def bbox_overlaps(boxes,query_boxes): # boxes are the predicted boxes and query_boxes are the ground truth boxes 5 | N = boxes.shape[0] 6 | K = query_boxes.shape[0] 7 | overlaps = np.zeros((N, K), dtype=np.float32) 8 | iw, ih, box_area,ua,k, n = 0,0,0,0,0,0 9 | for k in range(K): 10 | box_area = ( 11 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 12 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 13 | ) 14 | for n in range(N): 15 | iw = ( 16 | min(boxes[n, 2], query_boxes[k, 2]) - 17 | max(boxes[n, 0], query_boxes[k, 0]) + 1 18 | ) 19 | if iw > 0: 20 | ih = ( 21 | min(boxes[n, 3], query_boxes[k, 3]) - 22 | max(boxes[n, 1], query_boxes[k, 1]) + 1 23 | ) 24 | if ih > 0: 25 | ua = float( 26 | (boxes[n, 2] - boxes[n, 0] + 1) * 27 | (boxes[n, 3] - boxes[n, 1] + 1) + 28 | box_area - iw * ih 29 | ) 30 | overlaps[n, k] = iw * ih / ua 31 | return overlaps 32 | 33 | def IOU_mask(mask,gt_mask): 34 | intersection = np.sum( (mask * gt_mask) > 0 ) 35 | union = np.sum((np.logical_or(mask,gt_mask))> 0) 36 | return float(intersection)/float(union+1) 37 |
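# (sketch, not part of the original file) a quick check of the helpers above:
# boxes = np.array([[0., 0., 9., 9.]])
# gt = np.array([[0., 0., 9., 9.], [20., 20., 29., 29.]])
# bbox_overlaps(boxes, gt)  ->  [[1.0, 0.0]]: full IoU with the first gt box, none with the second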
38 | def metric_for_image(bbox=None,gt_bbox=None,label=None, gt_label=None, prob=None,final_mask=None): #NOTE: gt_mask is read from the module scope (it is loaded in the loop below) 39 | #find the overlaps between each predicted box and gt_box 40 | overlaps = bbox_overlaps(np.ascontiguousarray(bbox[:, :4], dtype=np.float),np.ascontiguousarray(gt_bbox[:, :4], dtype=np.float)) 41 | gt_assignment = overlaps.argmax(axis=1) #multiple bboxes may have a single GT 42 | 43 | max_overlaps = overlaps[np.arange(bbox.shape[0]), gt_assignment] #select the predicted boxes that are closest to the gt_box 44 | 45 | good = 0 46 | total_boxes = 0 47 | for i,overlap in enumerate(max_overlaps): 48 | box = bbox[i] 49 | width = int(box[2])-int(box[0]) 50 | height = int(box[3])-int(box[1]) 51 | if prob[i,label[i]] > 0.5 and width*height >1000 and label[i]!=0: #skip detections whose classification score is below 0.5, whose box is too small, or whose label is background 52 | total_boxes = total_boxes+1 #this will be the denominator 53 | if label[i] == gt_label[i]: 54 | if overlap >0.5: #if overlap of the BOXES is bigger than 0.5 55 | output_mask = (final_mask[i] > 0.6).astype(np.uint8) 56 | 57 | gt_maski = gt_mask[:,int(box[1]):int(box[3]),int(box[0]):int(box[2]),:] #crop from gt_mask given the predicted box 58 | gt_maskii = np.zeros([112,112,7],np.uint8) 59 | for x in range(7): 60 | mask = gt_maski[...,x] 61 | mask = mask[0] 62 | gt_maskii[...,x] = cv2.resize(mask.astype(np.uint8),(112,112)) 63 | 64 | if IOU_mask(output_mask,gt_maskii) > 0.5: #if overlap of the MASKS is bigger than 0.5 65 | good = good +1 66 | precision_over_image = float(good)/(float(total_boxes)+np.finfo(np.float32).eps) 67 | return precision_over_image 68 | 69 | metrics = [] 70 | for i in range(0,512): 71 | bbox = np.load('data/bbox'+str(i)+'.npy') 72 | gt_bbox = np.load('data/gt_boxes'+str(i)+'.npy') 73 | final_mask = np.load('data/final_mask'+str(i)+'.npy') 74 | gt_label = np.load('data/gt_label'+str(i)+'.npy') 75 | image = np.load('data/image'+str(i)+'.npy') 76 | label = np.load('data/label'+str(i)+'.npy') 77 | prob = np.load('data/prob'+str(i)+'.npy') 78 | gt_mask = np.load('data/gt_mask'+str(i)+'.npy') 79 | metrics.append(metric_for_image(bbox,gt_bbox,label,gt_label,prob,final_mask)) 80 | 81 | print reduce(lambda x, y: x + y, metrics) / len(metrics) # mean precision over all evaluated images 82 | -------------------------------------------------------------------------------- /draw/utils.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import tensorflow as tf 3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance 4 | import scipy.misc 5 | import cv2 6 | import numpy.ma as ma 7 | 8 | FLAGS = tf.app.flags.FLAGS 9 | _DEBUG = False 10 | 11 | 12 | #not used 13 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None): 14 | #print("image") 15 | #print(image) 16 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0) 17 | norm_image = np.uint8(image/0.1*127.0 + 127.0) 18 | #print("norm_image") 19 | #print(norm_image) 20 | source_img = Image.fromarray(norm_image) 21 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG') 22 | 23 | 24 | #label colors 25 | colors = [] 26 | colors.append([180,255,255]) 27 | colors.append([150,255,255]) 28 | colors.append([120,255,255]) 29 | colors.append([90,255,255]) 30 | colors.append([60,255,255]) 31 | colors.append([30,255,255]) 32 | colors.append([0,255,255]) 33 | 34 | 35 | 36 | def draw_human_body_parts(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None): 37 | import cv2 38 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV) 39 | hsv_body = hsv.copy() 40 | if bbox is not None: 41 | dictinary = {} #key: area, value:[box,label,gt_label,prob,mask,color] #the dictionary is later sorted by area so that smaller boxes are drawn in front 42 | for i, box in enumerate(bbox): 43 | width = int(box[2])-int(box[0]) 44 | height = int(box[3])-int(box[1]) 45 | #l=label[i] 46 | #p = prob[i,label[i]]
47 | if (prob[i,label[i]] > 0.5) and width*height >1000 and label[i]!=0: #filter the boxes: label[i] is the predicted class and prob[i,label[i]] its score 48 | area = float((box[2]-box[0])*(box[3]-box[1])) 49 | while area in dictinary: #nudge the key until it is unique, so boxes with equal areas do not collide 50 | area+=1 51 | 52 | mask = final_mask[i] 53 | masks = np.zeros((height,width,7)) 54 | body_mask = mask[...,0] > 0.6 55 | body_mask2 = np.array(body_mask,np.uint8) 56 | masks[...,0] = scipy.misc.imresize(body_mask2,(height,width)) 57 | 58 | # cv2.imshow("body_mask",body_mask.astype(np.uint8)*255) 59 | # cv2.waitKey(3000) 60 | for x in range(1,7): 61 | maska = mask[...,x] > 0.6 # if prob for a pixel is bigger than 0.6, draw it 62 | # cv2.imshow("maska"+str(x),maska.astype(np.uint8)*255) 63 | # cv2.waitKey(3000) 64 | maska = np.logical_and(maska,body_mask) # clip the parts so they fit inside the body; the body is segmented more reliably 65 | maska = ma.masked_array(mask[...,x], mask=np.logical_not(maska)) 66 | maska = np.ma.filled(maska, 0) 67 | #maska = maska >0 68 | maska = scipy.misc.imresize(maska,(height,width)) 69 | 70 | masks[...,x] = maska 71 | dictinary[round(area,4)]=(box,label[i],gt_label[i],prob[i,label[i]],masks,colors[label[i]]) 72 | sorted_keys = sorted(dictinary.iterkeys(),reverse=True) 73 | # cv2.waitKey(6000) 74 | for key,i in zip(sorted_keys,range(len(sorted_keys))): 75 | bo, lab,gt_lab,_,mask,col= dictinary[key] #mask has shape [H,W,7] 76 | 77 | max_indices = np.argmax(mask,axis=2) # where two part masks overlap, select the part with the highest probability 78 | #max_indices is an array with size [H,W] and its values represent the per-pixel label of the parts 79 | for x in range(int(bo[0]),int(bo[2])): 80 | for y in range(int(bo[1]),int(bo[3])): 81 | 82 | xm = x-(int(bo[0])) 83 | ym = y-(int(bo[1])) 84 | if mask[ym,xm,max_indices[ym,xm]] >0: 85 | hsv[y,x,0] = colors[max_indices[ym,xm]][0] 86 | hsv[y,x,1] = 255 87 | 88 | for x in range(int(bo[0]),int(bo[2])): 89 | for y in range(int(bo[1]),int(bo[3])): 90 | 91 | xm = x-(int(bo[0])) 92 | ym = y-(int(bo[1])) 93 | if(mask[ym,xm,0]==1): 94 | hsv_body[y,x,0] = colors[0][0] 95 | hsv_body[y,x,1] = 150 96 | 97 | hsv = cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) 98 | hsv_body = cv2.cvtColor(hsv_body, cv2.COLOR_HSV2RGB) 99 | i=0 100 | for key in sorted_keys: 101 | bo, lab,gt_lab,_,_,col= dictinary[key] 102 | c = (255,0,0) 103 | bo, lab,gt_lab,_,_,col= dictinary[key] 104 | text = cat_id_to_cls_name(lab) 105 | i=i+1 106 | hsv = cv2.rectangle(hsv,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3) 107 | hsv = cv2.putText(hsv,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255)) 108 | hsv_body = cv2.rectangle(hsv_body,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3) 109 | hsv_body = cv2.putText(hsv_body,text+' '+str(i),(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX,0.5, color =(255,255,255)) 110 | #cv2.imwrite('test_' + name + '_' + str(step) +'.jpg',image) 111 | cv2.imwrite('/home/alex/PycharmProjects/data/test_seg' + name + '_' + str(step) +'.jpg',hsv) 112 | cv2.imwrite('/home/alex/PycharmProjects/data/test_hsv' + name + '_' + str(step) +'.jpg',hsv_body) 113 | 114 | def cat_id_to_cls_name(catId): 115 | cls_name = np.array(['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 116 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 117 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 118 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe', 119 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 120 | 'skis', 'snowboard',
'sports ball', 'kite', 'baseball bat', 121 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 122 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 123 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 124 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch', 125 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 126 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', 127 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 128 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 129 | return cls_name[catId] 130 | -------------------------------------------------------------------------------- /draw/utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/draw/utils.pyc -------------------------------------------------------------------------------- /libs/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | python setup.py build_ext --inplace 3 | rm -rf build 4 | sh make.sh -------------------------------------------------------------------------------- /libs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.py -------------------------------------------------------------------------------- /libs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/__init__.pyc -------------------------------------------------------------------------------- /libs/boxes/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/boxes/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | from . import cython_nms 8 | from . import cython_bbox 9 | import nms 10 | import timer 11 | from .anchor import anchors 12 | from .anchor import anchors_plane 13 | from .roi import roi_cropping 14 | from . import cython_anchor 15 | from . import cython_bbox_transform
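# (added note, not in the original file) per the __main__ benchmark in anchor.py
# further below: anchors() yields a (15, 4) array (5 scales x 3 ratios at a single
# position), and anchors_plane(height, width, stride, anc) tiles those anchors
# over an height x width feature map.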
-------------------------------------------------------------------------------- /libs/boxes/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/__init__.pyc -------------------------------------------------------------------------------- /libs/boxes/anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import numpy as np 6 | from libs.boxes import cython_anchor 7 | 8 | def anchors(scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16): 9 | """Get a set of anchors at one position """ 10 | return generate_anchors(base_size=base, scales=np.asarray(scales, np.int32), ratios=ratios) 11 | 12 | def anchors_plane(height, width, stride = 1.0, 13 | scales=[2, 4, 8, 16, 32], ratios=[0.5, 1, 2.0], base=16): 14 | """Get a complete set of anchors in a spatial plane, 15 | height, width are plane dimensions 16 | stride is the scale ratio of the plane relative to the input image 17 | """ 18 | # TODO: implement in C, or pre-compute them, or set to a fixed input-shape 19 | # enum all anchors in a plane 20 | # scales = kwargs.setdefault('scales', [2, 4, 8, 16, 32]) 21 | # ratios = kwargs.setdefault('ratios', [0.5, 1, 2.0]) 22 | # base = kwargs.setdefault('base', 16) 23 | anc = anchors(scales, ratios, base) 24 | all_anchors = cython_anchor.anchors_plane(height, width, stride, anc) 25 | #print (all_anchors.shape) 26 | return all_anchors 27 | 28 | # Written by Ross Girshick and Sean Bell 29 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 30 | scales=2 ** np.arange(3, 6)): 31 | """ 32 | Generate anchor (reference) windows by enumerating aspect ratios X 33 | scales wrt a reference (0, 0, 15, 15) window. 34 | """ 35 | 36 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 37 | ratio_anchors = _ratio_enum(base_anchor, ratios) 38 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 39 | for i in xrange(ratio_anchors.shape[0])]) 40 | return anchors 41 | 42 | def _whctrs(anchor): 43 | """ 44 | Return width, height, x center, and y center for an anchor (window). 45 | """ 46 | 47 | w = anchor[2] - anchor[0] + 1 48 | h = anchor[3] - anchor[1] + 1 49 | x_ctr = anchor[0] + 0.5 * (w - 1) 50 | y_ctr = anchor[1] + 0.5 * (h - 1) 51 | return w, h, x_ctr, y_ctr 52 | 53 | 54 | def _mkanchors(ws, hs, x_ctr, y_ctr): 55 | """ 56 | Given a vector of widths (ws) and heights (hs) around a center 57 | (x_ctr, y_ctr), output a set of anchors (windows). 58 | """ 59 | 60 | ws = ws[:, np.newaxis] 61 | hs = hs[:, np.newaxis] 62 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 63 | y_ctr - 0.5 * (hs - 1), 64 | x_ctr + 0.5 * (ws - 1), 65 | y_ctr + 0.5 * (hs - 1))) 66 | return anchors 67 | 68 | 69 | def _ratio_enum(anchor, ratios): 70 | """ 71 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 72 | """ 73 | 74 | w, h, x_ctr, y_ctr = _whctrs(anchor) 75 | size = w * h 76 | size_ratios = size / ratios 77 | ws = np.round(np.sqrt(size_ratios)) 78 | hs = np.round(ws * ratios) 79 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 80 | return anchors 81 | 82 | 83 | def _scale_enum(anchor, scales): 84 | """ 85 | Enumerate a set of anchors for each scale wrt an anchor.
86 | """ 87 | 88 | w, h, x_ctr, y_ctr = _whctrs(anchor) 89 | ws = w * scales 90 | hs = h * scales 91 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 92 | return anchors 93 | 94 | def _unmap(data, count, inds, fill=0): 95 | """ Unmap a subset of item (data) back to the original set of items (of 96 | size count) """ 97 | if len(data.shape) == 1: 98 | ret = np.empty((count,), dtype=np.float32) 99 | ret.fill(fill) 100 | ret[inds] = data 101 | else: 102 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32) 103 | ret.fill(fill) 104 | ret[inds, :] = data 105 | return ret 106 | 107 | if __name__ == '__main__': 108 | import time 109 | 110 | t = time.time() 111 | a = anchors() 112 | num_anchors = 0 113 | 114 | # all_anchors = anchors_plane(200, 250, stride=4, boarder=0) 115 | # num_anchors += all_anchors.shape[0] 116 | for i in range(10): 117 | ancs = anchors() 118 | all_anchors = cython_anchor.anchors_plane(200, 250, 4, ancs) 119 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 120 | all_anchors = cython_anchor.anchors_plane(100, 125, 8, ancs) 121 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 122 | all_anchors = cython_anchor.anchors_plane(50, 63, 16, ancs) 123 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 124 | all_anchors = cython_anchor.anchors_plane(25, 32, 32, ancs) 125 | num_anchors += all_anchors.shape[0] * all_anchors.shape[1] * all_anchors.shape[2] 126 | print('average time: %f' % ((time.time() - t) / 10)) 127 | print('anchors: %d' % (num_anchors / 10)) 128 | print(a.shape, '\n', a) 129 | print (all_anchors.shape) 130 | # from IPython import embed 131 | # embed() 132 | -------------------------------------------------------------------------------- /libs/boxes/anchor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/anchor.pyc -------------------------------------------------------------------------------- /libs/boxes/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = 
( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | def bbox_intersections( 58 | np.ndarray[DTYPE_t, ndim=2] boxes, 59 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 60 | """ 61 | For each query box compute the intersection ratio covered by boxes 62 | ---------- 63 | Parameters 64 | ---------- 65 | boxes: (N, 4) ndarray of float 66 | query_boxes: (K, 4) ndarray of float 67 | Returns 68 | ------- 69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 70 | """ 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef unsigned int K = query_boxes.shape[0] 73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 74 | cdef DTYPE_t iw, ih, box_area 75 | cdef DTYPE_t ua 76 | cdef unsigned int k, n 77 | for k in range(K): 78 | box_area = ( 79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 81 | ) 82 | for n in range(N): 83 | iw = ( 84 | min(boxes[n, 2], query_boxes[k, 2]) - 85 | max(boxes[n, 0], query_boxes[k, 0]) + 1 86 | ) 87 | if iw > 0: 88 | ih = ( 89 | min(boxes[n, 3], query_boxes[k, 3]) - 90 | max(boxes[n, 1], query_boxes[k, 1]) + 1 91 | ) 92 | if ih > 0: 93 | intersec[n, k] = iw * ih / box_area 94 | return intersec -------------------------------------------------------------------------------- /libs/boxes/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import warnings 10 | 11 | def bbox_transform(ex_rois, gt_rois): 12 | """ 13 | computes the distance from ground-truth boxes to the given boxes, normed by their size 14 | :param ex_rois: n * 4 numpy array, given boxes 15 | :param gt_rois: n * 4 numpy array, ground-truth boxes 16 | :return: deltas: n * 4 numpy array, regression targets from the given boxes to the ground-truth boxes 17 | """ 18 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 19 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 20 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 21 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 22 | 23 | # assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \ 24 | # 'Invalid boxes found: {} {}'. 
\ 25 | # format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :]) 26 | 27 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 28 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 29 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 30 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 31 | 32 | # warnings.catch_warnings() 33 | # warnings.filterwarnings('error') 34 | targets_dx = 10.0 * (gt_ctr_x - ex_ctr_x) / ex_widths # targets are scaled up (x10 for centers, x5 for sizes), presumably to balance the loss magnitudes; bbox_transform_inv undoes this 35 | targets_dy = 10.0 * (gt_ctr_y - ex_ctr_y) / ex_heights 36 | targets_dw = 5.0 * np.log(gt_widths / ex_widths) 37 | targets_dh = 5.0 * np.log(gt_heights / ex_heights) 38 | 39 | targets = np.vstack( 40 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 41 | return targets 42 | 43 | def bbox_transform_inv(boxes, deltas): # called from roi.py (line 116); shapes: boxes=(R, 4) as [x1, y1, x2, y2], deltas=(R, Kx4) 44 | if boxes.shape[0] == 0: 45 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 46 | 47 | boxes = boxes.astype(deltas.dtype, copy=False) 48 | 49 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 50 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 51 | ctr_x = boxes[:, 0] + 0.5 * widths 52 | ctr_y = boxes[:, 1] + 0.5 * heights 53 | 54 | dx = deltas[:, 0::4] * 0.1 # multiply by 0.1 / 0.2 to invert the x10 / x5 scaling applied in bbox_transform (line 34) 55 | dy = deltas[:, 1::4] * 0.1 56 | dw = deltas[:, 2::4] * 0.2 57 | dh = deltas[:, 3::4] * 0.2 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | # pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | # pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_w = np.exp(dw + np.log(widths[:, np.newaxis])) 65 | pred_h = np.exp(dh + np.log(heights[:, np.newaxis])) 66 | 67 | 68 | #pred_w = np.exp(dw + np.log(widths[:, np.newaxis])) 69 | #pred_h = np.exp(dh + np.log(heights[:, np.newaxis])) 70 | 71 | 72 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 73 | # x1 74 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 75 | # y1 76 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 77 | # x2 78 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - 1 79 | # y2 80 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - 1 81 | 82 | return pred_boxes 83 | 84 | def clip_boxes(boxes, im_shape): 85 | """ 86 | Clip boxes to image boundaries. 
87 | """ 88 | 89 | # x1 >= 0 90 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 91 | # y1 >= 0 92 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 93 | # x2 < im_shape[1] 94 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 95 | # y2 < im_shape[0] 96 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 97 | return boxes 98 | -------------------------------------------------------------------------------- /libs/boxes/bbox_transform.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/bbox_transform.pyc -------------------------------------------------------------------------------- /libs/boxes/blob.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | """Blob helper functions.""" 9 | 10 | import numpy as np 11 | import cv2 12 | from ..fast_rcnn.config import cfg 13 | 14 | def im_list_to_blob(ims): 15 | """Convert a list of images into a network input. 16 | 17 | Assumes images are already prepared (means subtracted, BGR order, ...). 18 | """ 19 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 20 | num_images = len(ims) 21 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 22 | dtype=np.float32) 23 | for i in xrange(num_images): 24 | im = ims[i] 25 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 26 | 27 | return blob 28 | 29 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 30 | """Mean subtract and scale an image for use in a blob.""" 31 | im = im.astype(np.float32, copy=False) 32 | im -= pixel_means 33 | im_shape = im.shape 34 | im_size_min = np.min(im_shape[0:2]) 35 | im_size_max = np.max(im_shape[0:2]) 36 | im_scale = float(target_size) / float(im_size_min) 37 | # Prevent the biggest axis from being more than MAX_SIZE 38 | if np.round(im_scale * im_size_max) > max_size: 39 | im_scale = float(max_size) / float(im_size_max) 40 | if cfg.TRAIN.RANDOM_DOWNSAMPLE: 41 | r = 0.6 + np.random.rand() * 0.4 42 | im_scale *= r 43 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 44 | interpolation=cv2.INTER_LINEAR) 45 | 46 | return im, im_scale 47 | -------------------------------------------------------------------------------- /libs/boxes/cython_anchor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_anchor.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_anchor.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed 
under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | 14 | def anchors_plane( 15 | int height, int width, int stride, 16 | np.ndarray[DTYPE_t, ndim=2] anchors_base): 17 | """ 18 | Parameters 19 | ---------- 20 | height: height of plane 21 | width: width of plane 22 | stride: stride of the original image 23 | anchors_base: (A, 4) a base set of anchors 24 | Returns 25 | ------- 26 | all_anchors: (height, width, A, 4) ndarray of anchors spreading over the plane 27 | """ 28 | cdef unsigned int A = anchors_base.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=4] all_anchors = np.zeros((height, width, A, 4), dtype=DTYPE) 30 | cdef unsigned int iw, ih 31 | cdef unsigned int k 32 | cdef unsigned int A4 33 | cdef unsigned int sh 34 | cdef unsigned int sw 35 | A4 = A*4 36 | for iw in range(width): 37 | sw = iw * stride 38 | for ih in range(height): 39 | sh = ih * stride 40 | for k in range(A): 41 | all_anchors[ih, iw, k, 0] = anchors_base[k, 0] + sw 42 | all_anchors[ih, iw, k, 1] = anchors_base[k, 1] + sh 43 | all_anchors[ih, iw, k, 2] = anchors_base[k, 2] + sw 44 | all_anchors[ih, iw, k, 3] = anchors_base[k, 3] + sh 45 | return all_anchors -------------------------------------------------------------------------------- /libs/boxes/cython_bbox.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_bbox_transform.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_bbox_transform.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Licensed under The MIT License [see LICENSE for details] 4 | # Written by CharlesShang@github 5 | # -------------------------------------------------------- 6 | 7 | cimport cython 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | DTYPE = np.float 12 | ctypedef np.float_t DTYPE_t 13 | # ctypedef float DTYPE_t 14 | 15 | #def bbox_transform( 16 | # np.ndarray[DTYPE_t, ndim=2] ex_rois, 17 | # np.ndarray[DTYPE_t, ndim=2] gt_rois): 18 | def bbox_transform( 19 | np.ndarray[DTYPE_t, ndim=2] ex_rois, 20 | np.ndarray[DTYPE_t, ndim=2] gt_rois): 21 | """ 22 | Parameters 23 | ---------- 24 | ex_rois: n * 4 numpy array, 
given boxes 25 | gt_rois: n * 4 numpy array, ground-truth boxes 26 | Returns 27 | ------- 28 | targets: (n, 4) ndarray 29 | """ 30 | cdef unsigned int R = ex_rois.shape[0] 31 | cdef np.ndarray[DTYPE_t, ndim=2] targets = np.zeros((R, 4), dtype=DTYPE) 32 | cdef unsigned int i 33 | cdef DTYPE_t gt_w 34 | cdef DTYPE_t gt_h 35 | cdef DTYPE_t gt_cx 36 | cdef DTYPE_t gt_cy 37 | cdef DTYPE_t ex_w 38 | cdef DTYPE_t ex_h 39 | cdef DTYPE_t ex_cx 40 | cdef DTYPE_t ex_cy 41 | for i in range(R): 42 | gt_w = gt_rois[i, 2] - gt_rois[i, 0] + 1.0 43 | gt_h = gt_rois[i, 3] - gt_rois[i, 1] + 1.0 44 | ex_w = ex_rois[i, 2] - ex_rois[i, 0] + 1.0 45 | ex_h = ex_rois[i, 3] - ex_rois[i, 1] + 1.0 46 | gt_cx = gt_rois[i, 0] + gt_w * 0.5 47 | gt_cy = gt_rois[i, 1] + gt_h * 0.5 48 | ex_cx = ex_rois[i, 0] + ex_w * 0.5 49 | ex_cy = ex_rois[i, 1] + ex_h * 0.5 50 | targets[i, 0] = (gt_cx - ex_cx) / ex_w 51 | targets[i, 1] = (gt_cy - ex_cy) / ex_h 52 | targets[i, 2] = np.log(gt_w / ex_w) 53 | targets[i, 3] = np.log(gt_h / ex_h) 54 | return targets 55 | 56 | cdef inline DTYPE_t my_max(DTYPE_t a, DTYPE_t b): return a if a >= b else b 57 | cdef inline DTYPE_t my_min(DTYPE_t a, DTYPE_t b): return a if a <= b else b 58 | 59 | def bbox_transform_inv( 60 | np.ndarray[DTYPE_t, ndim=2] boxes, 61 | np.ndarray[DTYPE_t, ndim=2] deltas): 62 | """ 63 | Parameters 64 | ---------- 65 | boxes: n * 4 numpy array, given boxes 66 | deltas: (n, kx4) numpy array 67 | Returns 68 | ------- 69 | pred_boxes: (n, kx4) ndarray 70 | """ 71 | cdef unsigned int R = boxes.shape[0] 72 | cdef unsigned int k4 = deltas.shape[1] 73 | cdef unsigned int k 74 | k = k4 / 4 75 | cdef np.ndarray[DTYPE_t, ndim=2] pred_boxes = np.zeros((R, k4), dtype=DTYPE) 76 | if R == 0: 77 | return pred_boxes 78 | 79 | cdef unsigned int i 80 | cdef unsigned int j 81 | cdef unsigned int j4 82 | cdef DTYPE_t w 83 | cdef DTYPE_t h 84 | cdef DTYPE_t cx 85 | cdef DTYPE_t cy 86 | cdef DTYPE_t px 87 | cdef DTYPE_t py 88 | cdef DTYPE_t pw 89 | cdef DTYPE_t ph 90 | for i in range(R): 91 | w = boxes[i, 2] - boxes[i, 0] + 1.0 92 | h = boxes[i, 3] - boxes[i, 1] + 1.0 93 | cx = boxes[i, 0] + w * 0.5 94 | cy = boxes[i, 1] + h * 0.5 95 | for j in range(k): 96 | j4 = j * 4 97 | px = deltas[i, j4 ] * w + cx 98 | py = deltas[i, j4 + 1] * h + cy 99 | pw = np.exp(deltas[i, j4 + 2]) * w 100 | ph = np.exp(deltas[i, j4 + 3]) * h 101 | pred_boxes[i, j4 ] = px - 0.5 * pw 102 | pred_boxes[i, j4 + 1] = py - 0.5 * ph 103 | pred_boxes[i, j4 + 2] = px + 0.5 * pw 104 | pred_boxes[i, j4 + 3] = py + 0.5 * ph 105 | return pred_boxes 106 | 107 | def clip_boxes( 108 | np.ndarray[DTYPE_t, ndim=2] boxes, 109 | np.ndarray[DTYPE_t, ndim=1] im_shape): 110 | """ 111 | Parameters 112 | ---------- 113 | boxes: (n, kx4) numpy array, given boxes 114 | im_shape: (2,) numpy array, (image_height, image_width) 115 | Returns 116 | ------- 117 | clipped: (n, kx4) ndarray 118 | """ 119 | cdef unsigned int R = boxes.shape[0] 120 | cdef unsigned int k4 = boxes.shape[1] 121 | cdef unsigned int k = k4 / 4 122 | cdef np.ndarray[DTYPE_t, ndim=2] clipped = np.zeros((R, k4), dtype=DTYPE) 123 | cdef unsigned int i 124 | cdef unsigned int j 125 | cdef unsigned int j4 126 | for i in range(R): 127 | for j in range(k): 128 | j4 = j * 4 129 | clipped[i, j4 ] = my_max(my_min(boxes[i, j4 ], im_shape[1]-1), 0) 130 | clipped[i, j4 + 1] = my_max(my_min(boxes[i, j4 + 1], im_shape[0]-1), 0) 131 | clipped[i, j4 + 2] = my_max(my_min(boxes[i, j4 + 2], im_shape[1]-1), 0) 132 | clipped[i, j4 + 3] = my_max(my_min(boxes[i, j4 + 3], im_shape[0]-1), 0) 133 | 
return clipped -------------------------------------------------------------------------------- /libs/boxes/cython_nms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import sys, pkg_resources, imp 6 | 7 | def __bootstrap__(): 8 | global __bootstrap__, __loader__, __file__ 9 | __file__ = pkg_resources.resource_filename(__name__, 'cython_nms.so') 10 | __loader__ = None 11 | del __bootstrap__, __loader__ 12 | imp.load_dynamic(__name__, __file__) 13 | 14 | __bootstrap__() -------------------------------------------------------------------------------- /libs/boxes/cython_nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/cython_nms.pyc -------------------------------------------------------------------------------- /libs/boxes/nms.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def nms(dets, thresh): 11 | x1 = dets[:, 0] 12 | y1 = dets[:, 1] 13 | x2 = dets[:, 2] 14 | y2 = dets[:, 3] 15 | scores = dets[:, 4] 16 | 17 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 18 | order = scores.argsort()[::-1] 19 | 20 | keep = [] 21 | while order.size > 0: 22 | i = order[0] 23 | keep.append(i) 24 | xx1 = np.maximum(x1[i], x1[order[1:]]) 25 | yy1 = np.maximum(y1[i], y1[order[1:]]) 26 | xx2 = np.minimum(x2[i], x2[order[1:]]) 27 | yy2 = np.minimum(y2[i], y2[order[1:]]) 28 | 29 | w = np.maximum(0.0, xx2 - xx1 + 1) 30 | h = np.maximum(0.0, yy2 - yy1 + 1) 31 | inter = w * h 32 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 33 | 34 | inds = np.where(ovr <= thresh)[0] 35 | order = order[inds + 1] 36 | 37 | return keep 38 | -------------------------------------------------------------------------------- /libs/boxes/nms.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms.pyc -------------------------------------------------------------------------------- /libs/boxes/nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef 
np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see 
LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import libs.configs.config_v1 as cfg 10 | import libs.nms.gpu_nms as gpu_nms 11 | import libs.nms.cpu_nms as cpu_nms 12 | 13 | def nms(dets, thresh, force_cpu=False): 14 | """Dispatch to either CPU or GPU NMS implementations.""" 15 | 16 | if dets.shape[0] == 0: 17 | return [] 18 | if force_cpu: # assumes libs/nms/cpu_nms.pyx builds a cpu_nms(dets, thresh) function, as in py-faster-rcnn 19 | return cpu_nms.cpu_nms(dets, thresh) 20 | return gpu_nms.gpu_nms(dets, thresh, device_id=0) 21 | 22 | def nms_wrapper(scores, boxes, threshold = 0.7, class_sets = None): 23 | """ 24 | post-process the results of im_detect 25 | :param scores: N * K numpy 26 | :param boxes: N * (K * 4) numpy 27 | :param class_sets: e.g. CLASSES = ('__background__','person','bike','motorbike','car','bus') 28 | :return: a list of K-1 dicts, no background, each is {'class': classname, 'dets': None | [[x1,y1,x2,y2,score],...]} 29 | """ 30 | num_class = scores.shape[1] if class_sets is None else len(class_sets) 31 | assert num_class * 4 == boxes.shape[1],\ 32 | 'Detection scores and boxes do not match %d vs %d' % (num_class, boxes.shape[1]) 33 | class_sets = ['class_' + str(i) for i in range(0, num_class)] if class_sets is None else class_sets 34 | 35 | res = [] 36 | for ind, cls in enumerate(class_sets[1:]): 37 | ind += 1 # skip background 38 | cls_boxes = boxes[:, 4*ind : 4*(ind+1)] 39 | cls_scores = scores[:, ind] 40 | dets = np.hstack((cls_boxes, cls_scores[:, np.newaxis])).astype(np.float32) 41 | keep = nms(dets, thresh=0.3) 42 | dets = dets[keep, :] 43 | dets = dets[np.where(dets[:, 4] > threshold)] 44 | r = {} 45 | if dets.shape[0] > 0: 46 | r['class'], r['dets'] = cls, dets 47 | else: 48 | r['class'], r['dets'] = cls, None 49 | res.append(r) 50 | return res 51 | 52 | if __name__=='__main__': 53 | 54 | score = np.random.rand(10, 21) 55 | boxes = np.random.randint(0, 100, (10, 21, 2)) 56 | s = np.random.randint(0, 100, (10, 21, 2)) 57 | s = boxes + s 58 | boxes = np.concatenate((boxes, s), axis=2) 59 | boxes = np.reshape(boxes, [boxes.shape[0], -1]) 60 | # score = np.reshape(score, [score.shape[0], -1]) 61 | res = nms_wrapper(score, boxes) 62 | print (res) -------------------------------------------------------------------------------- /libs/boxes/nms_wrapper.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/nms_wrapper.pyc -------------------------------------------------------------------------------- /libs/boxes/profile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile -------------------------------------------------------------------------------- /libs/boxes/profile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/profile.png -------------------------------------------------------------------------------- /libs/boxes/roi.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import functools 5 | 6 | 
import numpy as np 7 | import tensorflow as tf 8 | import tensorflow.contrib.slim as slim 9 | 10 | def roi_align(feat, boxes): 11 | """Given features and boxes, this function crops features (stub, not implemented yet) """ 12 | return 13 | 14 | def roi_cropping(feat, boxes, clses, anchors, spatial_scale=1.0/16): 15 | """This function computes final RPN boxes 16 | and crops areas from the incoming features (stub, not implemented yet) 17 | """ 18 | return -------------------------------------------------------------------------------- /libs/boxes/roi.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/roi.pyc -------------------------------------------------------------------------------- /libs/boxes/timer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import time 9 | 10 | class Timer(object): 11 | """A simple timer.""" 12 | def __init__(self): 13 | self.total_time = 0. 14 | self.calls = 0 15 | self.start_time = 0. 16 | self.diff = 0. 17 | self.average_time = 0. 18 | 19 | def tic(self): 20 | # using time.time instead of time.clock because time.clock 21 | # does not normalize for multithreading 22 | self.start_time = time.time() 23 | 24 | def toc(self, average=True): 25 | self.diff = time.time() - self.start_time 26 | self.total_time += self.diff 27 | self.calls += 1 28 | self.average_time = self.total_time / self.calls 29 | if average: 30 | return self.average_time 31 | else: 32 | return self.diff 33 | -------------------------------------------------------------------------------- /libs/boxes/timer.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/boxes/timer.pyc -------------------------------------------------------------------------------- /libs/configs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.py -------------------------------------------------------------------------------- /libs/configs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/__init__.pyc -------------------------------------------------------------------------------- /libs/configs/config_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/configs/config_v1.pyc -------------------------------------------------------------------------------- /libs/datasets/__init__.py: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.py -------------------------------------------------------------------------------- /libs/datasets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/__init__.pyc -------------------------------------------------------------------------------- /libs/datasets/coco.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import os 6 | import tensorflow as tf 7 | 8 | import tensorflow.contrib.slim as slim 9 | from tensorflow.python.lib.io.tf_record import TFRecordCompressionType 10 | 11 | _FILE_PATTERN = 'coco_%s_*.tfrecord' 12 | 13 | SPLITS_TO_SIZES = {'train2014': 82783, 'val2014': 40504} 14 | 15 | _NUM_CLASSES = 81 16 | 17 | _ITEMS_TO_DESCRIPTIONS = { 18 | 'image': 'A color image of varying size.', 19 | 'label': 'An annotation image of varying size. (pixel-level masks)', 20 | 'gt_masks': 'masks of instances in this image. (instance-level masks), of shape (N, image_height, image_width)', 21 | 'gt_boxes': 'bounding boxes and classes of instances in this image, of shape (N, 5), each entry is (x1, y1, x2, y2, class)', 22 | } 23 | 24 | 25 | def get_split(split_name, dataset_dir, file_pattern=None, reader=None): 26 | if split_name not in SPLITS_TO_SIZES: 27 | raise ValueError('split name %s was not recognized.' % split_name) 28 | 29 | if not file_pattern: 30 | file_pattern = _FILE_PATTERN 31 | file_pattern = os.path.join(dataset_dir, 'records', file_pattern % split_name) 32 | 33 | # Allowing None in the signature so that dataset_factory can use the default. 
34 | if reader is None: 35 | reader = tf.TFRecordReader 36 | 37 | keys_to_features = { 38 | 'image/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 39 | 'image/format': tf.FixedLenFeature((), tf.string, default_value='jpeg'), 40 | 'label/encoded': tf.FixedLenFeature((), tf.string, default_value=''), 41 | 'label/format': tf.FixedLenFeature((), tf.string, default_value='png'), 42 | 'image/height': tf.FixedLenFeature((), tf.int64), 43 | 'image/width': tf.FixedLenFeature((), tf.int64), 44 | 45 | 'label/num_instances': tf.FixedLenFeature((), tf.int64), 46 | 'label/gt_boxes': tf.FixedLenFeature((), tf.string), 47 | 'label/gt_masks': tf.FixedLenFeature((), tf.string), 48 | } 49 | 50 | def _masks_decoder(keys_to_tensors): 51 | masks = tf.decode_raw(keys_to_tensors['label/gt_masks'], tf.uint8) 52 | width = tf.cast(keys_to_tensors['image/width'], tf.int32) 53 | height = tf.cast(keys_to_tensors['image/height'], tf.int32) 54 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 55 | mask_shape = tf.stack([instances, height, width]) 56 | return tf.reshape(masks, mask_shape) 57 | 58 | def _gt_boxes_decoder(keys_to_tensors): 59 | bboxes = tf.decode_raw(keys_to_tensors['label/gt_boxes'], tf.float32) 60 | instances = tf.cast(keys_to_tensors['label/num_instances'], tf.int32) 61 | bboxes_shape = tf.stack([instances, 5]) 62 | return tf.reshape(bboxes, bboxes_shape) 63 | 64 | def _width_decoder(keys_to_tensors): 65 | width = keys_to_tensors['image/width'] 66 | return tf.cast(width, tf.int32) 67 | 68 | def _height_decoder(keys_to_tensors): 69 | height = keys_to_tensors['image/height'] 70 | return tf.cast(height, tf.int32) 71 | 72 | items_to_handlers = { 73 | 'image': slim.tfexample_decoder.Image('image/encoded', 'image/format'), 74 | 'label': slim.tfexample_decoder.Image('label/encoded', 'label/format', channels=1), 75 | 'gt_masks': slim.tfexample_decoder.ItemHandlerCallback( 76 | ['label/gt_masks', 'label/num_instances', 'image/width', 'image/height'], _masks_decoder), 77 | 'gt_boxes': slim.tfexample_decoder.ItemHandlerCallback(['label/gt_boxes', 'label/num_instances'], _gt_boxes_decoder), 78 | 'width': slim.tfexample_decoder.ItemHandlerCallback(['image/width'], _width_decoder), 79 | 'height': slim.tfexample_decoder.ItemHandlerCallback(['image/height'], _height_decoder), 80 | } 81 | 82 | decoder = slim.tfexample_decoder.TFExampleDecoder( 83 | keys_to_features, items_to_handlers) 84 | 85 | return slim.dataset.Dataset( 86 | data_sources=file_pattern, 87 | reader=reader, 88 | decoder=decoder, 89 | num_samples=SPLITS_TO_SIZES[split_name], 90 | items_to_descriptions=_ITEMS_TO_DESCRIPTIONS, 91 | num_classes=_NUM_CLASSES) 92 | 93 | def read(tfrecords_filename): 94 | 95 | if not isinstance(tfrecords_filename, list): 96 | tfrecords_filename = [tfrecords_filename] 97 | filename_queue = tf.train.string_input_producer( 98 | tfrecords_filename, num_epochs=100) 99 | 100 | options = tf.python_io.TFRecordOptions(TFRecordCompressionType.ZLIB) 101 | reader = tf.TFRecordReader(options=options) 102 | _, serialized_example = reader.read(filename_queue) 103 | features = tf.parse_single_example( 104 | serialized_example, 105 | features={ 106 | 'image/img_id': tf.FixedLenFeature([], tf.int64), 107 | 'image/encoded': tf.FixedLenFeature([], tf.string), 108 | 'image/height': tf.FixedLenFeature([], tf.int64), 109 | 'image/width': tf.FixedLenFeature([], tf.int64), 110 | 'label/num_instances': tf.FixedLenFeature([], tf.int64), 111 | 'label/gt_masks': tf.FixedLenFeature([], tf.string), 112 | 'label/gt_boxes': 
tf.FixedLenFeature([], tf.string), 113 | 'label/encoded': tf.FixedLenFeature([], tf.string), 114 | }) 115 | # image = tf.image.decode_jpeg(features['image/encoded'], channels=3) 116 | img_id = tf.cast(features['image/img_id'], tf.int32) 117 | ih = tf.cast(features['image/height'], tf.int32) 118 | iw = tf.cast(features['image/width'], tf.int32) 119 | num_instances = tf.cast(features['label/num_instances'], tf.int32) 120 | image = tf.decode_raw(features['image/encoded'], tf.uint8) 121 | imsize = tf.size(image) 122 | image = tf.cond(tf.equal(imsize, ih * iw), \ 123 | lambda: tf.image.grayscale_to_rgb(tf.reshape(image, (ih, iw, 1))), \ 124 | lambda: tf.reshape(image, (ih, iw, 3))) 125 | 126 | gt_boxes = tf.decode_raw(features['label/gt_boxes'], tf.float32) 127 | gt_boxes = tf.reshape(gt_boxes, [num_instances, 5]) 128 | gt_masks = tf.decode_raw(features['label/gt_masks'], tf.uint8) 129 | gt_masks = tf.cast(gt_masks, tf.int32) 130 | print (ih, iw) 131 | gt_masks = tf.reshape(gt_masks, [num_instances, ih, iw, 7]) 132 | # NOTE: be careful here: the trailing 7 in the reshape above (the number of segmentation classes) replaced what used to be num_instances in the upstream code 133 | 134 | 135 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 136 | -------------------------------------------------------------------------------- /libs/datasets/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/coco.pyc -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | from libs.visualization.summary_utils import visualize_input 7 | import glob 8 | from libs.datasets import coco 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | 12 | def get_dataset(dataset_name, split_name, dataset_dir, 13 | im_batch=1, is_training=False, file_pattern=None, reader=None): 14 | """Read the dataset's tfrecords and return preprocessed image, boxes and masks.""" 15 | if file_pattern is None: 16 | file_pattern = '*.tfrecord' 17 | 18 | tfrecords = glob.glob(dataset_dir + '/records/' + file_pattern) 19 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords) 20 | 21 | image, gt_boxes, gt_masks = coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training) 22 | #visualize_input(gt_boxes, image, tf.expand_dims(gt_masks, axis=3)) 23 | 24 | return image, ih, iw, gt_boxes, gt_masks, num_instances, img_id 25 | 26 | -------------------------------------------------------------------------------- /libs/datasets/dataset_factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/dataset_factory.pyc -------------------------------------------------------------------------------- /libs/datasets/download_and_convert_coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/download_and_convert_coco.pyc
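Editor's note: coco.py and dataset_factory.py above form the whole input pipeline: coco.read() decodes one tfrecord example and get_dataset() wires it into the preprocessing. A minimal sketch (not part of the repo) of pulling one decoded example through that pipeline follows; it assumes TF 1.x queue runners and tfrecords already generated under data/coco/records/ (adjust the path to your setup).

import glob
import tensorflow as tf
from libs.datasets import coco

# assumed location of the converted records
tfrecords = glob.glob('data/coco/records/coco_train2014_*.tfrecord')
image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = coco.read(tfrecords)

with tf.Session() as sess:
    # string_input_producer(num_epochs=...) creates a local variable, so init both
    sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    img, boxes, masks, n = sess.run([image, gt_boxes, gt_masks, num_instances])
    print(img.shape, boxes.shape, masks.shape, n)  # masks: (n, ih, iw, 7)
    coord.request_stop()
    coord.join(threads)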
-------------------------------------------------------------------------------- /libs/datasets/pycocotools/Makefile: -------------------------------------------------------------------------------- 1 | all: 2 | # install pycocotools locally 3 | python setup.py build_ext --inplace 4 | rm -rf build 5 | 6 | install: 7 | # install pycocotools to the Python site-packages 8 | python setup.py build_ext install 9 | rm -rf build -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tylin' 2 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/__init__.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/_mask.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/_mask.so -------------------------------------------------------------------------------- /libs/datasets/pycocotools/coco.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/coco.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/gason.h: -------------------------------------------------------------------------------- 1 | // https://github.com/vivkin/gason - pulled January 10, 2016 2 | #pragma once 3 | 4 | #include <stdint.h> 5 | #include <stddef.h> 6 | #include <assert.h> 7 | 8 | enum JsonTag { 9 | JSON_NUMBER = 0, 10 | JSON_STRING, 11 | JSON_ARRAY, 12 | JSON_OBJECT, 13 | JSON_TRUE, 14 | JSON_FALSE, 15 | JSON_NULL = 0xF 16 | }; 17 | 18 | struct JsonNode; 19 | 20 | #define JSON_VALUE_PAYLOAD_MASK 0x00007FFFFFFFFFFFULL 21 | #define JSON_VALUE_NAN_MASK 0x7FF8000000000000ULL 22 | #define JSON_VALUE_TAG_MASK 0xF 23 | #define JSON_VALUE_TAG_SHIFT 47 24 | 25 | union JsonValue { 26 | uint64_t ival; 27 | double fval; 28 | 29 | JsonValue(double x) 30 | : fval(x) { 31 | } 32 | JsonValue(JsonTag tag = JSON_NULL, void *payload = nullptr) { 33 | assert((uintptr_t)payload <= JSON_VALUE_PAYLOAD_MASK); 34 | ival = JSON_VALUE_NAN_MASK | ((uint64_t)tag << JSON_VALUE_TAG_SHIFT) | (uintptr_t)payload; 35 | } 36 | bool isDouble() const { 37 | return (int64_t)ival <= (int64_t)JSON_VALUE_NAN_MASK; 38 | } 39 | JsonTag getTag() const { 40 | return isDouble() ? 
JSON_NUMBER : JsonTag((ival >> JSON_VALUE_TAG_SHIFT) & JSON_VALUE_TAG_MASK); 41 | } 42 | uint64_t getPayload() const { 43 | assert(!isDouble()); 44 | return ival & JSON_VALUE_PAYLOAD_MASK; 45 | } 46 | double toNumber() const { 47 | assert(getTag() == JSON_NUMBER); 48 | return fval; 49 | } 50 | char *toString() const { 51 | assert(getTag() == JSON_STRING); 52 | return (char *)getPayload(); 53 | } 54 | JsonNode *toNode() const { 55 | assert(getTag() == JSON_ARRAY || getTag() == JSON_OBJECT); 56 | return (JsonNode *)getPayload(); 57 | } 58 | }; 59 | 60 | struct JsonNode { 61 | JsonValue value; 62 | JsonNode *next; 63 | char *key; 64 | }; 65 | 66 | struct JsonIterator { 67 | JsonNode *p; 68 | 69 | void operator++() { 70 | p = p->next; 71 | } 72 | bool operator!=(const JsonIterator &x) const { 73 | return p != x.p; 74 | } 75 | JsonNode *operator*() const { 76 | return p; 77 | } 78 | JsonNode *operator->() const { 79 | return p; 80 | } 81 | }; 82 | 83 | inline JsonIterator begin(JsonValue o) { 84 | return JsonIterator{o.toNode()}; 85 | } 86 | inline JsonIterator end(JsonValue) { 87 | return JsonIterator{nullptr}; 88 | } 89 | 90 | #define JSON_ERRNO_MAP(XX) \ 91 | XX(OK, "ok") \ 92 | XX(BAD_NUMBER, "bad number") \ 93 | XX(BAD_STRING, "bad string") \ 94 | XX(BAD_IDENTIFIER, "bad identifier") \ 95 | XX(STACK_OVERFLOW, "stack overflow") \ 96 | XX(STACK_UNDERFLOW, "stack underflow") \ 97 | XX(MISMATCH_BRACKET, "mismatch bracket") \ 98 | XX(UNEXPECTED_CHARACTER, "unexpected character") \ 99 | XX(UNQUOTED_KEY, "unquoted key") \ 100 | XX(BREAKING_BAD, "breaking bad") \ 101 | XX(ALLOCATION_FAILURE, "allocation failure") 102 | 103 | enum JsonErrno { 104 | #define XX(no, str) JSON_##no, 105 | JSON_ERRNO_MAP(XX) 106 | #undef XX 107 | }; 108 | 109 | const char *jsonStrError(int err); 110 | 111 | class JsonAllocator { 112 | struct Zone { 113 | Zone *next; 114 | size_t used; 115 | } *head = nullptr; 116 | 117 | public: 118 | JsonAllocator() = default; 119 | JsonAllocator(const JsonAllocator &) = delete; 120 | JsonAllocator &operator=(const JsonAllocator &) = delete; 121 | JsonAllocator(JsonAllocator &&x) : head(x.head) { 122 | x.head = nullptr; 123 | } 124 | JsonAllocator &operator=(JsonAllocator &&x) { 125 | head = x.head; 126 | x.head = nullptr; 127 | return *this; 128 | } 129 | ~JsonAllocator() { 130 | deallocate(); 131 | } 132 | void *allocate(size_t size); 133 | void deallocate(); 134 | }; 135 | 136 | int jsonParse(char *str, char **endptr, JsonValue *value, JsonAllocator &allocator); 137 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/common/maskApi.h: -------------------------------------------------------------------------------- 1 | /************************************************************************** 2 | * Microsoft COCO Toolbox. version 2.0 3 | * Data, paper, and tutorials available at: http://mscoco.org/ 4 | * Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 5 | * Licensed under the Simplified BSD License [see coco/license.txt] 6 | **************************************************************************/ 7 | #pragma once 8 | 9 | typedef unsigned int uint; 10 | typedef unsigned long siz; 11 | typedef unsigned char byte; 12 | typedef double* BB; 13 | typedef struct { siz h, w, m; uint *cnts; } RLE; 14 | 15 | /* Initialize/destroy RLE. */ 16 | void rleInit( RLE *R, siz h, siz w, siz m, uint *cnts ); 17 | void rleFree( RLE *R ); 18 | 19 | /* Initialize/destroy RLE array. 
*/ 20 | void rlesInit( RLE **R, siz n ); 21 | void rlesFree( RLE **R, siz n ); 22 | 23 | /* Encode binary masks using RLE. */ 24 | void rleEncode( RLE *R, const byte *mask, siz h, siz w, siz n ); 25 | 26 | /* Decode binary masks encoded via RLE. */ 27 | void rleDecode( const RLE *R, byte *mask, siz n ); 28 | 29 | /* Compute union or intersection of encoded masks. */ 30 | void rleMerge( const RLE *R, RLE *M, siz n, int intersect ); 31 | 32 | /* Compute area of encoded masks. */ 33 | void rleArea( const RLE *R, siz n, uint *a ); 34 | 35 | /* Compute intersection over union between masks. */ 36 | void rleIou( RLE *dt, RLE *gt, siz m, siz n, byte *iscrowd, double *o ); 37 | 38 | /* Compute non-maximum suppression between bounding masks */ 39 | void rleNms( RLE *dt, siz n, uint *keep, double thr ); 40 | 41 | /* Compute intersection over union between bounding boxes. */ 42 | void bbIou( BB dt, BB gt, siz m, siz n, byte *iscrowd, double *o ); 43 | 44 | /* Compute non-maximum suppression between bounding boxes */ 45 | void bbNms( BB dt, siz n, uint *keep, double thr ); 46 | 47 | /* Get bounding boxes surrounding encoded masks. */ 48 | void rleToBbox( const RLE *R, BB bb, siz n ); 49 | 50 | /* Convert bounding boxes to encoded masks. */ 51 | void rleFrBbox( RLE *R, const BB bb, siz h, siz w, siz n ); 52 | 53 | /* Convert polygon to encoded mask. */ 54 | void rleFrPoly( RLE *R, const double *xy, siz k, siz h, siz w ); 55 | 56 | /* Get compressed string representation of encoded mask. */ 57 | char* rleToString( const RLE *R ); 58 | 59 | /* Convert from compressed string representation of encoded mask. */ 60 | void rleFrString( RLE *R, char *s, siz h, siz w ); 61 | -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.py: -------------------------------------------------------------------------------- 1 | __author__ = 'tsungyi' 2 | 3 | import libs.datasets.pycocotools._mask as _mask 4 | 5 | # Interface for manipulating masks stored in RLE format. 6 | # 7 | # RLE is a simple yet efficient format for storing binary masks. RLE 8 | # first divides a vector (or vectorized image) into a series of piecewise 9 | # constant regions and then for each piece simply stores the length of 10 | # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would 11 | # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1] 12 | # (note that the odd counts are always the numbers of zeros). Instead of 13 | # storing the counts directly, additional compression is achieved with a 14 | # variable bitrate representation based on a common scheme called LEB128. 15 | # 16 | # Compression is greatest given large piecewise constant regions. 17 | # Specifically, the size of the RLE is proportional to the number of 18 | # *boundaries* in M (or for an image the number of boundaries in the y 19 | # direction). Assuming fairly simple shapes, the RLE representation is 20 | # O(sqrt(n)) where n is number of pixels in the object. Hence space usage 21 | # is substantially lower, especially for large simple objects (large n). 22 | # 23 | # Many common operations on masks can be computed directly using the RLE 24 | # (without need for decoding). This includes computations such as area, 25 | # union, intersection, etc. All of these operations are linear in the 26 | # size of the RLE, in other words they are O(sqrt(n)) where n is the area 27 | # of the object. Computing these operations on the original mask is O(n). 
28 | # Thus, using the RLE can result in substantial computational savings. 29 | # 30 | # The following API functions are defined: 31 | # encode - Encode binary masks using RLE. 32 | # decode - Decode binary masks encoded via RLE. 33 | # merge - Compute union or intersection of encoded masks. 34 | # iou - Compute intersection over union between masks. 35 | # area - Compute area of encoded masks. 36 | # toBbox - Get bounding boxes surrounding encoded masks. 37 | # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask. 38 | # 39 | # Usage: 40 | # Rs = encode( masks ) 41 | # masks = decode( Rs ) 42 | # R = merge( Rs, intersect=false ) 43 | # o = iou( dt, gt, iscrowd ) 44 | # a = area( Rs ) 45 | # bbs = toBbox( Rs ) 46 | # Rs = frPyObjects( [pyObjects], h, w ) 47 | # 48 | # In the API the following formats are used: 49 | # Rs - [dict] Run-length encoding of binary masks 50 | # R - dict Run-length encoding of binary mask 51 | # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order) 52 | # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore 53 | # bbs - [nx4] Bounding box(es) stored as [x y w h] 54 | # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list) 55 | # dt,gt - May be either bounding boxes or encoded masks 56 | # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel). 57 | # 58 | # Finally, a note about the intersection over union (iou) computation. 59 | # The standard iou of a ground truth (gt) and detected (dt) object is 60 | # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt)) 61 | # For "crowd" regions, we use a modified criteria. If a gt object is 62 | # marked as "iscrowd", we allow a dt to match any subregion of the gt. 63 | # Choosing gt' in the crowd gt that best matches the dt can be done using 64 | # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing 65 | # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt) 66 | # For crowd gt regions we use this modified criteria above for the iou. 67 | # 68 | # To compile run "python setup.py build_ext --inplace" 69 | # Please do not contact us for help with compiling. 70 | # 71 | # Microsoft COCO Toolbox. version 2.0 72 | # Data, paper, and tutorials available at: http://mscoco.org/ 73 | # Code written by Piotr Dollar and Tsung-Yi Lin, 2015. 
74 | # Licensed under the Simplified BSD License [see coco/license.txt] 75 | 76 | iou = _mask.iou 77 | merge = _mask.merge 78 | frPyObjects = _mask.frPyObjects 79 | 80 | def encode(bimask): 81 | if len(bimask.shape) == 3: 82 | return _mask.encode(bimask) 83 | elif len(bimask.shape) == 2: 84 | h, w = bimask.shape 85 | return _mask.encode(bimask.reshape((h, w, 1), order='F'))[0] 86 | 87 | def decode(rleObjs): 88 | if type(rleObjs) == list: 89 | return _mask.decode(rleObjs) 90 | else: 91 | return _mask.decode([rleObjs])[:,:,0] 92 | 93 | def area(rleObjs): 94 | if type(rleObjs) == list: 95 | return _mask.area(rleObjs) 96 | else: 97 | return _mask.area([rleObjs])[0] 98 | 99 | def toBbox(rleObjs): 100 | if type(rleObjs) == list: 101 | return _mask.toBbox(rleObjs) 102 | else: 103 | return _mask.toBbox([rleObjs])[0] -------------------------------------------------------------------------------- /libs/datasets/pycocotools/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/datasets/pycocotools/mask.pyc -------------------------------------------------------------------------------- /libs/datasets/pycocotools/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | from Cython.Build import cythonize 3 | from distutils.extension import Extension 4 | import numpy as np 5 | 6 | # To compile and install locally run "python setup.py build_ext --inplace" 7 | # To install library to Python site-packages run "python setup.py build_ext install" 8 | 9 | ext_modules = [ 10 | Extension( 11 | '_mask', 12 | sources=['./common/maskApi.c', '_mask.pyx'], 13 | include_dirs = [np.get_include(), './common'], 14 | extra_compile_args=['-Wno-cpp', '-Wno-unused-function', '-std=c99'], 15 | ) 16 | ] 17 | 18 | setup(name='pycocotools', 19 | packages=['pycocotools'], 20 | package_dir = {'pycocotools': 'pycocotools'}, 21 | version='2.0', 22 | ext_modules= 23 | cythonize(ext_modules) 24 | ) -------------------------------------------------------------------------------- /libs/layers/__init__.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Mask RCNN 3 | # Written by CharlesShang@github 4 | # -------------------------------------------------------- 5 | from __future__ import absolute_import 6 | from __future__ import division 7 | from __future__ import print_function 8 | 9 | from .wrapper import anchor_decoder 10 | from .wrapper import anchor_encoder 11 | from .wrapper import roi_decoder 12 | from .wrapper import roi_encoder 13 | from .wrapper import mask_decoder 14 | from .wrapper import mask_encoder 15 | from .wrapper import sample_wrapper as sample_rpn_outputs 16 | from .wrapper import sample_with_gt_wrapper as sample_rpn_outputs_with_gt 17 | from .wrapper import gen_all_anchors 18 | from .wrapper import assign_boxes 19 | from .crop import crop as ROIAlign 20 | from .crop import crop_ as ROIAlign_ 21 | -------------------------------------------------------------------------------- /libs/layers/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/__init__.pyc 
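Editor's note: the long comment block in pycocotools/mask.py above describes the RLE mask API in the abstract; the tiny round-trip sketch below makes it concrete. It is an illustration, not repo code, and assumes the extension has been built (e.g. python setup.py build_ext --inplace, as in the pycocotools Makefile).

import numpy as np
from libs.datasets.pycocotools import mask as mask_util

# binary masks must be uint8 and in column-major (Fortran) order
m = np.zeros((4, 6), dtype=np.uint8)
m[1:3, 2:5] = 1                            # a 2x3 rectangle of foreground
rle = mask_util.encode(np.asfortranarray(m))

print(mask_util.area(rle))                 # 6 (pixels)
print(mask_util.toBbox(rle))               # [2. 1. 3. 2.] as [x, y, w, h]
assert (mask_util.decode(rle) == m).all()  # encode/decode is lossless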
-------------------------------------------------------------------------------- /libs/layers/anchor.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/anchor.pyc -------------------------------------------------------------------------------- /libs/layers/assign.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import numpy as np
8 |
9 | import libs.boxes.cython_bbox as cython_bbox
10 | import libs.configs.config_v1 as cfg
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 | from libs.boxes.anchor import anchors_plane
13 | from libs.logs.log import LOG
14 | # FLAGS = tf.app.flags.FLAGS
15 |
16 | _DEBUG = False
17 |
18 | def assign_boxes(gt_boxes, min_k=2, max_k=5):
19 | """Assign boxes to pyramid layers according to their areas, using k = floor(k0 + log2(sqrt(area) / 224)) as in FPN
20 | Params
21 | -----
22 | gt_boxes: of shape (N, 5), each entry is [x1, y1, x2, y2, cls]
23 | min_k, max_k: the lowest and highest pyramid levels a box may be assigned to
24 |
25 | Returns
26 | -----
27 | layer_ids: of shape (N,), each entry is an id indicating the assigned layer
28 | """
29 | k0 = 4
30 | if gt_boxes.size > 0:
31 | layer_ids = np.zeros((gt_boxes.shape[0], ), dtype=np.int32)
32 | ws = gt_boxes[:, 2] - gt_boxes[:, 0]
33 | hs = gt_boxes[:, 3] - gt_boxes[:, 1]
34 | areas = ws * hs
35 | k = np.floor(k0 + np.log2(np.sqrt(areas) / 224))
36 | inds = np.where(k < min_k)[0]
37 | k[inds] = min_k
38 | inds = np.where(k > max_k)[0]
39 | k[inds] = max_k
40 | if _DEBUG:
41 | print ("### boxes and layer ids")
42 | print (np.hstack((gt_boxes[:, 0:4], k[:, np.newaxis])))
43 | return k.astype(np.int32)
44 |
45 | else:
46 | return np.asarray([], dtype=np.int32)
47 | -------------------------------------------------------------------------------- /libs/layers/assign.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/assign.pyc -------------------------------------------------------------------------------- /libs/layers/crop.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import tensorflow as tf
6 |
7 | def crop(images, boxes, batch_inds, stride = 1, pooled_height = 56, pooled_width = 56, scope='ROIAlign'):
8 | """Cropping areas of features into fixed size
9 | Params:
10 | --------
11 | images: a 4-d Tensor of shape (N, H, W, C)
12 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
13 | batch_inds: a 1-d Tensor, the index of the source image in the batch for each box
14 |
15 | Returns:
16 | --------
17 | A Tensor of shape (N, pooled_height, pooled_width, C)
18 | """
19 | with tf.name_scope(scope):
20 | #
21 | boxes = boxes / (stride + 0.0) # stride is one of 32, 16, 8, 4;
scaling by the stride maps image-space boxes onto the feature map grid
22 | boxes = tf.reshape(boxes, [-1, 4])
23 |
24 | # normalize the boxes and swap x y dimensions
25 | shape = tf.shape(images)
26 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
27 | xs = boxes[:, 0]
28 | ys = boxes[:, 1]
29 | xs = xs / tf.cast(shape[2], tf.float32) # crop_and_resize expects box coordinates normalized to [0, 1]
30 | ys = ys / tf.cast(shape[1], tf.float32)
31 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
32 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
33 |
34 | # if batch_inds is False:
35 | # num_boxes = tf.shape(boxes)[0]
36 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
37 | # batch_inds = boxes[:, 0] * 0
38 | # batch_inds = tf.cast(batch_inds, tf.int32)
39 |
40 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
41 | if batch_inds is False:
42 | num_boxes = tf.shape(boxes)[0]
43 | batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
44 | batch_inds = boxes[:, 0] * 0
45 | batch_inds = tf.cast(batch_inds, tf.int32)
46 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
47 | with tf.control_dependencies([assert_op, images, batch_inds]):
48 | return tf.image.crop_and_resize(images, boxes, batch_inds,
49 | [pooled_height, pooled_width],
50 | method='bilinear',
51 | name='Crop')
52 |
53 | def crop_(images, boxes, batch_inds, ih, iw, stride = 1, pooled_height = 7, pooled_width = 7, scope='ROIAlign'):
54 | """Cropping areas of features into fixed size
55 | Params:
56 | --------
57 | images: a 4-d Tensor of shape (N, H, W, C)
58 | boxes: rois in the original image, of shape (N, ..., 4), [x1, y1, x2, y2]
59 | batch_inds: a 1-d Tensor, the index of the source image in the batch for each box
60 |
61 | Returns:
62 | --------
63 | A Tensor of shape (N, pooled_height, pooled_width, C)
64 | """
65 | with tf.name_scope(scope):
66 | #
67 | boxes = boxes / (stride + 0.0) # stride is one of 32, 16, 8, 4; scaling by it maps image-space boxes onto the feature map grid
68 | boxes = tf.reshape(boxes, [-1, 4])
69 |
70 | # normalize the boxes and swap x y dimensions
71 | shape = tf.shape(images) # images here is (N, H/stride, W/stride, C)
72 | boxes = tf.reshape(boxes, [-1, 2]) # to (x, y)
73 | xs = boxes[:, 0]
74 | ys = boxes[:, 1]
75 | xs = xs / tf.cast(shape[2], tf.float32) # crop_and_resize expects
box coordinates normalized to [0, 1]
76 | ys = ys / tf.cast(shape[1], tf.float32)
77 | boxes = tf.concat([ys[:, tf.newaxis], xs[:, tf.newaxis]], axis=1)
78 | boxes = tf.reshape(boxes, [-1, 4]) # to (y1, x1, y2, x2)
79 |
80 | # if batch_inds is False:
81 | # num_boxes = tf.shape(boxes)[0]
82 | # batch_inds = tf.zeros([num_boxes], dtype=tf.int32, name='batch_inds')
83 | # batch_inds = boxes[:, 0] * 0
84 | # batch_inds = tf.cast(batch_inds, tf.int32)
85 |
86 | # assert_op = tf.Assert(tf.greater(tf.shape(images)[0], tf.reduce_max(batch_inds)), [images, batch_inds])
87 | assert_op = tf.Assert(tf.greater(tf.size(images), 0), [images, batch_inds])
88 | with tf.control_dependencies([assert_op, images, batch_inds]):
89 | return [tf.image.crop_and_resize(images, boxes, batch_inds,
90 | [pooled_height, pooled_width],
91 | method='bilinear',
92 | name='Crop')] + [boxes]
93 |
94 | -------------------------------------------------------------------------------- /libs/layers/crop.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/crop.pyc -------------------------------------------------------------------------------- /libs/layers/mask.py: --------------------------------------------------------------------------------
1 | # coding=utf-8
2 | from __future__ import absolute_import
3 | from __future__ import division
4 | from __future__ import print_function
5 |
6 | import numpy as np
7 | import cv2
8 | import libs.boxes.cython_bbox as cython_bbox
9 | import libs.configs.config_v1 as cfg
10 | from libs.logs.log import LOG
11 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
12 |
13 | _DEBUG = False
14 | def encode(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width):
15 | """Encode masks groundtruth into learnable targets
16 | Sample some examples
17 |
18 | Params
19 | ------
20 | gt_masks: image_height x image_width {0, 1} matrix, of shape (G, imh, imw)
21 | # note: modified here -- gt_masks is of shape (G, imh, imw, 7), one channel per body-part class
22 | gt_boxes: ground-truth boxes of shape (G, 5), each row is [x1, y1, x2, y2, class]
23 | rois: the bounding boxes of shape (N, 4),
24 | ## scores: scores of shape (N, 1)
25 | num_classes: K
26 | mask_height, mask_width: height and width of output masks
27 |
28 | Returns
29 | -------
30 | # rois: boxes sampled for cropping masks, of shape (M, 4)
31 | labels: class-ids of shape (M, 1)
32 | mask_targets: learning targets of shape (M, pooled_height, pooled_width, K) in {0, 1} values
33 | mask_inside_weights: of shape (M, pooled_height, pooled_width, K) in {0, 1} indicating which mask is sampled
34 | """
35 | total_masks = rois.shape[0]
36 | if gt_boxes.size > 0:
37 | # B x G
38 | overlaps = cython_bbox.bbox_overlaps(
39 | np.ascontiguousarray(rois[:, 0:4], dtype=np.float),
40 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
41 | gt_assignment = overlaps.argmax(axis=1) # shape is N
42 | max_overlaps = overlaps[np.arange(len(gt_assignment)), gt_assignment] # N
43 | # note: this would assign every roi a positive label
44 | # labels = gt_boxes[gt_assignment, 4] # N
45 | labels = np.zeros((total_masks, ), np.float32)
46 | labels[:] = -1
47 |
48 | # sample positive rois whose intersection is above the mask threshold
49 | keep_inds = np.where(max_overlaps >= cfg.FLAGS.mask_threshold)[0]
50 | num_masks =
int(min(keep_inds.size, cfg.FLAGS.masks_per_image))
51 | if keep_inds.size > 0 and num_masks < keep_inds.size:
52 | keep_inds = np.random.choice(keep_inds, size=num_masks, replace=False)
53 | LOG('Masks: %d of %d rois are considered positive mask. Number of masks %d'\
54 | %(num_masks, rois.shape[0], gt_masks.shape[0]))
55 |
56 | labels[keep_inds] = gt_boxes[gt_assignment[keep_inds], -1]
57 |
58 | # rois = rois[inds]
59 | # labels = labels[inds].astype(np.int32)
60 | # gt_assignment = gt_assignment[inds]
61 |
62 | # ignore rois with overlaps between fg_threshold and bg_threshold
63 | # masks are only defined on positive rois
64 | ignore_inds = np.where((max_overlaps < cfg.FLAGS.fg_threshold))[0]
65 | labels[ignore_inds] = -1
66 |
67 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
68 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
69 | rois[rois < 0] = 0
70 |
71 | # TODO: speed bottleneck?
72 | for i in keep_inds:
73 | roi = rois[i, :4]
74 |
75 | for x in range(7): # one channel per body-part class; assumes num_classes >= 7
76 | cropped = gt_masks[gt_assignment[i], int(roi[1]):int(roi[3])+1, int(roi[0]):int(roi[2])+1,x]
77 | cropped = cv2.resize(cropped, (mask_width, mask_height), interpolation=cv2.INTER_NEAREST)
78 | mask_targets[i, :, :, x] = cropped
79 | mask_inside_weights[i, :, :, x] = 1
80 | else:
81 | # there is no gt
82 | labels = np.zeros((total_masks, ), np.float32)
83 | labels[:] = -1
84 | mask_targets = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.int32)
85 | mask_inside_weights = np.zeros((total_masks, mask_height, mask_width, num_classes), dtype=np.float32)
86 | #np.save("/home/czurini/Alex/rois.npy",rois)
87 | # np.save("/home/czurini/Alex/mask_targets.npy",mask_targets)
88 | return labels, mask_targets, mask_inside_weights
89 |
90 | def decode(mask_targets, rois, classes, ih, iw):
91 | """Decode outputs into final masks
92 | Params
93 | ------
94 | mask_targets: of shape (N, h, w, K)
95 | rois: of shape (N, 4) [x1, y1, x2, y2]
96 | classes: of shape (N, 1) the class-id of each roi
97 | height: image height
98 | width: image width
99 |
100 | Returns
101 | ------
102 | M: a painted image with all masks, of shape (height, width), in [0, K]
103 | """
104 | Mask = np.zeros((ih, iw), dtype=np.float32)
105 | assert rois.shape[0] == mask_targets.shape[0], \
106 | '%d rois vs %d masks' %(rois.shape[0], mask_targets.shape[0])
107 | num = rois.shape[0]
108 | rois = clip_boxes(rois, (ih, iw))
109 | for i in np.arange(num):
110 | k = classes[i]
111 | mask = mask_targets[i, :, :, k]
112 | h, w = rois[i, 3] - rois[i, 1] + 1, rois[i, 2] - rois[i, 0] + 1
113 | x, y = rois[i, 0], rois[i, 1]
114 | mask = cv2.resize(mask, (w, h), interpolation=cv2.INTER_NEAREST)
115 | mask *= k
116 |
117 | # paint
118 | Mask[y:y+h, x:x+w] = mask
119 |
120 | return Mask
121 |
122 |
123 |
124 | if __name__ == '__main__':
125 |
126 | import time
127 | import matplotlib.pyplot as plt
128 |
129 | t = time.time()
130 |
131 | for i in range(10):
132 | cfg.FLAGS.mask_threshold = 0.2
133 | N = 50
134 | W, H = 200, 200
135 | M = 50
136 |
137 | gt_masks = np.zeros((2, H, W, 7), dtype=np.int32) # the modified encode expects one channel per body-part class
138 | gt_masks[0, 50:150, 50:150, :] = 1
139 | gt_masks[1, 100:150, 50:150, :] = 1
140 | gt_boxes = np.asarray(
141 | [
142 | [20, 20, 100, 100, 1],
143 | [100, 100, 180, 180, 2]
144 | ])
145 | rois = gt_boxes[:, :4]
146 | print (rois)
147 | labels, mask_targets, mask_inside_weights = encode(gt_masks, gt_boxes, rois, 7, 7, 7)
148 | print (rois)
149 | Mask =
decode(mask_targets, rois, labels.astype(np.int32), H, W)
150 | if True:
151 | plt.figure(1)
152 | plt.imshow(Mask)
153 | plt.show()
154 | time.sleep(2)
155 | print(labels)
156 | print('average time: %f' % ((time.time() - t) / 10.0))
157 |
158 | -------------------------------------------------------------------------------- /libs/layers/mask.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/mask.pyc -------------------------------------------------------------------------------- /libs/layers/roi.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import numpy as np
6 |
7 | import libs.boxes.cython_bbox as cython_bbox
8 | import libs.configs.config_v1 as cfg
9 | from libs.boxes.bbox_transform import bbox_transform, bbox_transform_inv, clip_boxes
10 | from libs.logs.log import LOG
11 |
12 | # FLAGS = tf.app.flags.FLAGS
13 |
14 | _DEBUG = False
15 |
16 | def encode(gt_boxes, rois, num_classes):
17 | """Matching and encoding groundtruth boxes (gt_boxes) into learning targets for the boxes
18 | (with foreground/background sampling)
19 | Parameters
20 | ---------
21 | gt_boxes an array of shape (G x 5), [x1, y1, x2, y2, class]
22 | rois an array of shape (R x 4), [x1, y1, x2, y2]
23 | num_classes: scalar, number of classes
24 |
25 | Returns
26 | --------
27 | labels: Nx1 array in [0, num_classes)
28 | bbox_targets: of shape (N, Kx4) regression targets
29 | bbox_inside_weights: of shape (N, Kx4), in {0, 1} indicating which class is assigned.
30 | """
31 |
32 | all_rois = rois
33 | num_rois = rois.shape[0]
34 | if gt_boxes.size > 0:
35 | # R x G matrix
36 | overlaps = cython_bbox.bbox_overlaps(
37 | np.ascontiguousarray(all_rois[:, 0:4], dtype=np.float),
38 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
39 | gt_assignment = overlaps.argmax(axis=1) # R
40 | # max_overlaps = overlaps.max(axis=1) # R
41 | max_overlaps = overlaps[np.arange(rois.shape[0]), gt_assignment]
42 | # note: this would assign every roi a positive label
43 | # labels = gt_boxes[gt_assignment, 4]
44 | labels = np.zeros([num_rois], dtype=np.float32)
45 | labels[:] = -1
46 |
47 | # if _DEBUG:
48 | # print ('gt_assignment')
49 | # print (gt_assignment)
50 |
51 | # sample rois at an fg:bg ratio of about 1:3
52 | fg_inds = np.where(max_overlaps >= cfg.FLAGS.fg_threshold)[0]
53 | fg_rois = int(min(fg_inds.size, cfg.FLAGS.rois_per_image * cfg.FLAGS.fg_roi_fraction)) # rois_per_image = 256, fg_roi_fraction = 0.25
54 | if fg_inds.size > 0 and fg_rois < fg_inds.size:
55 | fg_inds = np.random.choice(fg_inds, size=fg_rois, replace=False)
56 | labels[fg_inds] = gt_boxes[gt_assignment[fg_inds], 4]
57 |
58 | # TODO: sampling strategy
59 | bg_inds = np.where((max_overlaps < cfg.FLAGS.bg_threshold))[0]
60 | bg_rois = max(min(cfg.FLAGS.rois_per_image - fg_rois, fg_rois * 3), 64) # rois_per_image = 256
61 | if bg_inds.size > 0 and bg_rois < bg_inds.size:
62 | bg_inds = np.random.choice(bg_inds, size=bg_rois, replace=False)
63 | labels[bg_inds] = 0
64 |
65 | # ignore rois with overlaps between fg_threshold and bg_threshold
66 | ignore_inds = np.where(((max_overlaps > cfg.FLAGS.bg_threshold) &\
67 | (max_overlaps < cfg.FLAGS.fg_threshold)))[0]
68 | labels[ignore_inds] = -1
69 |
70 | keep_inds = np.append(fg_inds, bg_inds)
71 | if _DEBUG:
72 | print ('keep_inds')
73 | print (keep_inds)
74 | print ('fg_inds')
75 | print (fg_inds)
76 | print ('bg_inds')
77 | print (bg_inds)
78 | print ('bg_rois:', bg_rois)
79 | print ('cfg.FLAGS.bg_threshold:', cfg.FLAGS.bg_threshold)
80 | # print (max_overlaps)
81 |
82 | LOG('ROIEncoder: %d positive rois, %d negative rois' % (len(fg_inds), len(bg_inds)))
83 |
84 | bbox_targets, bbox_inside_weights = _compute_targets(
85 | rois[keep_inds, 0:4], gt_boxes[gt_assignment[keep_inds], :4], labels[keep_inds], num_classes)
86 | bbox_targets = _unmap(bbox_targets, num_rois, keep_inds, 0)
87 | bbox_inside_weights = _unmap(bbox_inside_weights, num_rois, keep_inds, 0)
88 |
89 | else:
90 | # there is no gt
91 | labels = np.zeros((num_rois, ), np.float32)
92 | bbox_targets = np.zeros((num_rois, 4 * num_classes), np.float32)
93 | bbox_inside_weights = np.zeros((num_rois, 4 * num_classes), np.float32)
94 | bg_rois = min(int(cfg.FLAGS.rois_per_image * (1 - cfg.FLAGS.fg_roi_fraction)), 64)
95 | if bg_rois < num_rois:
96 | bg_inds = np.arange(num_rois)
97 | ignore_inds = np.random.choice(bg_inds, size=num_rois - bg_rois, replace=False)
98 | labels[ignore_inds] = -1
99 |
100 | return labels, bbox_targets, bbox_inside_weights
101 |
102 | def decode(boxes, scores, rois, ih, iw):
103 | """Decode prediction targets into boxes, keeping only the highest-scoring box for each roi
104 | Parameters
105 | ---------
106 | boxes: an array of shape (R, Kx4), K sets of [x1, y1, x2, y2]
107 | scores: an array of shape (R, K),
108 | rois: an array of shape (R, 4), [x1, y1, x2, y2]
109 |
110 | Returns
111 | --------
112 | final_boxes: of shape (R x 4)
113 | classes: of shape (R) in {0,1,2,3... K-1}
114 | scores: of shape (R) in [0 ~ 1]
115 | """
116 | boxes = bbox_transform_inv(rois, deltas=boxes)
117 | classes = np.argmax(scores, axis=1)
118 | classes = classes.astype(np.int32)
119 | scores = np.max(scores, axis=1)
120 | final_boxes = np.zeros((boxes.shape[0], 4), dtype=np.float32)
121 | for i in np.arange(0, boxes.shape[0]):
122 | ind = classes[i]*4
123 | final_boxes[i, 0:4] = boxes[i, ind:ind+4]
124 | final_boxes = clip_boxes(final_boxes, (ih, iw))
125 | return final_boxes, classes, scores
126 |
127 | def _compute_targets(ex_rois, gt_rois, labels, num_classes):
128 | """
129 | This function expands those targets into the 4-of-4*K representation used
130 | by the network (i.e. only one class has non-zero targets).
131 |
132 | Returns:
133 | bbox_target (ndarray): N x 4K blob of regression targets
134 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
135 | """
136 |
137 | assert ex_rois.shape[0] == gt_rois.shape[0]
138 | assert ex_rois.shape[1] == 4
139 | assert gt_rois.shape[1] == 4
140 |
141 | targets = bbox_transform(ex_rois, gt_rois)
142 |
143 | clss = labels
144 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
145 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
146 | inds = np.where(clss > 0)[0]
147 | for ind in inds:
148 | cls = int(clss[ind])
149 | start = 4 * cls
150 | end = start + 4
151 | bbox_targets[ind, start:end] = targets[ind, 0:4]
152 | bbox_inside_weights[ind, start:end] = 1
153 | return bbox_targets, bbox_inside_weights
154 |
155 | def _unmap(data, count, inds, fill=0):
156 | """ Unmap a subset of items (data) back to the original set of items (of
157 | size count) """
158 | if len(data.shape) == 1:
159 | ret = np.empty((count,), dtype=np.float32)
160 | ret.fill(fill)
161 | ret[inds] = data
162 | else:
163 | ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
164 | ret.fill(fill)
165 | ret[inds, :] = data
166 | return ret
167 |
168 | if __name__ == '__main__':
169 | cfg.FLAGS.fg_threshold = 0.1
170 | classes = np.random.randint(0, 3, (10, 1))
171 | boxes = np.random.randint(10, 50, (10, 2))
172 | s = np.random.randint(10, 20, (10, 2))
173 | s = boxes + s
174 | boxes = np.concatenate((boxes, s), axis=1)
175 | gt_boxes = np.hstack((boxes, classes))
176 | noise = np.random.randint(-3, 3, (10, 4))
177 | rois = gt_boxes[:, :4] + noise
178 | labels, bbox_targets, bbox_inside_weights = encode(gt_boxes, rois, num_classes=3)
179 | print (labels)
180 | print (bbox_inside_weights)
181 |
182 | ls = np.zeros((labels.shape[0], 3))
183 | for i in range(labels.shape[0]):
184 | ls[i, int(labels[i])] = 1 # labels come back as float32
185 | final_boxes, classes, scores = decode(bbox_targets, ls, rois, 100, 100)
186 | print('gt_boxes:\n', gt_boxes)
187 | print ('final boxes:\n', np.hstack((final_boxes, np.expand_dims(classes, axis=1))).astype(np.int32))
188 | # print (final_boxes.astype(np.int32))
189 | -------------------------------------------------------------------------------- /libs/layers/roi.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/roi.pyc -------------------------------------------------------------------------------- /libs/layers/sample.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/sample.pyc -------------------------------------------------------------------------------- /libs/layers/wrapper.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Mask RCNN
3 | # Written by CharlesShang@github
4 | # --------------------------------------------------------
5 | from __future__ import absolute_import
6 | from __future__ import division
7 | from __future__ import print_function
8 |
9 | import tensorflow as tf
10 | from . import anchor
11 | from . import roi
12 | from . import mask
13 | from . import sample
14 | from . 
import assign 15 | from libs.boxes.anchor import anchors_plane 16 | 17 | def anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder'): 18 | 19 | with tf.name_scope(scope) as sc: 20 | labels, bbox_targets, bbox_inside_weights = \ 21 | tf.py_func(anchor.encode, 22 | [gt_boxes, all_anchors, height, width, stride], 23 | [tf.float32, tf.float32, tf.float32]) 24 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 25 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 26 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 27 | labels = tf.reshape(labels, (1, height, width, -1)) 28 | bbox_targets = tf.reshape(bbox_targets, (1, height, width, -1)) 29 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (1, height, width, -1)) 30 | 31 | return labels, bbox_targets, bbox_inside_weights 32 | 33 | 34 | def anchor_decoder(boxes, scores, all_anchors, ih, iw, scope='AnchorDecoder'): 35 | 36 | with tf.name_scope(scope) as sc: 37 | final_boxes, classes, scores = \ 38 | tf.py_func(anchor.decode, 39 | [boxes, scores, all_anchors, ih, iw], 40 | [tf.float32, tf.int32, tf.float32]) 41 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 42 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 43 | scores = tf.convert_to_tensor(scores, name='scores') 44 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 45 | classes = tf.reshape(classes, (-1, )) 46 | scores = tf.reshape(scores, (-1, )) 47 | 48 | return final_boxes, classes, scores 49 | 50 | 51 | def roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder'): 52 | 53 | with tf.name_scope(scope) as sc: 54 | labels, bbox_targets, bbox_inside_weights = \ 55 | tf.py_func(roi.encode, 56 | [gt_boxes, rois, num_classes], 57 | [tf.float32, tf.float32, tf.float32]) 58 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='labels') 59 | bbox_targets = tf.convert_to_tensor(bbox_targets, name='bbox_targets') 60 | bbox_inside_weights = tf.convert_to_tensor(bbox_inside_weights, name='bbox_inside_weights') 61 | labels = tf.reshape(labels, (-1, )) 62 | bbox_targets = tf.reshape(bbox_targets, (-1, num_classes * 4)) 63 | bbox_inside_weights = tf.reshape(bbox_inside_weights, (-1, num_classes * 4)) 64 | 65 | return labels, bbox_targets, bbox_inside_weights 66 | 67 | 68 | def roi_decoder(boxes, scores, rois, ih, iw, scope='ROIDecoder'): 69 | 70 | with tf.name_scope(scope) as sc: 71 | final_boxes, classes, scores = \ 72 | tf.py_func(roi.decode, 73 | [boxes, scores, rois, ih, iw], 74 | [tf.float32, tf.int32, tf.float32]) 75 | final_boxes = tf.convert_to_tensor(final_boxes, name='boxes') 76 | classes = tf.convert_to_tensor(tf.cast(classes, tf.int32), name='classes') 77 | scores = tf.convert_to_tensor(scores, name='scores') 78 | final_boxes = tf.reshape(final_boxes, (-1, 4)) 79 | 80 | return final_boxes, classes, scores 81 | 82 | def mask_encoder(gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width, scope='MaskEncoder'): 83 | 84 | with tf.name_scope(scope) as sc: 85 | labels, mask_targets, mask_inside_weights = \ 86 | tf.py_func(mask.encode, 87 | [gt_masks, gt_boxes, rois, num_classes, mask_height, mask_width], 88 | [tf.float32, tf.int32, tf.float32]) 89 | labels = tf.convert_to_tensor(tf.cast(labels, tf.int32), name='classes') 90 | mask_targets = tf.convert_to_tensor(mask_targets, name='mask_targets') 91 | mask_inside_weights = tf.convert_to_tensor(mask_inside_weights, name='mask_inside_weights') 92 | labels = 
tf.reshape(labels, (-1,)) 93 | mask_targets = tf.reshape(mask_targets, (-1, mask_height, mask_width, num_classes)) 94 | mask_inside_weights = tf.reshape(mask_inside_weights, (-1, mask_height, mask_width, num_classes)) 95 | 96 | return labels, mask_targets, mask_inside_weights 97 | 98 | def mask_decoder(mask_targets, rois, classes, ih, iw, scope='MaskDecoder'): 99 | 100 | with tf.name_scope(scope) as sc: 101 | Mask = \ 102 | tf.py_func(mask.decode, 103 | [mask_targets, rois, classes, ih, iw,], 104 | [tf.float32]) 105 | Mask = tf.convert_to_tensor(Mask, name='MaskImage') 106 | Mask = tf.reshape(Mask, (ih, iw)) 107 | 108 | return Mask 109 | 110 | 111 | def sample_wrapper(boxes, scores, is_training=True, scope='SampleBoxes'): 112 | 113 | with tf.name_scope(scope) as sc: 114 | boxes, scores, batch_inds = \ 115 | tf.py_func(sample.sample_rpn_outputs, 116 | [boxes, scores, is_training], 117 | [tf.float32, tf.float32, tf.int32]) 118 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 119 | scores = tf.convert_to_tensor(scores, name='Scores') 120 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 121 | boxes = tf.reshape(boxes, (-1, 4)) 122 | batch_inds = tf.reshape(batch_inds, [-1]) 123 | 124 | return boxes, scores, batch_inds 125 | 126 | def sample_with_gt_wrapper(boxes, scores, gt_boxes, is_training=True, scope='SampleBoxesWithGT'): 127 | 128 | with tf.name_scope(scope) as sc: 129 | boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds = \ 130 | tf.py_func(sample.sample_rpn_outputs_wrt_gt_boxes, 131 | [boxes, scores, gt_boxes, is_training], 132 | [tf.float32, tf.float32, tf.int32, tf.float32, tf.float32, tf.int32]) 133 | boxes = tf.convert_to_tensor(boxes, name='Boxes') 134 | scores = tf.convert_to_tensor(scores, name='Scores') 135 | batch_inds = tf.convert_to_tensor(batch_inds, name='BatchInds') 136 | 137 | mask_boxes = tf.convert_to_tensor(mask_boxes, name='MaskBoxes') 138 | mask_scores = tf.convert_to_tensor(mask_scores, name='MaskScores') 139 | mask_batch_inds = tf.convert_to_tensor(mask_batch_inds, name='MaskBatchInds') 140 | 141 | return boxes, scores, batch_inds, mask_boxes, mask_scores, mask_batch_inds 142 | 143 | def gen_all_anchors(height, width, stride, scales, scope='GenAnchors'): 144 | 145 | with tf.name_scope(scope) as sc: 146 | all_anchors = \ 147 | tf.py_func(anchors_plane, 148 | [height, width, stride, scales], 149 | [tf.float64] 150 | ) 151 | all_anchors = tf.convert_to_tensor(tf.cast(all_anchors, tf.float32), name='AllAnchors') 152 | all_anchors = tf.reshape(all_anchors, (height, width, -1)) 153 | 154 | return all_anchors 155 | 156 | def assign_boxes(gt_boxes, tensors, layers, scope='AssignGTBoxes'): 157 | 158 | with tf.name_scope(scope) as sc: 159 | min_k = layers[0] 160 | max_k = layers[-1] 161 | assigned_layers = \ 162 | tf.py_func(assign.assign_boxes, 163 | [ gt_boxes, min_k, max_k ], 164 | tf.int32) 165 | assigned_layers = tf.reshape(assigned_layers, [-1]) 166 | 167 | assigned_tensors = [] 168 | for t in tensors: 169 | split_tensors = [] 170 | for l in layers: 171 | tf.cast(l, tf.int32) 172 | inds = tf.where(tf.equal(assigned_layers, l)) 173 | inds = tf.reshape(inds, [-1]) 174 | split_tensors.append(tf.gather(t, inds)) 175 | assigned_tensors.append(split_tensors) 176 | 177 | return assigned_tensors + [assigned_layers] -------------------------------------------------------------------------------- /libs/layers/wrapper.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/layers/wrapper.pyc -------------------------------------------------------------------------------- /libs/logs/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.py -------------------------------------------------------------------------------- /libs/logs/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/__init__.pyc -------------------------------------------------------------------------------- /libs/logs/log.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import logging 6 | import libs.configs.config_v1 as cfg 7 | 8 | def LOG(mssg): 9 | logging.basicConfig(filename=cfg.FLAGS.train_dir + '/maskrcnn.log', 10 | level=logging.INFO, 11 | datefmt='%m/%d/%Y %I:%M:%S %p', format='%(asctime)s %(message)s') 12 | logging.info(mssg) -------------------------------------------------------------------------------- /libs/logs/log.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/logs/log.pyc -------------------------------------------------------------------------------- /libs/make.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # build pycocotools 4 | cd datasets/pycocotools 5 | make 6 | cd - 7 | -------------------------------------------------------------------------------- /libs/nets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.py -------------------------------------------------------------------------------- /libs/nets/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/__init__.pyc -------------------------------------------------------------------------------- /libs/nets/nets_factory.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | import functools 5 | 6 | import tensorflow as tf 7 | 8 | from . 
import resnet_v1
9 | from .resnet_v1 import resnet_v1_50 as resnet50
10 | from .resnet_utils import resnet_arg_scope
11 | from .resnet_v1 import resnet_v1_101 as resnet101
12 |
13 | slim = tf.contrib.slim
14 |
15 | pyramid_maps = {
16 | 'resnet50': {'C1':'resnet_v1_50/conv1/Relu:0',
17 | 'C2':'resnet_v1_50/block1/unit_2/bottleneck_v1',
18 | 'C3':'resnet_v1_50/block2/unit_3/bottleneck_v1',
19 | 'C4':'resnet_v1_50/block3/unit_5/bottleneck_v1',
20 | 'C5':'resnet_v1_50/block4/unit_3/bottleneck_v1',
21 | },
22 | 'resnet101': {'C1': '', 'C2': '',
23 | 'C3': '', 'C4': '',
24 | 'C5': '',
25 | }
26 | }
27 |
28 | def get_network(name, image, weight_decay=0.000005, is_training=False):
29 |
30 | if name == 'resnet50':
31 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
32 | logits, end_points = resnet50(image, 1000, is_training=is_training)
33 |
34 | if name == 'resnet101':
35 | with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
36 | logits, end_points = resnet101(image, 1000, is_training=is_training)
37 |
38 | if name == 'resnext50':
39 | raise NotImplementedError('resnext50 is not supported yet')
40 |
41 | end_points['input'] = image
42 | return logits, end_points, pyramid_maps[name]
43 | -------------------------------------------------------------------------------- /libs/nets/nets_factory.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/nets_factory.pyc -------------------------------------------------------------------------------- /libs/nets/pyramid_network.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/pyramid_network.pyc -------------------------------------------------------------------------------- /libs/nets/resnet_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_utils.pyc -------------------------------------------------------------------------------- /libs/nets/resnet_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nets/resnet_v1.pyc -------------------------------------------------------------------------------- /libs/nets/train_utils.py: --------------------------------------------------------------------------------
1 | from __future__ import absolute_import
2 | from __future__ import division
3 | from __future__ import print_function
4 |
5 | import collections
6 | import tensorflow as tf
7 | import libs.configs.config_v1 as cfg
8 |
9 | slim = tf.contrib.slim
10 | FLAGS = tf.app.flags.FLAGS
11 |
12 | def _configure_optimizer(learning_rate):
13 | """Configures the optimizer used for training.
14 |
15 | Args:
16 | learning_rate: A scalar or `Tensor` learning rate.
17 |
18 | Returns:
19 | An instance of an optimizer.
20 |
21 | Raises:
22 | ValueError: if FLAGS.optimizer is not recognized.
23 | """
24 | if FLAGS.optimizer == 'adadelta':
25 | optimizer = tf.train.AdadeltaOptimizer(
26 | learning_rate,
27 | rho=FLAGS.adadelta_rho,
28 | epsilon=FLAGS.opt_epsilon)
29 | elif FLAGS.optimizer == 'adagrad':
30 | optimizer = tf.train.AdagradOptimizer(
31 | learning_rate,
32 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
33 | elif FLAGS.optimizer == 'adam':
34 | optimizer = tf.train.AdamOptimizer(
35 | learning_rate,
36 | beta1=FLAGS.adam_beta1,
37 | beta2=FLAGS.adam_beta2,
38 | epsilon=FLAGS.opt_epsilon)
39 | elif FLAGS.optimizer == 'ftrl':
40 | optimizer = tf.train.FtrlOptimizer(
41 | learning_rate,
42 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
43 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
44 | l1_regularization_strength=FLAGS.ftrl_l1,
45 | l2_regularization_strength=FLAGS.ftrl_l2)
46 | elif FLAGS.optimizer == 'momentum':
47 | optimizer = tf.train.MomentumOptimizer(
48 | learning_rate,
49 | momentum=FLAGS.momentum,
50 | name='Momentum')
51 | elif FLAGS.optimizer == 'rmsprop':
52 | optimizer = tf.train.RMSPropOptimizer(
53 | learning_rate,
54 | decay=FLAGS.rmsprop_decay,
55 | momentum=FLAGS.rmsprop_momentum,
56 | epsilon=FLAGS.opt_epsilon)
57 | elif FLAGS.optimizer == 'sgd':
58 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
59 | else:
60 | raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
61 | return optimizer
62 |
63 | def _configure_learning_rate(num_samples_per_epoch, global_step):
64 | """Configures the learning rate.
65 |
66 | Args:
67 | num_samples_per_epoch: The number of samples in each epoch of training.
68 | global_step: The global_step tensor.
69 |
70 | Returns:
71 | A `Tensor` representing the learning rate.
72 |
73 | Raises:
74 | ValueError: if FLAGS.learning_rate_decay_type is not recognized.
75 | """
76 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *
77 | FLAGS.num_epochs_per_decay)
78 | if FLAGS.sync_replicas:
79 | decay_steps /= FLAGS.replicas_to_aggregate
80 |
81 | if FLAGS.learning_rate_decay_type == 'exponential':
82 | return tf.train.exponential_decay(FLAGS.learning_rate,
83 | global_step,
84 | decay_steps,
85 | FLAGS.learning_rate_decay_factor,
86 | staircase=True,
87 | name='exponential_decay_learning_rate')
88 | elif FLAGS.learning_rate_decay_type == 'fixed':
89 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
90 | elif FLAGS.learning_rate_decay_type == 'polynomial':
91 | return tf.train.polynomial_decay(FLAGS.learning_rate,
92 | global_step,
93 | decay_steps,
94 | FLAGS.end_learning_rate,
95 | power=0.9,
96 | cycle=False,
97 | name='polynomial_decay_learning_rate')
98 | else:
99 | raise ValueError('learning_rate_decay_type [%s] was not recognized' %
100 | FLAGS.learning_rate_decay_type)
101 |
102 | def _get_variables_to_train():
103 | """Returns a list of variables to train.
104 |
105 | Returns:
106 | A list of variables to train by the optimizer.
107 | """
108 | if FLAGS.trainable_scopes is None:
109 | return tf.trainable_variables()
110 | else:
111 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
112 |
113 | variables_to_train = []
114 | for scope in scopes:
115 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
116 | variables_to_train.extend(variables)
117 | return variables_to_train
118 |
119 | def _get_init_fn():
120 | """Returns a function run by the chief worker to warm-start the training.
121 |
122 | Note that the init_fn is only run when initializing the model during the very
123 | first global step.
124 | 125 | Returns: 126 | An init function run by the supervisor. 127 | """ 128 | if FLAGS.checkpoint_path is None: 129 | return None 130 | 131 | # Warn the user if a checkpoint exists in the train_dir. Then we'll be 132 | # ignoring the checkpoint anyway. 133 | if tf.train.latest_checkpoint(FLAGS.train_dir): 134 | tf.logging.info( 135 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 136 | % FLAGS.train_dir) 137 | return None 138 | 139 | exclusions = [] 140 | if FLAGS.checkpoint_exclude_scopes: 141 | exclusions = [scope.strip() 142 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 143 | 144 | # TODO(sguada) variables.filter_variables() 145 | variables_to_restore = [] 146 | for var in slim.get_model_variables(): 147 | excluded = False 148 | for exclusion in exclusions: 149 | if var.op.name.startswith(exclusion): 150 | excluded = True 151 | break 152 | if not excluded: 153 | variables_to_restore.append(var) 154 | 155 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 156 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 157 | else: 158 | checkpoint_path = FLAGS.checkpoint_path 159 | 160 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 161 | 162 | return slim.assign_from_checkpoint_fn( 163 | checkpoint_path, 164 | variables_to_restore, 165 | ignore_missing_vars=FLAGS.ignore_missing_vars) 166 | 167 | def get_var_list_to_restore(): 168 | """Choosing which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 169 | 170 | variables_to_restore = [] 171 | if FLAGS.checkpoint_exclude_scopes is not None: 172 | exclusions = [scope.strip() 173 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 174 | 175 | # build restore list 176 | for var in tf.model_variables(): 177 | excluded = False 178 | for exclusion in exclusions: 179 | if var.name.startswith(exclusion): 180 | excluded = True 181 | break 182 | if not excluded: 183 | variables_to_restore.append(var) 184 | else: 185 | variables_to_restore = tf.model_variables() 186 | 187 | variables_to_restore_final = [] 188 | if FLAGS.checkpoint_include_scopes is not None: 189 | includes = [ 190 | scope.strip() 191 | for scope in FLAGS.checkpoint_include_scopes.split(',') 192 | ] 193 | for var in variables_to_restore: 194 | included = False 195 | for include in includes: 196 | if var.name.startswith(include): 197 | included = True 198 | break 199 | if included: 200 | variables_to_restore_final.append(var) 201 | else: 202 | variables_to_restore_final = variables_to_restore 203 | 204 | return variables_to_restore_final 205 | -------------------------------------------------------------------------------- /libs/nms/.gitignore: -------------------------------------------------------------------------------- 1 | *.c 2 | *.cpp 3 | *.so 4 | -------------------------------------------------------------------------------- /libs/nms/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.py -------------------------------------------------------------------------------- /libs/nms/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/nms/__init__.pyc 
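To make the decay arithmetic in _configure_learning_rate above concrete, here is a pure-Python mirror of the staircase exponential schedule it builds; the flag values below are illustrative, not the repo's defaults:

def exponential_lr(step, base_lr=2e-4, decay_steps=165566, decay_factor=0.1):
    """Mirrors tf.train.exponential_decay(..., staircase=True)."""
    return base_lr * decay_factor ** (step // decay_steps)

# decay_steps = int(num_samples_per_epoch / batch_size * num_epochs_per_decay),
# e.g. int(82783 / 1 * 2) = 165566 for COCO train2014 at batch_size 1
assert exponential_lr(0) == 2e-4                    # first stair
assert abs(exponential_lr(165566) - 2e-5) < 1e-15  # dropped by 10x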
-------------------------------------------------------------------------------- /libs/nms/cpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def cpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] - inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /libs/nms/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 
| cdef int boxes_dim = dets.shape[1]
20 | cdef int num_out
21 | cdef np.ndarray[np.int32_t, ndim=1] \
22 | keep = np.zeros(boxes_num, dtype=np.int32)
23 | cdef np.ndarray[np.float32_t, ndim=1] \
24 | scores = dets[:, 4]
25 | cdef np.ndarray[np.int_t, ndim=1] \
26 | order = scores.argsort()[::-1]
27 | cdef np.ndarray[np.float32_t, ndim=2] \
28 | sorted_dets = dets[order, :]
29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id)
30 | keep = keep[:num_out]
31 | return list(order[keep])
32 | -------------------------------------------------------------------------------- /libs/nms/nms_kernel.cu: --------------------------------------------------------------------------------
1 | // ------------------------------------------------------------------
2 | // Faster R-CNN
3 | // Copyright (c) 2015 Microsoft
4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details]
5 | // Written by Shaoqing Ren
6 | // ------------------------------------------------------------------
7 |
8 | #include "gpu_nms.hpp"
9 | #include <vector>
10 | #include <iostream>
11 |
12 | #define CUDA_CHECK(condition) \
13 | /* Code block avoids redefinition of cudaError_t error */ \
14 | do { \
15 | cudaError_t error = condition; \
16 | if (error != cudaSuccess) { \
17 | std::cout << cudaGetErrorString(error) << std::endl; \
18 | } \
19 | } while (0)
20 |
21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0))
22 | int const threadsPerBlock = sizeof(unsigned long long) * 8;
23 |
24 | __device__ inline float devIoU(float const * const a, float const * const b) {
25 | float left = max(a[0], b[0]), right = min(a[2], b[2]);
26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]);
27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f);
28 | float interS = width * height;
29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1);
30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1);
31 | return interS / (Sa + Sb - interS);
32 | }
33 |
34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh,
35 | const float *dev_boxes, unsigned long long *dev_mask) {
36 | const int row_start = blockIdx.y;
37 | const int col_start = blockIdx.x;
38 |
39 | // if (row_start > col_start) return;
40 |
41 | const int row_size =
42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock);
43 | const int col_size =
44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock);
45 |
46 | __shared__ float block_boxes[threadsPerBlock * 5];
47 | if (threadIdx.x < col_size) {
48 | block_boxes[threadIdx.x * 5 + 0] =
49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0];
50 | block_boxes[threadIdx.x * 5 + 1] =
51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1];
52 | block_boxes[threadIdx.x * 5 + 2] =
53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2];
54 | block_boxes[threadIdx.x * 5 + 3] =
55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3];
56 | block_boxes[threadIdx.x * 5 + 4] =
57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4];
58 | }
59 | __syncthreads();
60 |
61 | if (threadIdx.x < row_size) {
62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x;
63 | const float *cur_box = dev_boxes + cur_box_idx * 5;
64 | int i = 0;
65 | unsigned long long t = 0;
66 | int start = 0;
67 | if (row_start == col_start) {
68 | start = threadIdx.x + 1;
69 | }
70 | for (i = start; i < col_size; i++) {
71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) {
72 | t |= 1ULL << i;
73 | }
74 | }
75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock);
76 | dev_mask[cur_box_idx * col_blocks + col_start] = t;
77 | }
78 | }
79 |
80 | void _set_device(int device_id) {
81 | int current_device;
82 | CUDA_CHECK(cudaGetDevice(&current_device));
83 | if (current_device == device_id) {
84 | return;
85 | }
86 | // The call to cudaSetDevice must come before any calls to Get, which
87 | // may perform initialization using the GPU.
88 | CUDA_CHECK(cudaSetDevice(device_id));
89 | }
90 |
91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num,
92 | int boxes_dim, float nms_overlap_thresh, int device_id) {
93 | _set_device(device_id);
94 |
95 | float* boxes_dev = NULL;
96 | unsigned long long* mask_dev = NULL;
97 |
98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock);
99 |
100 | CUDA_CHECK(cudaMalloc(&boxes_dev,
101 | boxes_num * boxes_dim * sizeof(float)));
102 | CUDA_CHECK(cudaMemcpy(boxes_dev,
103 | boxes_host,
104 | boxes_num * boxes_dim * sizeof(float),
105 | cudaMemcpyHostToDevice));
106 |
107 | CUDA_CHECK(cudaMalloc(&mask_dev,
108 | boxes_num * col_blocks * sizeof(unsigned long long)));
109 |
110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock),
111 | DIVUP(boxes_num, threadsPerBlock));
112 | dim3 threads(threadsPerBlock);
113 | nms_kernel<<<blocks, threads>>>(boxes_num,
114 | nms_overlap_thresh,
115 | boxes_dev,
116 | mask_dev);
117 |
118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks);
119 | CUDA_CHECK(cudaMemcpy(&mask_host[0],
120 | mask_dev,
121 | sizeof(unsigned long long) * boxes_num * col_blocks,
122 | cudaMemcpyDeviceToHost));
123 |
124 | std::vector<unsigned long long> remv(col_blocks);
125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks);
126 |
127 | int num_to_keep = 0;
128 | for (int i = 0; i < boxes_num; i++) {
129 | int nblock = i / threadsPerBlock;
130 | int inblock = i % threadsPerBlock;
131 |
132 | if (!(remv[nblock] & (1ULL << inblock))) {
133 | keep_out[num_to_keep++] = i;
134 | unsigned long long *p = &mask_host[0] + i * col_blocks;
135 | for (int j = nblock; j < col_blocks; j++) {
136 | remv[j] |= p[j];
137 | }
138 | }
139 | }
140 | *num_out = num_to_keep;
141 |
142 | CUDA_CHECK(cudaFree(boxes_dev));
143 | CUDA_CHECK(cudaFree(mask_dev));
144 | }
145 | -------------------------------------------------------------------------------- /libs/nms/py_cpu_nms.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def py_cpu_nms(dets, thresh):
11 | """Pure Python NMS baseline."""
12 | x1 = dets[:, 0]
13 | y1 = dets[:, 1]
14 | x2 = dets[:, 2]
15 | y2 = dets[:, 3]
16 | scores = dets[:, 4]
17 |
18 | areas = (x2 - x1 + 1) * (y2 - y1 + 1)
19 | order = scores.argsort()[::-1]
20 |
21 | keep = []
22 | while order.size > 0:
23 | i = order[0]
24 | keep.append(i)
25 | xx1 = np.maximum(x1[i], x1[order[1:]])
26 | yy1 = np.maximum(y1[i], y1[order[1:]])
27 | xx2 = np.minimum(x2[i], x2[order[1:]])
28 | yy2 = np.minimum(y2[i], y2[order[1:]])
29 |
30 | w = np.maximum(0.0, xx2 - xx1 + 1)
31 | h = np.maximum(0.0, yy2 - yy1 + 1)
32 | inter = w * h
33 | ovr = inter / (areas[i] + areas[order[1:]] - inter)
34 |
35 | inds = np.where(ovr <= thresh)[0]
36 | order = order[inds + 1]
37 |
38 | return keep
39 | 
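A quick sanity check of py_cpu_nms above (illustrative values): two heavily overlapping boxes and one disjoint box; at thresh=0.5 the lower-scoring duplicate is suppressed:

import numpy as np
dets = np.array([[ 10,  10,  50,  50, 0.9],   # kept: highest score
                 [ 12,  12,  48,  48, 0.8],   # suppressed: IoU with box 0 is ~0.81
                 [100, 100, 140, 140, 0.7]],  # kept: disjoint from box 0
                dtype=np.float32)
assert py_cpu_nms(dets, thresh=0.5) == [0, 2]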
-------------------------------------------------------------------------------- /libs/preprocessings/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.py -------------------------------------------------------------------------------- /libs/preprocessings/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/__init__.pyc -------------------------------------------------------------------------------- /libs/preprocessings/coco_v1.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 |
4 | from __future__ import absolute_import
5 | from __future__ import division
6 | from __future__ import print_function
7 |
8 | import time
9 | import tensorflow as tf
10 | import libs.configs.config_v1 as cfg
11 | from . import utils as preprocess_utils
12 |
13 | FLAGS = tf.app.flags.FLAGS
14 |
15 | def preprocess_image(image, gt_boxes, gt_masks, is_training=False):
16 | """preprocess image for coco
17 | 1. random flipping
18 | 2. min size resizing
19 | 3. zero mean
20 | 4. ...
21 | """
22 | if is_training:
23 | return preprocess_for_training(image, gt_boxes, gt_masks)
24 | else:
25 | return preprocess_for_test(image, gt_boxes, gt_masks)
26 |
27 |
28 | def preprocess_for_training(image, gt_boxes, gt_masks):
29 |
30 | ih, iw = tf.shape(image)[0], tf.shape(image)[1]
31 | ## random flipping
32 | coin = tf.to_float(tf.random_uniform([1]))[0]
33 | image, gt_boxes, gt_masks =\
34 | tf.cond(tf.greater_equal(coin, 0.5),
35 | lambda: (preprocess_utils.flip_image(image),
36 | preprocess_utils.flip_gt_boxes(gt_boxes, ih, iw),
37 | preprocess_utils.flip_gt_masks(gt_masks)),
38 | lambda: (image, gt_boxes, gt_masks))
39 |
40 | ## min size resizing
41 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) # FLAGS.image_min_size = 640 (min edge)
42 | image = tf.expand_dims(image, 0)
43 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False)
44 | image = tf.squeeze(image, axis=[0])
45 |
46 | #gt_masks = tf.expand_dims(gt_masks, -1)
47 | # if the line above is uncommented, TF raises: ValueError: Shape must be rank 4 but is rank 5 for 'ResizeNearestNeighbor' (op: 'ResizeNearestNeighbor') with input shapes: [?,?,?,7,1], [2]
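# (Aside: the expand_dims/squeeze pair used in preprocess_for_test below is
# skipped here because these gt_masks are already rank 4 -- (G, H, W, 7), one
# channel per body-part class -- and resize_nearest_neighbor treats the G
# dimension as the batch.)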
48 | gt_masks = tf.cast(gt_masks, tf.float32) 49 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 50 | gt_masks = tf.cast(gt_masks, tf.int32) 51 | #gt_masks = tf.squeeze(gt_masks, axis=[-1]) 52 | 53 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 54 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 55 | 56 | ## random flip image 57 | # val_lr = tf.to_float(tf.random_uniform([1]))[0] 58 | # image = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_image(image), lambda: image) 59 | # gt_masks = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_masks(gt_masks), lambda: gt_masks) 60 | # gt_boxes = tf.cond(val_lr > 0.5, lambda: preprocess_utils.flip_gt_boxes(gt_boxes, new_ih, new_iw), lambda: gt_boxes) 61 | 62 | ## zero mean image 63 | image = tf.cast(image, tf.float32) 64 | image = image / 256.0 65 | image = (image - 0.5) * 2.0 66 | image = tf.expand_dims(image, axis=0) 67 | 68 | ## rgb to bgr 69 | image = tf.reverse(image, axis=[-1]) 70 | 71 | return image, gt_boxes, gt_masks 72 | 73 | def preprocess_for_test(image, gt_boxes, gt_masks): 74 | 75 | 76 | ih, iw = tf.shape(image)[0], tf.shape(image)[1] 77 | 78 | ## min size resizing 79 | new_ih, new_iw = preprocess_utils._smallest_size_at_least(ih, iw, cfg.FLAGS.image_min_size) 80 | image = tf.expand_dims(image, 0) 81 | image = tf.image.resize_bilinear(image, [new_ih, new_iw], align_corners=False) 82 | image = tf.squeeze(image, axis=[0]) 83 | 84 | gt_masks = tf.expand_dims(gt_masks, -1) 85 | gt_masks = tf.cast(gt_masks, tf.float32) 86 | gt_masks = tf.image.resize_nearest_neighbor(gt_masks, [new_ih, new_iw], align_corners=False) 87 | gt_masks = tf.cast(gt_masks, tf.int32) 88 | gt_masks = tf.squeeze(gt_masks, axis=[-1]) 89 | 90 | scale_ratio = tf.to_float(new_ih) / tf.to_float(ih) 91 | gt_boxes = preprocess_utils.resize_gt_boxes(gt_boxes, scale_ratio) 92 | 93 | ## zero mean image 94 | image = tf.cast(image, tf.float32) 95 | image = image / 256.0 96 | image = (image - 0.5) * 2.0 97 | image = tf.expand_dims(image, axis=0) 98 | 99 | ## rgb to bgr 100 | image = tf.reverse(image, axis=[-1]) 101 | 102 | return image, gt_boxes, gt_masks 103 | -------------------------------------------------------------------------------- /libs/preprocessings/coco_v1.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/coco_v1.pyc -------------------------------------------------------------------------------- /libs/preprocessings/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | 5 | import tensorflow as tf 6 | 7 | from tensorflow.python.ops import control_flow_ops 8 | from tensorflow.contrib import slim 9 | 10 | 11 | def _crop(image, offset_height, offset_width, crop_height, crop_width): 12 | original_shape = tf.shape(image) 13 | 14 | rank_assertion = tf.Assert( 15 | tf.equal(tf.rank(image), 3), 16 | ['Rank of image must be equal to 3.']) 17 | cropped_shape = control_flow_ops.with_dependencies( 18 | [rank_assertion], 19 | tf.stack([crop_height, crop_width, original_shape[2]])) 20 | 21 | size_assertion = tf.Assert( 22 | tf.logical_and( 23 | tf.greater_equal(original_shape[0], crop_height), 24 | tf.greater_equal(original_shape[1], 
crop_width)), 25 | ['Crop size greater than the image size.']) 26 | 27 | offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0])) 28 | 29 | # Use tf.slice instead of crop_to_bounding box as it accepts tensors to 30 | # define the crop size. 31 | image = control_flow_ops.with_dependencies( 32 | [size_assertion], 33 | tf.slice(image, offsets, cropped_shape)) 34 | return tf.reshape(image, cropped_shape) 35 | 36 | 37 | def _random_crop(image_list, label_list, crop_height, crop_width): 38 | if not image_list: 39 | raise ValueError('Empty image_list.') 40 | 41 | # Compute the rank assertions. 42 | rank_assertions = [] 43 | for i in range(len(image_list)): 44 | image_rank = tf.rank(image_list[i]) 45 | rank_assert = tf.Assert( 46 | tf.equal(image_rank, 3), 47 | ['Wrong rank for tensor %s [expected] [actual]', 48 | image_list[i].name, 3, image_rank]) 49 | rank_assertions.append(rank_assert) 50 | 51 | image_shape = control_flow_ops.with_dependencies( 52 | [rank_assertions[0]], 53 | tf.shape(image_list[0])) 54 | image_height = image_shape[0] 55 | image_width = image_shape[1] 56 | crop_size_assert = tf.Assert( 57 | tf.logical_and( 58 | tf.greater_equal(image_height, crop_height), 59 | tf.greater_equal(image_width, crop_width)), 60 | ['Crop size greater than the image size.', image_height, image_width, crop_height, crop_width]) 61 | 62 | asserts = [rank_assertions[0], crop_size_assert] 63 | 64 | for i in range(1, len(image_list)): 65 | image = image_list[i] 66 | asserts.append(rank_assertions[i]) 67 | shape = control_flow_ops.with_dependencies([rank_assertions[i]], 68 | tf.shape(image)) 69 | height = shape[0] 70 | width = shape[1] 71 | 72 | height_assert = tf.Assert( 73 | tf.equal(height, image_height), 74 | ['Wrong height for tensor %s [expected][actual]', 75 | image.name, height, image_height]) 76 | width_assert = tf.Assert( 77 | tf.equal(width, image_width), 78 | ['Wrong width for tensor %s [expected][actual]', 79 | image.name, width, image_width]) 80 | asserts.extend([height_assert, width_assert]) 81 | 82 | # Create a random bounding box. 83 | # 84 | # Use tf.random_uniform and not numpy.random.rand as doing the former would 85 | # generate random numbers at graph eval time, unlike the latter which 86 | # generates random numbers at graph definition time. 
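# for instance, a NumPy draw such as offset_height = int(np.random.rand() * h) would be evaluated
# once, while the graph is being built, so every batch would be cropped at the same fixed offset;
# the tf.random_uniform ops below are graph ops and draw a fresh offset on each sess.run(), which
# is what a random crop needs.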
87 | max_offset_height = control_flow_ops.with_dependencies( 88 | asserts, tf.reshape(image_height - crop_height + 1, [])) 89 | max_offset_width = control_flow_ops.with_dependencies( 90 | asserts, tf.reshape(image_width - crop_width + 1, [])) 91 | offset_height = tf.random_uniform( 92 | [], maxval=max_offset_height, dtype=tf.int32) 93 | offset_width = tf.random_uniform( 94 | [], maxval=max_offset_width, dtype=tf.int32) 95 | 96 | cropped_images = [_crop(image, offset_height, offset_width, 97 | crop_height, crop_width) for image in image_list] 98 | cropped_labels = [_crop(label, offset_height, offset_width, 99 | crop_height, crop_width) for label in label_list] 100 | return cropped_images, cropped_labels 101 | 102 | 103 | def _central_crop(image_list, label_list, crop_height, crop_width): 104 | output_images = [] 105 | output_labels = [] 106 | for image, label in zip(image_list, label_list): 107 | image_height = tf.shape(image)[0] 108 | image_width = tf.shape(image)[1] 109 | 110 | offset_height = (image_height - crop_height) / 2 111 | offset_width = (image_width - crop_width) / 2 112 | 113 | output_images.append(_crop(image, offset_height, offset_width, 114 | crop_height, crop_width)) 115 | output_labels.append(_crop(label, offset_height, offset_width, 116 | crop_height, crop_width)) 117 | return output_images, output_labels 118 | 119 | 120 | def _smallest_size_at_least(height, width, smallest_side): 121 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 122 | 123 | height = tf.to_float(height) 124 | width = tf.to_float(width) 125 | smallest_side = tf.to_float(smallest_side) 126 | 127 | scale = tf.cond(tf.greater(height, width), 128 | lambda: smallest_side / width, 129 | lambda: smallest_side / height) 130 | new_height = tf.to_int32(height * scale) 131 | new_width = tf.to_int32(width * scale) 132 | return new_height, new_width 133 | 134 | def _aspect_preserving_resize(image, label, smallest_side): 135 | smallest_side = tf.convert_to_tensor(smallest_side, dtype=tf.int32) 136 | 137 | shape = tf.shape(image) 138 | height = shape[0] 139 | width = shape[1] 140 | new_height, new_width = _smallest_size_at_least(height, width, smallest_side) 141 | 142 | image = tf.expand_dims(image, 0) 143 | resized_image = tf.image.resize_bilinear(image, [new_height, new_width], 144 | align_corners=False) 145 | resized_image = tf.squeeze(resized_image, axis=[0]) 146 | resized_image.set_shape([None, None, 3]) 147 | 148 | label = tf.expand_dims(label, 0) 149 | resized_label = tf.image.resize_nearest_neighbor(label, [new_height, new_width], 150 | align_corners=False) 151 | resized_label = tf.squeeze(resized_label, axis=[0]) 152 | resized_label.set_shape([None, None, 1]) 153 | return resized_image, resized_label 154 | 155 | def flip_gt_boxes(gt_boxes, ih, iw): 156 | x1s, y1s, x2s, y2s, cls = \ 157 | gt_boxes[:, 0], gt_boxes[:, 1], gt_boxes[:, 2], gt_boxes[:, 3], gt_boxes[:, 4] 158 | x1s = tf.to_float(iw) - x1s 159 | x2s = tf.to_float(iw) - x2s 160 | return tf.concat(values=(x2s[:, tf.newaxis], 161 | y1s[:, tf.newaxis], 162 | x1s[:, tf.newaxis], 163 | y2s[:, tf.newaxis], 164 | cls[:, tf.newaxis]), axis=1) 165 | 166 | def flip_gt_masks(gt_masks): 167 | return tf.reverse(gt_masks, axis=[2]) 168 | 169 | def flip_image(image): 170 | return tf.reverse(image, axis=[1]) 171 | 172 | def resize_gt_boxes(gt_boxes, scale_ratio): 173 | xys, cls = \ 174 | gt_boxes[:, 0:4], gt_boxes[:, 4] 175 | xys = xys * scale_ratio 176 | return tf.concat(values=(xys, cls[:, tf.newaxis]), axis=1) 177 | 178 | 
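A quick NumPy check of the box conventions used in the helpers above: boxes are rows of [x1, y1, x2, y2, class], flip_gt_boxes mirrors the x coordinates against the image width and swaps x1/x2 so that x1 <= x2 still holds, and the min-size resize scales all coordinates by a single ratio. The following is a standalone sketch of the same arithmetic (the sample box and image sizes are made up):

import numpy as np

iw, ih = 640, 480
gt_boxes = np.array([[100., 120., 300., 400., 1.]])  # one box as [x1, y1, x2, y2, cls]

# horizontal flip, mirroring flip_gt_boxes: x' = iw - x, with the two x columns swapped
x1, y1, x2, y2, cls = gt_boxes.T
flipped = np.stack([iw - x2, y1, iw - x1, y2, cls], axis=1)
assert (flipped[:, 0] <= flipped[:, 2]).all()

# min-size resize, mirroring _smallest_size_at_least + resize_gt_boxes:
# the smaller image edge is scaled to smallest_side and the boxes follow
smallest_side = 640.0
scale = smallest_side / min(ih, iw)
resized = np.hstack([gt_boxes[:, :4] * scale, gt_boxes[:, 4:]])
print(flipped)
print(resized)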
-------------------------------------------------------------------------------- /libs/preprocessings/utils.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/preprocessings/utils.pyc
-------------------------------------------------------------------------------- /libs/setup.py: --------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import os
9 | from os.path import join as pjoin
10 | import numpy as np
11 | from distutils.core import setup
12 | from distutils.extension import Extension
13 | from Cython.Distutils import build_ext
14 |
15 | def find_in_path(name, path):
16 | "Find a file in a search path"
17 | # adapted from http://code.activestate.com/recipes/52224-find-a-file-given-a-search-path/
18 | for dir in path.split(os.pathsep):
19 | binpath = pjoin(dir, name)
20 | if os.path.exists(binpath):
21 | return os.path.abspath(binpath)
22 | return None
23 |
24 | def locate_cuda():
25 | """Locate the CUDA environment on the system
26 |
27 | Returns a dict with keys 'home', 'nvcc', 'include', and 'lib64'
28 | and values giving the absolute path to each directory.
29 |
30 | Starts by looking for the CUDAHOME env variable. If not found, everything
31 | is based on finding 'nvcc' in the PATH.
32 | """
33 |
34 | # first check if the CUDAHOME env variable is in use
35 | if 'CUDAHOME' in os.environ:
36 | home = os.environ['CUDAHOME']
37 | nvcc = pjoin(home, 'bin', 'nvcc')
38 | else:
39 | # otherwise, search the PATH for NVCC
40 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
41 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path)
42 | if nvcc is None:
43 | raise EnvironmentError('The nvcc binary could not be '
44 | 'located in your $PATH. Either add it to your path, or set $CUDAHOME')
45 | home = os.path.dirname(os.path.dirname(nvcc))
46 |
47 | cudaconfig = {'home':home, 'nvcc':nvcc,
48 | 'include': pjoin(home, 'include'),
49 | 'lib64': pjoin(home, 'lib64')}
50 | for k, v in cudaconfig.iteritems():
51 | if not os.path.exists(v):
52 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v))
53 |
54 | return cudaconfig
55 | CUDA = locate_cuda()
56 |
57 | # Obtain the numpy include directory. This logic works across numpy versions.
58 | try:
59 | numpy_include = np.get_include()
60 | except AttributeError:
61 | numpy_include = np.get_numpy_include()
62 |
63 | def customize_compiler_for_nvcc(self):
64 | """inject deep into distutils to customize how the dispatch
65 | to gcc/nvcc works.
66 |
67 | If you subclass UnixCCompiler, it's not trivial to get your subclass
68 | injected in, and still have the right customizations (i.e.
69 | distutils.sysconfig.customize_compiler) run on it. So instead of going
70 | the OO route, I have this. Note, it's kind of like a weird functional
71 | subclassing going on."""
72 |
73 | # tell the compiler it can process .cu
74 | self.src_extensions.append('.cu')
75 |
76 | # save references to the default compiler_so and _compile methods
77 | default_compiler_so = self.compiler_so
78 | super = self._compile
79 |
80 | # now redefine the _compile method. This gets executed for each
81 | # object but distutils doesn't have the ability to change compilers
82 | # based on source extension: we add it.
83 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
84 | print extra_postargs
85 | if os.path.splitext(src)[1] == '.cu':
86 | # use the cuda for .cu files
87 | self.set_executable('compiler_so', CUDA['nvcc'])
88 | # use only a subset of the extra_postargs, which are 1-1 translated
89 | # from the extra_compile_args in the Extension class
90 | postargs = extra_postargs['nvcc']
91 | else:
92 | postargs = extra_postargs['gcc']
93 |
94 | super(obj, src, ext, cc_args, postargs, pp_opts)
95 | # reset the default compiler_so, which we might have changed for cuda
96 | self.compiler_so = default_compiler_so
97 |
98 | # inject our redefined _compile method into the class
99 | self._compile = _compile
100 |
101 |
102 | # run the customize_compiler
103 | class custom_build_ext(build_ext):
104 | def build_extensions(self):
105 | customize_compiler_for_nvcc(self.compiler)
106 | build_ext.build_extensions(self)
107 |
108 | ext_modules = [
109 | Extension(
110 | "boxes.cython_bbox",
111 | ["boxes/bbox.pyx"],
112 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
113 | include_dirs = [numpy_include]
114 | ),
115 | Extension(
116 | "boxes.cython_anchor",
117 | ["boxes/cython_anchor.pyx"],
118 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
119 | include_dirs = [numpy_include]
120 | ),
121 | Extension(
122 | "boxes.cython_bbox_transform",
123 | ["boxes/cython_bbox_transform.pyx"],
124 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
125 | include_dirs=[numpy_include]
126 | ),
127 | Extension(
128 | "boxes.cython_nms",
129 | ["boxes/nms.pyx"],
130 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
131 | include_dirs = [numpy_include]
132 | ),
133 | Extension(
134 | "nms.cpu_nms",
135 | ["nms/cpu_nms.pyx"],
136 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
137 | include_dirs = [numpy_include]
138 | ),
139 | Extension(
140 | 'nms.gpu_nms',
141 | ['nms/nms_kernel.cu', 'nms/gpu_nms.pyx'],
142 | library_dirs=[CUDA['lib64']],
143 | libraries=['cudart'],
144 | language='c++',
145 | runtime_library_dirs=[CUDA['lib64']],
146 | # this syntax is specific to this build system
147 | # we're only going to use certain compiler args with nvcc and not with gcc
148 | # the implementation of this trick is in customize_compiler_for_nvcc() above
149 | extra_compile_args={'gcc': ["-Wno-unused-function"],
150 | 'nvcc': ['-arch=sm_52',
151 | '--ptxas-options=-v',
152 | '-c',
153 | '--compiler-options',
154 | "'-fPIC'"]},
155 | include_dirs = [numpy_include, CUDA['include']]
156 | ),
157 | ]
158 |
159 | setup(
160 | name='fast_rcnn',
161 | ext_modules=ext_modules,
162 | # inject our custom trigger
163 | cmdclass={'build_ext': custom_build_ext},
164 | )
165 |
-------------------------------------------------------------------------------- /libs/visualization/__init__.py: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.py
-------------------------------------------------------------------------------- /libs/visualization/__init__.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/__init__.pyc
-------------------------------------------------------------------------------- /libs/visualization/pil_utils.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import tensorflow as tf
3 | from PIL import Image, ImageFont, ImageDraw, ImageEnhance
4 | import scipy.misc
5 |
6 | FLAGS = tf.app.flags.FLAGS
7 | _DEBUG = False
8 |
9 | def draw_img(step, image, name='', image_height=1, image_width=1, rois=None):
10 | #print("image")
11 | #print(image)
12 | #norm_image = np.uint8(image/np.max(np.abs(image))*255.0)
13 | norm_image = np.uint8(image/0.1*127.0 + 127.0)
14 | #print("norm_image")
15 | #print(norm_image)
16 | source_img = Image.fromarray(norm_image)
17 | return source_img.save(FLAGS.train_dir + 'test_' + name + '_' + str(step) +'.jpg', 'JPEG')
18 |
19 | colors = np.random.randint(5, size=(80, 3))
20 |
21 |
22 | def draw_bbox_better(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
23 | import cv2
24 | #source_img = Image.fromarray(image)
25 | hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
26 | #b, g, r = source_img.split()
27 | #source_img = Image.merge("RGB", (r, g, b))
28 | #draw = ImageDraw.Draw(source_img)
29 | #color = '#0000ff'
30 | if bbox is not None:
31 | dictionary = {}
32 |
33 | for i, box in enumerate(bbox):
34 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
35 | area = float((box[2]-box[0])*(box[3]-box[1]))
36 | while area in dictionary:
37 | area+=1
38 | width = int(box[2])-int(box[0])
39 | height = int(box[3])-int(box[1])
40 | mask = final_mask[i]
41 | mask = mask[...,label[i]]
42 | mask = scipy.misc.imresize(mask,(height,width))
43 |
44 | dictionary[round(area,4)]=(box,label[i],gt_label[i],prob[i,label[i]],mask,colors[label[i],:])
45 | sorted_keys = sorted(dictionary.iterkeys(),reverse=True)
46 |
47 | big_mask = np.zeros((image.shape[0],image.shape[1],len(bbox)),dtype=np.float32)
48 |
49 | i=0
50 | for key in sorted_keys:
51 | bo, _,_,_,msk,_= dictionary[key]
52 | big_mask[int(bo[1]):int(bo[3]),int(bo[0]):int(bo[2]),i] = msk
53 | i=i+1
54 |
55 | max_indices = np.argmax(big_mask,axis=2)
56 | for key in sorted_keys:
57 | bo, lab,gt_lab,_,_,col= dictionary[key]
58 | for x in range(int(bo[0]),int(bo[2])):
59 | for y in range(int(bo[1]),int(bo[3])):
60 | _,_,_,_,_,col = dictionary[sorted_keys[max_indices[y,x]]]  # index by the same order used to fill big_mask; dict.values() has no guaranteed order in Python 2
61 | #print col
62 | #print (image[y,x,0] )
63 | image[y,x,...] = col
64 | #hsv[y,x,0]=color[0]
65 | #hsv[y,x,1]=hsv[y,x,1]*0.9
66 | text = cat_id_to_cls_name(lab)
67 | image = cv2.putText(image,text,(2+int(bo[0]),2+int(bo[1])), cv2.FONT_HERSHEY_SIMPLEX, 4,(255,255,255),2)
68 | if lab != gt_lab:
69 | c = (255,0,0)
70 | else:
71 | c = (0,0,255)
72 | image = cv2.rectangle(image,(int(bo[0]),int(bo[1])),(int(bo[2]),int(bo[3])),c,3)
73 | cv2.imwrite('output/est_imgs/test_' + name + '_' + str(step) +'.jpg',image)
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 | def draw_bbox(step, image, name='', image_height=1, image_width=1, bbox=None, label=None, gt_label=None, prob=None,final_mask=None):
82 | #print(prob[:,label])
83 | source_img = Image.fromarray(image)
84 | b, g, r = source_img.split()
85 | source_img = Image.merge("RGB", (r, g, b))
86 | draw = ImageDraw.Draw(source_img)
87 | color = '#0000ff'
88 | if bbox is not None:
89 | for i, box in enumerate(bbox):
90 | if label is not None:
91 | if prob is not None:
92 | if (prob[i,label[i]] > 0.5) and (label[i] > 0):
93 | if gt_label is not None:
94 | text = cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i])
95 | if label[i] != gt_label[i]:
96 | color = '#ff0000'#draw.text((2+bbox[i,0], 2+bbox[i,1]), cat_id_to_cls_name(label[i]) + ' : ' + cat_id_to_cls_name(gt_label[i]), fill='#ff0000')
97 | else:
98 | color = '#0000ff'
99 | else:
100 | text = cat_id_to_cls_name(label[i])
101 | #############################DRAW SEGMENTATION
102 | width = int(box[2])-int(box[0])  # imresize expects integer sizes
103 | height = int(box[3])-int(box[1])
104 | #print (final_mask.shape)
105 | mask = final_mask[i]
106 | mask = mask[...,label[i]]
107 | mask = scipy.misc.imresize(mask,(height,width))
108 | mask_pil = Image.fromarray(mask)
109 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
110 | #draw.bitmap((int(box[0]),int(box[1])),mask_pil,fill='#00ffff')
111 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
112 | if _DEBUG is True:
113 | print("plot",label[i], prob[i,label[i]])
114 | draw.rectangle(box,fill=None,outline=color)
115 |
116 | else:
117 | if _DEBUG is True:
118 | print("skip",label[i], prob[i,label[i]])
119 | else:
120 | #############################DRAW GT SEGMENTATION
121 | if final_mask is not None:
122 | mask = final_mask[i]
123 | mask_pil = Image.fromarray(mask)
124 | mask_pil = mask_pil.crop([int(box[0]),int(box[1]),int(box[2]),int(box[3])])
125 | source_img.paste(mask_pil,(int(box[0]),int(box[1])))
126 | text = cat_id_to_cls_name(label[i])
127 | draw.text((2+bbox[i,0], 2+bbox[i,1]), text, fill=color)
128 | draw.rectangle(box,fill=None,outline=color)
129 |
130 |
131 | return source_img.save(FLAGS.train_dir + 'est_imgs/test_' + name + '_' + str(step) +'.jpg', 'JPEG')
132 |
133 | def cat_id_to_cls_name(catId):
134 | cls_name = np.array([ 'background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
135 | 'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
136 | 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
137 | 'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
138 | 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
139 | 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
140 | 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
141 | 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
142 | 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
143 | 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
144 | 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
145 | 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave',
'oven', 146 | 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 147 | 'scissors', 'teddy bear', 'hair drier', 'toothbrush']) 148 | return cls_name[catId] 149 | -------------------------------------------------------------------------------- /libs/visualization/pil_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/pil_utils.pyc -------------------------------------------------------------------------------- /libs/visualization/summary_utils.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | 3 | 4 | def visualize_input(boxes, image, masks): 5 | image_sum_sample = image[:1] 6 | visualize_masks(masks, "input_image_gt_mask") 7 | visualize_bb(image, boxes, "input_image_gt_bb") 8 | visualize_input_image(image_sum_sample) 9 | 10 | 11 | def visualize_rpn_predictions(boxes, image): 12 | image_sum_sample = image[:1] 13 | visualize_bb(image_sum_sample, boxes, "rpn_pred_bb") 14 | 15 | # TODO: Present all masks in different colors 16 | def visualize_masks(masks, name): 17 | masks = tf.cast(masks, tf.float32) 18 | tf.summary.image(name=name, tensor=masks, max_outputs=1) 19 | 20 | 21 | def visualize_bb(image, boxes, name): 22 | image_sum_sample_shape = tf.shape(image)[1:] 23 | gt_x_min = boxes[:, 0] / tf.cast(image_sum_sample_shape[1], tf.float32) 24 | gt_y_min = boxes[:, 1] / tf.cast(image_sum_sample_shape[0], tf.float32) 25 | gt_x_max = boxes[:, 2] / tf.cast(image_sum_sample_shape[1], tf.float32) 26 | gt_y_max = boxes[:, 3] / tf.cast(image_sum_sample_shape[0], tf.float32) 27 | bb = tf.stack([gt_y_min, gt_x_min, gt_y_max, gt_x_max], axis=1) 28 | tf.summary.image(name=name, 29 | tensor=tf.image.draw_bounding_boxes(image, tf.expand_dims(bb, 0), name=None), 30 | max_outputs=1) 31 | 32 | 33 | def visualize_input_image(image): 34 | tf.summary.image(name="input_image", tensor=image, max_outputs=1) 35 | 36 | 37 | def visualize_final_predictions(boxes, image, masks): 38 | visualize_masks(masks, "pred_mask") 39 | visualize_bb(image, boxes, "final_bb_pred") 40 | -------------------------------------------------------------------------------- /libs/visualization/summary_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/libs/visualization/summary_utils.pyc -------------------------------------------------------------------------------- /media/file.txt: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /media/testseg122_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg122_1.jpg -------------------------------------------------------------------------------- /media/testseg226_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg226_1.jpg 
-------------------------------------------------------------------------------- /media/testseg255_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg255_1.jpg -------------------------------------------------------------------------------- /media/testseg293_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg293_1.jpg -------------------------------------------------------------------------------- /media/testseg296_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg296_1.jpg -------------------------------------------------------------------------------- /media/testseg305_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg305_1.jpg -------------------------------------------------------------------------------- /media/testseg35_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg35_1.jpg -------------------------------------------------------------------------------- /media/testseg57_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg57_1.jpg -------------------------------------------------------------------------------- /media/testseg70_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/media/testseg70_1.jpg -------------------------------------------------------------------------------- /train/__init__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | from . 
import train_utils
5 |
-------------------------------------------------------------------------------- /train/__init__.pyc: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/__init__.pyc
-------------------------------------------------------------------------------- /train/train_utils.py: --------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding=utf-8
3 | from __future__ import absolute_import
4 | from __future__ import division
5 | from __future__ import print_function
6 |
7 | import collections
8 | import tensorflow as tf
9 | import libs.configs.config_v1 as cfg
10 |
11 | slim = tf.contrib.slim
12 | FLAGS = tf.app.flags.FLAGS
13 |
14 | def _configure_optimizer(learning_rate):
15 | """Configures the optimizer used for training.
16 |
17 | Args:
18 | learning_rate: A scalar or `Tensor` learning rate.
19 |
20 | Returns:
21 | An instance of an optimizer.
22 |
23 | Raises:
24 | ValueError: if FLAGS.optimizer is not recognized.
25 | """
26 | if FLAGS.optimizer == 'adadelta':
27 | optimizer = tf.train.AdadeltaOptimizer(
28 | learning_rate,
29 | rho=FLAGS.adadelta_rho,
30 | epsilon=FLAGS.opt_epsilon)
31 | elif FLAGS.optimizer == 'adagrad':
32 | optimizer = tf.train.AdagradOptimizer(
33 | learning_rate,
34 | initial_accumulator_value=FLAGS.adagrad_initial_accumulator_value)
35 | elif FLAGS.optimizer == 'adam':
36 | optimizer = tf.train.AdamOptimizer(
37 | learning_rate,
38 | beta1=FLAGS.adam_beta1,
39 | beta2=FLAGS.adam_beta2,
40 | epsilon=FLAGS.opt_epsilon)
41 | elif FLAGS.optimizer == 'ftrl':
42 | optimizer = tf.train.FtrlOptimizer(
43 | learning_rate,
44 | learning_rate_power=FLAGS.ftrl_learning_rate_power,
45 | initial_accumulator_value=FLAGS.ftrl_initial_accumulator_value,
46 | l1_regularization_strength=FLAGS.ftrl_l1,
47 | l2_regularization_strength=FLAGS.ftrl_l2)
48 | elif FLAGS.optimizer == 'momentum':  # the optimizer used in this setup
49 | optimizer = tf.train.MomentumOptimizer(
50 | learning_rate,
51 | momentum=FLAGS.momentum,
52 | name='Momentum')
53 | elif FLAGS.optimizer == 'rmsprop':
54 | optimizer = tf.train.RMSPropOptimizer(
55 | learning_rate,
56 | decay=FLAGS.rmsprop_decay,
57 | momentum=FLAGS.rmsprop_momentum,
58 | epsilon=FLAGS.opt_epsilon)
59 | elif FLAGS.optimizer == 'sgd':
60 | optimizer = tf.train.GradientDescentOptimizer(learning_rate)
61 | else:
62 | raise ValueError('Optimizer [%s] was not recognized' % FLAGS.optimizer)
63 | return optimizer
64 |
65 | def _configure_learning_rate(num_samples_per_epoch, global_step):
66 | """Configures the learning rate.
67 |
68 | Args:
69 | num_samples_per_epoch: The number of samples in each epoch of training.
70 | global_step: The global_step tensor.
71 |
72 | Returns:
73 | A `Tensor` representing the learning rate.
74 |
75 | Raises:
76 | ValueError: if FLAGS.learning_rate_decay_type is not recognized.
77 | """
78 | decay_steps = int(num_samples_per_epoch / FLAGS.batch_size *  # batch_size is 1; num_epochs_per_decay is 2
79 | FLAGS.num_epochs_per_decay)  # 165566 for num_samples_per_epoch=82783
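# worked example with the values noted above (82783 samples per epoch, batch size 1,
# two epochs per decay): decay_steps = int(82783 / 1 * 2) = 165566, so with the
# staircase exponential schedule configured below,
#   lr(step) = 0.0002 * 0.94 ** (step // 165566)
# i.e. the first decay lands after roughly two epochs.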
80 | if FLAGS.sync_replicas:
81 | decay_steps /= FLAGS.replicas_to_aggregate
82 |
83 | if FLAGS.learning_rate_decay_type == 'exponential':  # the decay type used in this setup
84 | return tf.train.exponential_decay(FLAGS.learning_rate,  # 0.0002 in this setup
85 | global_step,
86 | decay_steps,  # 165566 for num_samples_per_epoch=82783
87 | FLAGS.learning_rate_decay_factor,  # 0.94
88 | staircase=True,
89 | name='exponential_decay_learning_rate')
90 | # with staircase=True the exponent is floored: lr = 0.0002 * 0.94 ** (step // 165566)
91 | # e.g. lr(130000) = 0.0002, and from step 165566 onward lr = 0.0002 * 0.94 = 0.000188
92 |
93 | elif FLAGS.learning_rate_decay_type == 'fixed':
94 | return tf.constant(FLAGS.learning_rate, name='fixed_learning_rate')
95 | elif FLAGS.learning_rate_decay_type == 'polynomial':
96 | return tf.train.polynomial_decay(FLAGS.learning_rate,
97 | global_step,
98 | decay_steps,
99 | FLAGS.end_learning_rate,
100 | power=0.9,
101 | cycle=False,
102 | name='polynomial_decay_learning_rate')
103 | else:
104 | raise ValueError('learning_rate_decay_type [%s] was not recognized' %
105 | FLAGS.learning_rate_decay_type)
106 |
107 | def _get_variables_to_train():
108 | """Returns a list of variables to train.
109 |
110 | Returns:
111 | A list of variables to train by the optimizer.
112 | """
113 | if FLAGS.trainable_scopes is None:
114 | return tf.trainable_variables()
115 | else:
116 | scopes = [scope.strip() for scope in FLAGS.trainable_scopes.split(',')]
117 |
118 | variables_to_train = []
119 | for scope in scopes:
120 | variables = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope)
121 | variables_to_train.extend(variables)
122 | return variables_to_train
123 |
124 | def _get_init_fn():
125 | """Returns a function run by the chief worker to warm-start the training.
126 |
127 | Note that the init_fn is only run when initializing the model during the very
128 | first global step.
129 |
130 | Returns:
131 | An init function run by the supervisor.
132 | """
133 | if FLAGS.checkpoint_path is None:
134 | return None
135 |
136 | # Warn the user if a checkpoint exists in the train_dir. Then we'll
137 | # ignore the checkpoint anyway.
138 | if tf.train.latest_checkpoint(FLAGS.train_dir): 139 | tf.logging.info( 140 | 'Ignoring --checkpoint_path because a checkpoint already exists in %s' 141 | % FLAGS.train_dir) 142 | return None 143 | 144 | exclusions = [] 145 | if FLAGS.checkpoint_exclude_scopes: 146 | exclusions = [scope.strip() 147 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 148 | 149 | # TODO(sguada) variables.filter_variables() 150 | variables_to_restore = [] 151 | for var in slim.get_model_variables(): 152 | for exclusion in exclusions: 153 | if var.op.name.startswith(exclusion): 154 | break 155 | else: 156 | variables_to_restore.append(var) 157 | 158 | if tf.gfile.IsDirectory(FLAGS.checkpoint_path): 159 | checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path) 160 | else: 161 | checkpoint_path = FLAGS.checkpoint_path 162 | 163 | tf.logging.info('Fine-tuning from %s' % checkpoint_path) 164 | 165 | return slim.assign_from_checkpoint_fn( 166 | checkpoint_path, 167 | variables_to_restore, 168 | ignore_missing_vars=FLAGS.ignore_missing_vars) 169 | 170 | def get_var_list_to_restore(): 171 | """Choose which vars to restore, ignore vars by setting --checkpoint_exclude_scopes """ 172 | 173 | variables_to_restore = [] 174 | if FLAGS.checkpoint_exclude_scopes is not None: 175 | exclusions = [scope.strip() 176 | for scope in FLAGS.checkpoint_exclude_scopes.split(',')] 177 | 178 | # build restore list 179 | for var in tf.model_variables(): 180 | for exclusion in exclusions: 181 | if var.name.startswith(exclusion): 182 | break 183 | else: 184 | variables_to_restore.append(var) 185 | else: 186 | variables_to_restore = tf.model_variables() 187 | 188 | variables_to_restore_final = [] 189 | if FLAGS.checkpoint_include_scopes is not None: 190 | includes = [ 191 | scope.strip() 192 | for scope in FLAGS.checkpoint_include_scopes.split(',') 193 | ] 194 | for var in variables_to_restore: 195 | for include in includes: 196 | if var.name.startswith(include): 197 | variables_to_restore_final.append(var) 198 | break 199 | else: 200 | variables_to_restore_final = variables_to_restore 201 | 202 | return variables_to_restore_final 203 | -------------------------------------------------------------------------------- /train/train_utils.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/train/train_utils.pyc -------------------------------------------------------------------------------- /unit_test/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/daijucug/Mask-RCNN-TF_detection-human_segment-body_keypoint-regression/b575f96dc63fb63034b8faf7564806417bb2a41a/unit_test/__init__.py -------------------------------------------------------------------------------- /unit_test/data_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | import functools 7 | 8 | import sys 9 | import os 10 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 11 | import numpy as np 12 | import PIL.Image as Image 13 | from PIL import ImageDraw 14 | import tensorflow as tf 15 | import tensorflow.contrib.slim as slim 16 | from libs.logs.log import LOG 17 | import libs.configs.config_v1 as cfg 18 
| import libs.nets.resnet_v1 as resnet_v1 19 | import libs.datasets.dataset_factory as dataset_factory 20 | import libs.datasets.coco as coco 21 | import libs.preprocessings.coco_v1 as preprocess_coco 22 | from libs.layers import ROIAlign 23 | 24 | resnet50 = resnet_v1.resnet_v1_50 25 | FLAGS = tf.app.flags.FLAGS 26 | 27 | with tf.Graph().as_default(): 28 | 29 | image, ih, iw, gt_boxes, gt_masks, num_instances, img_id = \ 30 | coco.read('./data/coco/records/coco_train2014_00001-of-00033.tfrecord') 31 | 32 | image, gt_boxes, gt_masks = \ 33 | preprocess_coco.preprocess_image(image, gt_boxes, gt_masks) 34 | 35 | 36 | 37 | sess = tf.Session() 38 | init_op = tf.group(tf.global_variables_initializer(), 39 | tf.local_variables_initializer()) 40 | # init_op = tf.initialize_all_variables() 41 | 42 | boxes = [[100, 100, 200, 200], 43 | [50, 50, 100, 100], 44 | [100, 100, 750, 750], 45 | [50, 50, 60, 60]] 46 | # boxes = np.zeros((0, 4)) 47 | boxes = tf.constant(boxes, tf.float32) 48 | feat = ROIAlign(image, boxes, False, 16, 7, 7) 49 | sess.run(init_op) 50 | 51 | tf.train.start_queue_runners(sess=sess) 52 | with sess.as_default(): 53 | for i in range(20000): 54 | image_np, ih_np, iw_np, gt_boxes_np, gt_masks_np, num_instances_np, img_id_np, \ 55 | feat_np = \ 56 | sess.run([image, ih, iw, gt_boxes, gt_masks, num_instances, img_id, 57 | feat]) 58 | # print (image_np.shape, gt_boxes_np.shape, gt_masks_np.shape) 59 | 60 | if i % 1 == 0: 61 | print ('%d, image_id: %s, instances: %d'% (i, str(img_id_np), num_instances_np)) 62 | image_np = 256 * (image_np * 0.5 + 0.5) 63 | image_np = image_np.astype(np.uint8) 64 | image_np = np.squeeze(image_np) 65 | print (image_np.shape, ih_np, iw_np) 66 | print (feat_np.shape) 67 | im = Image.fromarray(image_np) 68 | imd = ImageDraw.Draw(im) 69 | for i in range(gt_boxes_np.shape[0]): 70 | imd.rectangle(gt_boxes_np[i, :]) 71 | im.save(str(img_id_np) + '.png') 72 | mask = np.sum(gt_masks_np, axis=0, dtype='uint8') 73 | white_pos = np.where(mask > 0) 74 | mask[white_pos] = 255 75 | mask_img = Image.fromarray(mask) 76 | mask_img.save('mask_' + str(img_id_np) + '.png') 77 | # print (gt_boxes_np) 78 | sess.close() 79 | -------------------------------------------------------------------------------- /unit_test/preprocessing_test.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding=utf-8 3 | 4 | import numpy as np 5 | import sys 6 | import os 7 | import tensorflow as tf 8 | sys.path.append(os.path.join(os.path.dirname(__file__), '..')) 9 | 10 | import libs.preprocessings.coco_v1 as coco_preprocess 11 | import libs.configs.config_v1 as cfg 12 | 13 | ih, iw, ic = 400,500, 3 14 | N = 3 15 | image = np.random.randint(0, 255, (ih, iw, ic)).astype(np.uint8) 16 | gt_masks = np.zeros((N, ih, iw)).astype(np.int32) 17 | xy = np.random.randint(0, min(iw, ih)-100, (N, 2)).astype(np.float32) 18 | wh = np.random.randint(20, 40, (N, 2)).astype(np.float32) 19 | cls = np.random.randint(1, 6, (N, 1)).astype(np.float32) 20 | gt_boxes = np.hstack((xy, xy + wh, cls)).astype(np.float32) 21 | gt_boxes_np = gt_boxes 22 | image_np = image 23 | gt_masks_np = gt_masks 24 | 25 | for i in range(N): 26 | box = gt_boxes[i, 0:4] 27 | gt_masks[i, int(box[1]):int(box[3]), 28 | int(box[0]):int(box[2])] = 1 29 | image = tf.constant(image) 30 | gt_boxes = tf.constant(gt_boxes) 31 | gt_masks = tf.constant(gt_masks) 32 | 33 | image, gt_boxes, gt_masks = \ 34 | coco_preprocess.preprocess_image(image, gt_boxes, gt_masks, is_training=True) 35 | 36 
| with tf.Session() as sess: 37 | # print(image.eval()) 38 | image_tf, gt_boxes_tf, gt_masks_tf = \ 39 | sess.run([image, gt_boxes, gt_masks]) 40 | print ('#######################') 41 | print ('DATA PREPROCESSING TEST') 42 | print ('#######################') 43 | print ('gt_boxes shape:', gt_boxes_tf.shape) 44 | print('mask shape:', gt_masks_tf.shape) 45 | print(gt_boxes_tf) 46 | for i in range(N): 47 | box = np.round(gt_boxes_tf[i, 0:4]) 48 | box = box.astype(np.int32) 49 | m = gt_masks_tf[i, box[1]:box[3], box[0]:box[2]] 50 | print ('after:', box) 51 | print (np.sum(m)/ (0.0 + m.size)) 52 | print (m) 53 | box = np.round(gt_boxes_np[i, 0:4]) 54 | box = box.astype(np.int32) 55 | m = gt_masks_np[i, box[1]:box[3], box[0]:box[2]] 56 | print ('ori box:', box) 57 | print (np.sum(m)/ (0.0 + m.size)) 58 | --------------------------------------------------------------------------------
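A closing note on the pixel normalization shared by the preprocessing code and the tests above: preprocess_for_training/preprocess_for_test map uint8 pixels through image / 256.0 followed by (x - 0.5) * 2.0, and unit_test/data_test.py inverts this with 256 * (x * 0.5 + 0.5) before saving visualizations. A standalone sketch of that round trip (NumPy only; the pixel values are made up):

import numpy as np

pixels = np.array([0., 128., 255.])

# forward mapping from libs/preprocessings/coco_v1.py: [0, 255] -> [-1.0, ~0.992]
normalized = (pixels / 256.0 - 0.5) * 2.0

# inverse mapping used in unit_test/data_test.py to recover a viewable image
recovered = 256.0 * (normalized * 0.5 + 0.5)

assert np.allclose(recovered, pixels)
print(normalized)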