├── .gitignore
├── DouglasPeuker.py
├── README.md
├── auto_annotation.py
├── config.py
├── contourprocess
    ├── __init__.py
    ├── cal_dist_ang.py
    ├── line_intersection.py
    ├── rdp_alg.py
    ├── regularization.py
    └── rotate_ang.py
├── convert.py
├── data
    ├── building.names
    ├── coco_classes.txt
    └── shapes.names
├── evaluate.py
├── inference.py
├── inference_onnxruntime.py
├── largest_interior_rectangle
    ├── __init__.py
    ├── lir.py
    └── lir_within_outline.py
├── mask.png
├── mask2npz.py
├── mrcnn
    ├── __init__.py
    ├── layers.py
    ├── mask_rcnn.py
    ├── mrcnn.py
    ├── mrcnn_training.py
    └── restnet.py
├── parallel_model.py
├── regularization.py
├── requirements.txt
├── train.py
└── utils
    ├── anchors.py
    ├── config.py
    ├── customerDataset.py
    ├── dataset.py
    ├── utils.py
    └── visualize.py


/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | *.h5
132 | *.onnx
133 | 
134 | .history
135 | 
136 | logs
137 | 
138 | train_data
139 | 
140 | result
141 | samples


--------------------------------------------------------------------------------
/DouglasPeuker.py:
--------------------------------------------------------------------------------
  1 | import math
  2 | import numpy as np
  3 | '''
  4 | 道格拉斯-普克抽稀算法
  5 | 垂距限值抽稀算法
  6 | '''
  7 | # 设定阈值
  8 | THRESHOLD = 0.0001
  9 | 
 10 | # 计算点到直线的距离
 11 | def point2Line(point_a, point_b, point_c):
 12 |     '''
 13 |     计算点a到b, c之间的距离
 14 |     point_a: (x_a, y_a)
 15 |     point_b: (x_b, y_b)
 16 |     point_c: (x_c, y_c)
 17 |     '''
 18 |     # 计算bc的直线
 19 |     if point_b[0] == point_c[0]:
 20 |         return 9999
 21 |     # y = kx+b
 22 |     k = (point_b[1]-point_c[1])/(point_b[0]-point_c[0])
 23 |     b = point_c[1]-k*point_c[0]
 24 | 
 25 |     # 计算点到直线的距离
 26 |     distance = abs(k*point_a[0]-point_a[1]+b)/math.sqrt(1+k**2)
 27 |     return distance
 28 | 
 29 | 
 30 | class DouglasPeuker(object):
 31 | 
 32 |     def __init__(self):
 33 |         self._threshold=THRESHOLD
 34 |         self._qualify_list = []
 35 |         self._disqualify_list = []
 36 |     
 37 |     def diluting(self, point_list):
 38 |         '''
 39 |         抽稀算法
 40 |         : param point_list: 二维点列表
 41 |         : return 
 42 |         '''
 43 |         if len(point_list)<3:
 44 |             self._qualify_list.extend(point_list[::-1])
 45 |         else:
 46 |             # 找到首尾相连的两点
 47 |             max_distance_index, max_distance = 0, 0
 48 |             for index, point in enumerate(point_list):
 49 |                 if index in [0, len(point_list) - 1]:
 50 |                     continue
 51 |                 distance = point2Line(point, point_list[0], point_list[-1])
 52 |                 if distance > max_distance:
 53 |                     max_distance_index = index
 54 |                     max_distance = distance
 55 | 
 56 |             # 若最大距离小于阈值，则去掉所有中间点。 反之，则将曲线按最大距离点分割
 57 |             if max_distance < self._threshold:
 58 |                 self._qualify_list.append(point_list[-1])
 59 |                 self._qualify_list.append(point_list[0])
 60 |             else:
 61 |                 # 将曲线按最大距离的点分割成两段
 62 |                 sequence_a = point_list[:max_distance_index]
 63 |                 sequence_b = point_list[max_distance_index:]
 64 | 
 65 |                 for sequence in [sequence_a, sequence_b]:
 66 |                     if len(sequence) < 3 and sequence == sequence_b:
 67 |                         self._qualify_list.extend(sequence[::-1])
 68 |                     else:
 69 |                         self._disqualify_list.append(sequence)
 70 |     def main(self, point_list):
 71 |         self.diluting(point_list)
 72 |         while len(self._disqualify_list) > 0:
 73 |             self.diluting(self._disqualify_list.pop())
 74 |         print(self._qualify_list)
 75 |         print(len(self._qualify_list))
 76 | 
 77 | class LimitVerticalDistance(object):
 78 |     def __init__(self):
 79 |         self._threshold = THRESHOLD
 80 |         self._qualify_list = []
 81 |     
 82 |     def diluting(self, point_list):
 83 |         self._qualify_list.append(point_list[0])
 84 |         check_index = 1
 85 |         while check_index<len(point_list)-1:
 86 |             distance = point2Line(point_list[check_index], self._qualify_list[-1], point_list[check_index+1])
 87 | 
 88 |             if distance<self._threshold:
 89 |                 check_index+=1 
 90 |             else:
 91 |                 self._qualify_list.append(point_list[check_index])
 92 |                 check_index+=1
 93 |         return self._qualify_list
 94 |         
 95 | 
 96 | 
 97 | 
 98 | if __name__ == '__main__':
 99 |     d = DouglasPeuker()
100 |     d.main([[104.066228, 30.644527], [104.066279, 30.643528], [104.066296, 30.642528], [104.066314, 30.641529],
101 |             [104.066332, 30.640529], [104.066383, 30.639530], [104.066400, 30.638530], [104.066451, 30.637531],
102 |             [104.066468, 30.636532], [104.066518, 30.635533], [104.066535, 30.634533], [104.066586, 30.633534],
103 |             [104.066636, 30.632536], [104.066686, 30.631537], [104.066735, 30.630538], [104.066785, 30.629539],
104 |             [104.066802, 30.628539], [104.066820, 30.627540], [104.066871, 30.626541], [104.066888, 30.625541],
105 |             [104.066906, 30.624541], [104.066924, 30.623541], [104.066942, 30.622542], [104.066960, 30.621542],
106 |             [104.067011, 30.620543], [104.066122, 30.620086], [104.065124, 30.620021], [104.064124, 30.620022],
107 |             [104.063124, 30.619990], [104.062125, 30.619958], [104.061125, 30.619926], [104.060126, 30.619894],
108 |             [104.059126, 30.619895], [104.058127, 30.619928], [104.057518, 30.620722], [104.057625, 30.621716],
109 |             [104.057735, 30.622710], [104.057878, 30.623700], [104.057984, 30.624694], [104.058094, 30.625688],
110 |             [104.058204, 30.626682], [104.058315, 30.627676], [104.058425, 30.628670], [104.058502, 30.629667],
111 |             [104.058518, 30.630667], [104.058503, 30.631667], [104.058521, 30.632666], [104.057664, 30.633182],
112 |             [104.056664, 30.633174], [104.055664, 30.633166], [104.054672, 30.633289], [104.053758, 30.633694],
113 |             [104.052852, 30.634118], [104.052623, 30.635091], [104.053145, 30.635945], [104.053675, 30.636793],
114 |             [104.054200, 30.637643], [104.054756, 30.638475], [104.055295, 30.639317], [104.055843, 30.640153],
115 |             [104.056387, 30.640993], [104.056933, 30.641830], [104.057478, 30.642669], [104.058023, 30.643507],
116 |             [104.058595, 30.644327], [104.059152, 30.645158], [104.059663, 30.646018], [104.060171, 30.646879],
117 |             [104.061170, 30.646855], [104.062168, 30.646781], [104.063167, 30.646823], [104.064167, 30.646814],
118 |             [104.065163, 30.646725], [104.066157, 30.646618], [104.066231, 30.645620], [104.066247, 30.644621]])
119 | 
120 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Mask_RCNN
 2 | 
 3 | 基于Tensorflow实现Mask RCNN，主要参考[matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN)。
 4 | 
 5 | ## 文件说明
 6 | 1. contourprocess
 7 | 
 8 |     建筑物掩膜的处理
 9 | 
10 | 2. largest interior rectangle
11 | 
12 |     最大内接矩形算法(largest_interior_rectangle)：https://github.com/lukasalexanderweber/lir
13 |     
14 |     ![image](https://user-images.githubusercontent.com/27406337/166854679-18a8a8ae-70ed-4248-8971-4a8c8875f89d.png)![image](https://user-images.githubusercontent.com/27406337/166854694-9c6d10b0-6d58-480c-9295-a502f1efc687.png)![image](https://user-images.githubusercontent.com/27406337/166854704-29f8be81-6e16-48c2-8434-61bc44d88bda.png)
15 | 
16 | 3. 道格拉斯普克算法
17 | 
18 | 运行```DouglasPeuker.py```即可
19 | 
20 | 
21 | 4. 关于config文件
22 | 
23 | config文件包含两个，一个是在```utils```文件夹下的```config.py```，这一配置文件是训练以及推理过程中的基础设置的配置文件，里面包括基础的配置。另一个是根目录下的```config```文件，继承```utils```文件夹下的```config.py```，主要配置训练集，测试图像等路径。
24 | 
25 | ## 训练
26 | 
27 | 训练的时候注意一点：GPU数量暂时只能设定为1，如果需要多块GPU进行并行训练，可查看[matterport/Mask_RCNN](https://github.com/matterport/Mask_RCNN)中有并行训练模型的代码。
28 | 
29 | 训练数据的结构遵循VOC格式，包含三个文件夹：```imgs```、```mask```以及```yaml```文件夹，分别存放原图、掩膜图像以及yaml文件，如何制作数据集请参考：[Semantic-Segmentation-Datasets](https://github.com/RyanCCC/Semantic-Segmentation-Datasets)
30 | 
31 | ![image](https://user-images.githubusercontent.com/27406337/166855198-f3761b2c-e0aa-4b51-bdd6-55ff66eb93c3.png)
32 | 
33 | 制作好数据集后，修改```config.py```文件，运行```train.py```文件即可进行训练。
34 | 
35 | ## 推理
36 | 
37 | 修改config文件后运行```inference.py```文件即可。
38 | 
39 | ONNX推理可以参考：[Deployment:MaskRCNN](https://github.com/RyanCCC/Deployment/tree/main/MaskRCNN)
40 | 
41 | ## 模型评估
42 | 
43 | 1. [Calculating mean Average Recall (mAR), mean Average Precision (mAP) and F1-Score](https://github.com/matterport/Mask_RCNN/issues/2513)
44 | 
45 | 2. [compue_ap](https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/utils.py#L715)
46 | 
47 | 3. [语义分割之评价指标](https://zhuanlan.zhihu.com/p/61880018 )
48 | 
49 | ## 模型转换
50 | 
51 | 使用`convert.py`可以进行模型转换。如将权重和计算图转换成`Tensorflow saved_model`的格式。也可以将tensorflow的模型转换成ONNX的格式。详情请参考文档。
52 | 一些简单的使用例子如下：`python .\convert.py --flag --saved_model .\save_model\ --save_onnx ./test.onnx`或者`python .\convert.py --weight .\model\village_building.h5 --label .\data\building.names --save_onnx ./test.onnx --saved_pb`
53 | 
54 | 参数说明：
55 | 
56 | - weight：待转换的权重文件
57 | - label：标签文件的路径
58 | - saved_pb：是否保存pb文件
59 | - saved_pb_dir：保存pb文件的路径
60 | - saved_model：保存tensorflow模型
61 | - save_onnx：保存onnx文件路径
62 | - opset：onnx算子类型
63 | - flag：带有这个标记说明是`tensorflow model`模型转换成onnx，否则为权重转换成onnx模型
64 | 
65 | ## 调试问题
66 | 
67 | 1. `Input image dtype is bool. Interpolation is not defined with bool data type`
68 | 
69 | ![image](https://user-images.githubusercontent.com/27406337/174923521-cf42e985-007b-44ef-b00c-20d78a95ac1e.png)
70 | 
71 | 参考：[Input image dtype is bool. Interpolation is not defined with bool data type](https://stackoverflow.com/questions/62330374/input-image-dtype-is-bool-interpolation-is-not-defined-with-bool-data-type):`pip install -U scikit-image==0.16.2`
72 | 
73 | 
74 | 
75 | ## 参考
76 | 
77 | 1. [Mask RCNN源码解读](https://blog.csdn.net/u012655441/article/details/122304099)
78 | 2. [Mask RCNN综述以及建筑物实例分割](https://blog.csdn.net/u012655441/article/details/120753214)
79 | 


--------------------------------------------------------------------------------
/auto_annotation.py:
--------------------------------------------------------------------------------
  1 | from PIL import Image
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | from utils.anchors import get_anchors
  5 | from utils.utils import mold_inputs,unmold_detections
  6 | from utils import visualize
  7 | from utils.config import Config
  8 | import os
  9 | from glob import glob
 10 | import json
 11 | import base64
 12 | import io
 13 | from tqdm import tqdm
 14 | 
 15 | '''
 16 | 模型的自动标注代码
 17 | '''
 18 | 
 19 | img_pattern  = './todo/*.jpg'
 20 | model_path = './model/village_building'
 21 | class_path = './tmp.names'
 22 | 
 23 | class intEncoder(json.JSONEncoder):
 24 |     def default(self, obj):
 25 |         if isinstance(obj, np.intc):
 26 |             return float(obj)
 27 |     
 28 |         return json.JSONEncoder.default(self, obj)
 29 | 
 30 | def get_class(classes_path):
 31 |         classes_path = os.path.expanduser(classes_path)
 32 |         with open(classes_path) as f:
 33 |             class_names = f.readlines()
 34 |         class_names = [c.strip() for c in class_names]
 35 |         class_names.insert(0,"BG")
 36 |         return class_names
 37 | 
 38 | def img2base64(image_path):
 39 |     with open(image_path, 'rb') as f:
 40 |         image = f.read()
 41 |     image_base64 = str(base64.b64encode(image), encoding='utf-8')
 42 |     return image_base64 
 43 | 
 44 | class_names = get_class(class_path)
 45 | def get_config():
 46 |     class InferenceConfig(Config):
 47 |         NUM_CLASSES = len(class_names)
 48 |         GPU_COUNT = 1
 49 |         IMAGES_PER_GPU = 1
 50 |         DETECTION_MIN_CONFIDENCE = 0.8
 51 |         NAME = "Customer"
 52 |         RPN_ANCHOR_SCALES =  (16, 32, 64, 128, 256)
 53 |         IMAGE_MIN_DIM = 512
 54 |         IMAGE_MAX_DIM = 512
 55 |         IMAGE_SHAPE =  [512, 512 ,3]
 56 | 
 57 |     config = InferenceConfig()
 58 |     return config
 59 | 
 60 | InferenceConfig = get_config()
 61 | model = tf.keras.models.load_model(model_path)
 62 | 
 63 | # 获取输入的anchor
 64 | 
 65 | 
 66 | for image_name in tqdm(glob(img_pattern)):
 67 |     image = Image.open(image_name).convert('RGB')
 68 |     imageWidth, imageHeight = image.size
 69 |     image = [np.array(image)]
 70 |     molded_images, image_metas, windows = mold_inputs(InferenceConfig,image)
 71 |     image_shape = molded_images[0].shape
 72 |     anchors = get_anchors(InferenceConfig,image_shape)
 73 |     anchors = np.broadcast_to(anchors, (1,) + anchors.shape)
 74 |     detections, _, _, mrcnn_mask, _, _, _ =model.predict([molded_images, image_metas, anchors], verbose=0)
 75 |     final_rois, final_class_ids, final_scores, final_masks =unmold_detections(detections[0], mrcnn_mask[0],image[0].shape, molded_images[0].shape,windows[0])
 76 | 
 77 |     r = {
 78 |         "rois": final_rois,
 79 |         "class_ids": final_class_ids,
 80 |         "scores": final_scores,
 81 |         "masks": final_masks,
 82 |     }
 83 | 
 84 | 
 85 |     drawed_image, shapes = visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'], 
 86 |                                     class_names, r['scores'])
 87 | 
 88 |     # 将数据保存在json文件中
 89 |     json_info = {}
 90 |     json_info['version'] = "4.5.7"
 91 |     json_info['flags'] = {}
 92 |     json_info['shapes'] = []
 93 |     json_info['imagePath'] = os.path.basename(image_name)
 94 | 
 95 |     json_info['imageData'] = img2base64(image_name)
 96 |     json_info['imageHeight'] = imageHeight
 97 |     json_info['imageWidth'] = imageWidth
 98 |     # 处理返回的shapes
 99 |     for index, (label, points) in  enumerate(shapes):
100 |         tmp = {}
101 |         if len(points[0])<=2:
102 |             continue
103 |         tmp['label'] = label+f'.{index:03d}'
104 |         tmp['points'] = []
105 |         tmp['group_id'] = None
106 |         tmp['shape_type'] = "polygon"
107 |         tmp['flags'] = {}
108 |         for point in points[0]:
109 |             tmp['points'] .append(list(point[0]))
110 |         json_info['shapes'].append(tmp)
111 |     
112 |     # 保存.json文件
113 |     path = os.path.dirname(image_name)
114 |     suffix = '.json'
115 |     json_filename = os.path.join(path, os.path.splitext(json_info['imagePath'])[0]+suffix)
116 |     with open(json_filename, 'w') as f:
117 |         json.dump(json_info, f, ensure_ascii=False, indent=2, cls=intEncoder)
118 | 
119 |     # drawed_image.save('./tmp/6.jpg')
120 |     # drawed_image.show()


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | from utils.config import Config
 2 | import os
 3 | 
 4 | def get_class(classes_path):
 5 |     classes_path = os.path.expanduser(classes_path)
 6 |     with open(classes_path) as f:
 7 |         class_names = f.readlines()
 8 |     class_names = [c.strip() for c in class_names]
 9 |     return class_names
10 | 
11 | class CustomerConfig(Config):
12 |     NAME = "Customer"
13 |     NUM_CLASSES = 1 + 1
14 |     RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)
15 |     # IMAGE_MIN_DIM = 512
16 |     # IMAGE_MAX_DIM = 512
17 |     CLASSES = get_class(r'.\data\coco_classes.txt')
18 |     TRAIN_DATASET = './train_data'
19 |     PRETRAIN_MODEL = "model/mask_rcnn_coco.h5"
20 |     LEARNING_RATE = 1e-5
21 |     EPOCH = 100
22 |     MAX_GT_INSTANCES = 200
23 |     DETECTION_MAX_INSTANCES = 200
24 | 
25 | class InferenceConfig(Config):
26 |     NAME = 'Customer'
27 |     RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)
28 |     IMAGE_MIN_DIM = 1024
29 |     IMAGE_MAX_DIM = 1024
30 |     model = r'./model/building_20221121.h5'
31 |     class_path = r'./data/building.names'
32 | 


--------------------------------------------------------------------------------
/contourprocess/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/contourprocess/__init__.py


--------------------------------------------------------------------------------
/contourprocess/cal_dist_ang.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | 
 4 | 
 5 | # 计算两点的距离
 6 | def cal_dist(point_1, point_2):
 7 |     diff  = point_1-point_2
 8 |     dist = math.hypot(diff[0], diff[1])
 9 |     # dist = np.sqrt(np.sum(math.pow((point_1-point_2), 2)))
10 |     return dist
11 | 
12 | # 计算两条直线的夹角  余弦定理
13 | def cal_angle(point_1, point_2, point_3):
14 |     '''
15 |     根据三个坐标计算夹角
16 |     :param point_1：点1坐标
17 |     :param point_2：点2的坐标
18 |     :param point_3：点3的坐标
19 |     :return：返回任意角的夹角值
20 |     '''
21 |     a = math.sqrt(
22 |         (point_2[0]-point_3[0])*(point_2[0]-point_3[0])+(point_2[1]-point_3[1])*(point_2[1] - point_3[1])
23 |     )
24 |     b = math.sqrt(
25 |         (point_1[0]-point_3[0])*(point_1[0]-point_3[0])+(point_1[1]-point_3[1])*(point_1[1] - point_3[1])
26 |     )
27 |     c = math.sqrt(
28 |         (point_1[0]-point_2[0])*(point_1[0]-point_2[0])+(point_1[1]-point_2[1])*(point_1[1] - point_2[1])
29 |     )
30 |     A=math.degrees(math.acos((a*a-b*b-c*c)/(-2*b*c)))
31 |     B=math.degrees(math.acos((b*b-a*a-c*c)/(-2*a*c)))
32 |     C=math.degrees(math.acos((c*c-a*a-b*b)/(-2*a*b)))
33 |     return A, B, C
34 | 
35 | # 计算线条的方位角
36 | def azimuthAngle(point_0, point_1):
37 |     x1, y1 = point_0
38 |     x2, y2 = point_1
39 |     if x1 < x2:
40 |         if y1 < y2:
41 |             ang = math.atan((y2 - y1) / (x2 - x1))
42 |             ang = ang * 180 / math.pi
43 |             return ang
44 |         elif y1 > y2:
45 |             ang = math.atan((y1 - y2) / (x2 - x1))
46 |             ang = ang * 180 / math.pi
47 |             return 90 + (90 - ang)
48 |         elif y1==y2:
49 |             return 0
50 |     elif x1 > x2:
51 |         if y1 < y2:
52 |             ang = math.atan((y2-y1)/(x1-x2))
53 |             ang = ang*180/math.pi
54 |             return 90+(90-ang)
55 |         elif y1 > y2:
56 |             ang = math.atan((y1-y2)/(x1-x2))
57 |             ang = ang * 180 / math.pi
58 |             return ang
59 |         elif y1==y2:
60 |             return 0
61 | 
62 |     elif x1==x2:
63 |         return 90
64 | 
65 | if __name__ == '__main__':
66 |     pass


--------------------------------------------------------------------------------
/contourprocess/line_intersection.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | # 线生成函数
 5 | def line(p1, p2):
 6 |     A = (p1[1] - p2[1])
 7 |     B = (p2[0] - p1[0])
 8 |     C = (p1[0]*p2[1] - p2[0]*p1[1])
 9 |     return A, B, -C
10 | 
11 | 
12 | # 计算两条直线之间的交点
13 | def intersection(L1, L2):
14 |     D  = L1[0] * L2[1] - L1[1] * L2[0]
15 |     Dx = L1[2] * L2[1] - L1[1] * L2[2]
16 |     Dy = L1[0] * L2[2] - L1[2] * L2[0]
17 |     if D != 0:
18 |         x = Dx / D
19 |         y = Dy / D
20 |         return x, y
21 |     else:
22 |         return False
23 | 
24 | 
25 | # 计算两个平行线之间的距离
26 | def par_line_dist(L1, L2):
27 |     A1, B1, C1 = L1
28 |     A2, B2, C2 = L2
29 | 
30 |     new_A1 = 1
31 |     new_B1 = B1 / A1
32 |     new_C1 = C1 / A1
33 | 
34 |     new_A2 = 1
35 |     new_B2 = B2 / A2
36 |     new_C2 = C2 / A2
37 | 
38 |     dist = (np.abs(new_C1-new_C2))/(np.sqrt(new_A2*new_A2+new_B2*new_B2))
39 |     return dist
40 | 
41 | 
42 | # 计算点在直线的投影位置
43 | def point_in_line(m, n, x1, y1, x2, y2):
44 |     x = (m * (x2 - x1) * (x2 - x1) + n * (y2 - y1) * (x2 - x1) + (x1 * y2 - x2 * y1) * (y2 - y1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
45 |     y = (m * (x2 - x1) * (y2 - y1) + n * (y2 - y1) * (y2 - y1) + (x2 * y1 - x1 * y2) * (x2 - x1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
46 |     return (x, y)


--------------------------------------------------------------------------------
/contourprocess/rdp_alg.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | 
 3 | 
 4 | def pldist(x0, x1, x2):
 5 |     """
 6 |     Calculates the distance from the point ``x0`` to the line given
 7 |     by the points ``x1`` and ``x2``.
 8 |     :param x0: a point
 9 |     :type x0: a 2x1 numpy array
10 |     :param x1: a point of the line
11 |     :type x1: 2x1 numpy array
12 |     :param x2: another point of the line
13 |     :type x2: 2x1 numpy array
14 |     """
15 |     x0, x1, x2 = x0[:2], x1[:2], x2[:2] # discard timestamp
16 |     if x1[0] == x2[0]:
17 |         return np.abs(x0[0] - x1[0])
18 | 
19 |     return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])),
20 |                      np.linalg.norm(x2 - x1))
21 | 
22 | 
23 | def _rdp(M, epsilon, dist):
24 |     """
25 |     Simplifies a given array of points.
26 |     :param M: an array
27 |     :type M: Nx2 numpy array
28 |     :param epsilon: epsilon in the rdp algorithm
29 |     :type epsilon: float
30 |     :param dist: distance function
31 |     :type dist: function with signature ``f(x1, x2, x3)``
32 |     """
33 |     dmax = 0.0
34 |     index = -1
35 | 
36 |     for i in range(1, M.shape[0]):
37 |         d = dist(M[i], M[0], M[-1])
38 | 
39 |         if d > dmax:
40 |             index = i
41 |             dmax = d
42 | 
43 |     if dmax > epsilon:
44 |         r1 = _rdp(M[:index + 1], epsilon, dist)
45 |         r2 = _rdp(M[index:], epsilon, dist)
46 | 
47 |         return np.vstack((r1[:-1], r2))
48 |     else:
49 |         return np.vstack((M[0], M[-1]))
50 | 
51 | 
52 | def _rdp_nn(seq, epsilon, dist):
53 |     """
54 |     Simplifies a given array of points.
55 |     :param seq: a series of points
56 |     :type seq: sequence of 2-tuples
57 |     :param epsilon: epsilon in the rdp algorithm
58 |     :type epsilon: float
59 |     :param dist: distance function
60 |     :type dist: function with signature ``f(x1, x2, x3)``
61 |     """
62 |     return _rdp(np.array(seq), epsilon, dist).tolist()
63 | 
64 | 
65 | def rdp(M, epsilon=0, dist=pldist):
66 |     """
67 |     Simplifies a given array of points.
68 |     :param M: a series of points
69 |     :type M: either a Nx2 numpy array or sequence of 2-tuples
70 |     :param epsilon: epsilon in the rdp algorithm
71 |     :type epsilon: float
72 |     :param dist: distance function
73 |     :type dist: function with signature ``f(x1, x2, x3)``
74 |     """
75 |     if "numpy" in str(type(M)):
76 |         return _rdp(M, epsilon, dist)
77 |     return _rdp_nn(M, epsilon, dist)


--------------------------------------------------------------------------------
/contourprocess/regularization.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import numpy as np
  3 | from .rdp_alg import rdp
  4 | from .cal_dist_ang import cal_angle, cal_dist, azimuthAngle
  5 | from .rotate_ang import Nrotation_angle_get_coor_coordinates, Srotation_angle_get_coor_coordinates
  6 | from .line_intersection import line, intersection, par_line_dist, point_in_line
  7 | 
  8 | 
  9 | 
 10 | def boundary_regularization(img, epsilon=6):
 11 |     h, w = img.shape[0:2]
 12 |     # 轮廓定位
 13 |     contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
 14 |     contours = np.squeeze(contours[0])
 15 |     # 轮廓精简（DP）
 16 |     contours = rdp(contours, epsilon=epsilon)
 17 |     contours[:, 1] = h - contours[:, 1]
 18 |     # 轮廓规则化
 19 |     dists = []
 20 |     azis = []
 21 |     azis_index = []
 22 |     # 获取每条边的长度和方位角
 23 |     for i in range(contours.shape[0]):
 24 |         cur_index = i
 25 |         next_index = i+1 if i < contours.shape[0]-1 else 0
 26 |         prev_index = i-1
 27 |         cur_point = contours[cur_index]
 28 |         nest_point = contours[next_index]
 29 |         prev_point = contours[prev_index]
 30 | 
 31 |         dist = cal_dist(cur_point, nest_point)
 32 |         azi = azimuthAngle(cur_point, nest_point)
 33 | 
 34 |         dists.append(dist)
 35 |         azis.append(azi)
 36 |         azis_index.append([cur_index, next_index])
 37 | 
 38 |     # 以最长的边的方向作为主方向
 39 |     longest_edge_idex = np.argmax(dists)
 40 |     main_direction = azis[longest_edge_idex]
 41 |     # 方向纠正，绕中心点旋转到与主方向垂直或者平行
 42 |     correct_points = []
 43 |     para_vetr_idxs = []  # 0平行 1垂直
 44 |     for i, (azi, (point_0_index, point_1_index)) in enumerate(zip(azis, azis_index)):
 45 | 
 46 |         if i == longest_edge_idex:
 47 |             correct_points.append([contours[point_0_index], contours[point_1_index]])
 48 |             para_vetr_idxs.append(0)
 49 |         else:
 50 |             # 确定旋转角度
 51 |             rotate_ang = main_direction - azi
 52 | 
 53 |             if np.abs(rotate_ang) < 180/4:
 54 |                 rotate_ang = rotate_ang
 55 |                 para_vetr_idxs.append(0)
 56 |             elif np.abs(rotate_ang) >= 90-180/4:
 57 |                 rotate_ang = rotate_ang + 90
 58 |                 para_vetr_idxs.append(1)
 59 | 
 60 |             # 执行旋转任务
 61 |             point_0 = contours[point_0_index]
 62 |             point_1 = contours[point_1_index]
 63 |             point_middle = (point_0 + point_1) / 2
 64 | 
 65 |             if rotate_ang > 0:
 66 |                 rotate_point_0 = Srotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang))
 67 |                 rotate_point_1 = Srotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang))
 68 |             elif rotate_ang < 0:
 69 |                 rotate_point_0 = Nrotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang))
 70 |                 rotate_point_1 = Nrotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang))
 71 |             else:
 72 |                 rotate_point_0 = point_0
 73 |                 rotate_point_1 = point_1
 74 |             correct_points.append([rotate_point_0, rotate_point_1])
 75 | 
 76 |     correct_points = np.array(correct_points)
 77 | 
 78 | 
 79 |     # 相邻边校正，垂直取交点，平行平移短边或者加线
 80 |     final_points = []
 81 |     final_points.append(correct_points[0][0])
 82 |     for i in range(correct_points.shape[0]-1):
 83 |         cur_index = i
 84 |         next_index = i + 1 if i < correct_points.shape[0] - 1 else 0
 85 | 
 86 |         cur_edge_point_0 = correct_points[cur_index][0]
 87 |         cur_edge_point_1 = correct_points[cur_index][1]
 88 |         next_edge_point_0 = correct_points[next_index][0]
 89 |         next_edge_point_1 = correct_points[next_index][1]
 90 | 
 91 |         cur_para_vetr_idx = para_vetr_idxs[cur_index]
 92 |         next_para_vetr_idx = para_vetr_idxs[next_index]
 93 | 
 94 |         if cur_para_vetr_idx != next_para_vetr_idx:
 95 |             # 垂直取交点
 96 |             L1 = line(cur_edge_point_0, cur_edge_point_1)
 97 |             L2 = line(next_edge_point_0, next_edge_point_1)
 98 | 
 99 |             point_intersection = intersection(L1, L2)
100 |             final_points.append(point_intersection)
101 | 
102 |         elif cur_para_vetr_idx == next_para_vetr_idx:
103 |             # 平行分两种，一种加短线，一种平移，取决于距离阈值
104 |             L1 = line(cur_edge_point_0, cur_edge_point_1)
105 |             L2 = line(next_edge_point_0, next_edge_point_1)
106 |             marg = par_line_dist(L1, L2)
107 | 
108 |             if marg < 3:
109 |                 # 平移
110 |                 point_move = point_in_line(next_edge_point_0[0], next_edge_point_0[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
111 |                 final_points.append(point_move)
112 |                 # 更新平移之后的下一条边
113 |                 correct_points[next_index][0] = point_move
114 |                 correct_points[next_index][1] = point_in_line(next_edge_point_1[0], next_edge_point_1[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
115 | 
116 | 
117 |             else:
118 |                 # 加线
119 |                 add_mid_point = (cur_edge_point_1 + next_edge_point_0) / 2
120 |                 add_point_1 = point_in_line(add_mid_point[0], add_mid_point[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1])
121 |                 add_point_2 = point_in_line(add_mid_point[0], add_mid_point[1], next_edge_point_0[0], next_edge_point_0[1], next_edge_point_1[0], next_edge_point_1[1])
122 |                 final_points.append(add_point_1)
123 |                 final_points.append(add_point_2)
124 | 
125 | 
126 |     final_points.append(final_points[0])
127 |     final_points = np.array(final_points)
128 | 
129 |     final_points[:, 1] = h - final_points[:, 1]
130 |     return final_points
131 | 


--------------------------------------------------------------------------------
/contourprocess/rotate_ang.py:
--------------------------------------------------------------------------------
 1 | import math
 2 | 
 3 | 
 4 | # 顺时针旋转
 5 | def Nrotation_angle_get_coor_coordinates(point, center, angle):
 6 |     src_x, src_y = point
 7 |     center_x, center_y = center
 8 |     radian = math.radians(angle)
 9 | 
10 |     dest_x = (src_x - center_x) * math.cos(radian) + (src_y - center_y) * math.sin(radian) + center_x
11 |     dest_y = (src_y - center_y) * math.cos(radian) - (src_x - center_x) * math.sin(radian) + center_y
12 | 
13 |     # return (int(dest_x), int(dest_y))
14 |     return (dest_x, dest_y)
15 | 
16 | 
17 | # 逆时针旋转
18 | def Srotation_angle_get_coor_coordinates(point, center, angle):
19 |     src_x, src_y = point
20 |     center_x, center_y = center
21 |     radian = math.radians(angle)
22 | 
23 |     dest_x = (src_x - center_x) * math.cos(radian) - (src_y - center_y) * math.sin(radian) + center_x
24 |     dest_y = (src_x - center_x) * math.sin(radian) + (src_y - center_y) * math.cos(radian) + center_y
25 | 
26 |     # return [int(dest_x), int(dest_y)]
27 |     return (dest_x, dest_y)
28 | 


--------------------------------------------------------------------------------
/convert.py:
--------------------------------------------------------------------------------
 1 | '''
 2 | Convert weight model to pb or h5 or onnx
 3 | '''
 4 | 
 5 | import argparse
 6 | import tf2onnx
 7 | import tensorflow as tf
 8 | from mrcnn.mask_rcnn import MASK_RCNN
 9 | 
10 | 
11 | def parser_opt():
12 |     parser = argparse.ArgumentParser(description="Convert Mask RCNN model")
13 |     parser.add_argument('--weight', type=str, help='model weight', required=True)
14 |     parser.add_argument('--label', type=str,help='label file', required=True)
15 |     parser.add_argument('--saved_pb', action='store_true', help='save pb model to current directory')
16 |     parser.add_argument('--saved_pb_dir', type=str, default='./save_model', help='save pb file if needed. Default:save_model')
17 |     
18 |     parser.add_argument('--saved_model', type=str, help='Tensorflow saved_model', default='')
19 |     parser.add_argument('--save_onnx', type=str, help='save onnx model name', required=True, default='')
20 |     parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version')
21 |     parser.add_argument('--flag', action='store_true', help='True:Tensoflow model, False:Tensorflow weights')
22 |     
23 |     return parser
24 | 
25 | def main(args):
26 |     save_path = args.save_onnx
27 |     opset = args.opset
28 |     if args.flag:
29 |         '''
30 |         加载模型并导出onnx模型
31 |         '''
32 |         saved_model = args.saved_model
33 |         assert len(saved_model) > 0, 'saved_model cannot be none or empty.'
34 |         maskrcnn_model = tf.keras.models.load_model(saved_model)
35 |         model_proto, _ = tf2onnx.convert.from_keras(maskrcnn_model, opset=opset, output_path=save_path)
36 |         output_names = [n.name for n in model_proto.graph.output]
37 |         print(output_names)
38 |     else:
39 |         print('Convert Tensorflow saved model to ONNX')
40 |         weights = args.weight
41 |         class_path = args.label
42 |         assert len(weights) > 0, 'weights cannot be none or empty.'
43 |         assert len(class_path) > 0, 'classes path doesn\'t exists.'
44 |         mask_rcnn = MASK_RCNN(model=weights, classes_path=class_path, confidence=0.8)
45 | 
46 |         save_pb = args.saved_pb
47 |         if save_pb:
48 |             save_name = args.saved_pb_dir
49 |             assert len(save_name) > 0, 'save_name cannot be none or empty.'
50 |             mask_rcnn.model.save(save_name, save_format='tf')
51 | 
52 |         model_proto, _ = tf2onnx.convert.from_keras(mask_rcnn.model, opset=opset, output_path=save_path)
53 |         output_names = [n.name for n in model_proto.graph.output]
54 |         print(f'Model output names: ',output_names)
55 | 
56 | if __name__ == '__main__':
57 |     parser = parser_opt()
58 |     args = parser.parse_args()
59 |     main(args=args)
60 | 


--------------------------------------------------------------------------------
/data/building.names:
--------------------------------------------------------------------------------
1 | building


--------------------------------------------------------------------------------
/data/coco_classes.txt:
--------------------------------------------------------------------------------
 1 | person
 2 | bicycle
 3 | car
 4 | motorbike
 5 | aeroplane
 6 | bus
 7 | train
 8 | truck
 9 | boat
10 | traffic light
11 | fire hydrant
12 | stop sign
13 | parking meter
14 | bench
15 | bird
16 | cat
17 | dog
18 | horse
19 | sheep
20 | cow
21 | elephant
22 | bear
23 | zebra
24 | giraffe
25 | backpack
26 | umbrella
27 | handbag
28 | tie
29 | suitcase
30 | frisbee
31 | skis
32 | snowboard
33 | sports ball
34 | kite
35 | baseball bat
36 | baseball glove
37 | skateboard
38 | surfboard
39 | tennis racket
40 | bottle
41 | wine glass
42 | cup
43 | fork
44 | knife
45 | spoon
46 | bowl
47 | banana
48 | apple
49 | sandwich
50 | orange
51 | broccoli
52 | carrot
53 | hot dog
54 | pizza
55 | donut
56 | cake
57 | chair
58 | sofa
59 | pottedplant
60 | bed
61 | diningtable
62 | toilet
63 | tvmonitor
64 | laptop
65 | mouse
66 | remote
67 | keyboard
68 | cell phone
69 | microwave
70 | oven
71 | toaster
72 | sink
73 | refrigerator
74 | book
75 | clock
76 | vase
77 | scissors
78 | teddy bear
79 | hair drier
80 | toothbrush


--------------------------------------------------------------------------------
/data/shapes.names:
--------------------------------------------------------------------------------
1 | circle
2 | square
3 | triangle


--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
  1 | import cv2
  2 | import tensorflow as tf
  3 | import numpy as np
  4 | import config
  5 | import os
  6 | from mrcnn.mask_rcnn import MASK_RCNN
  7 | from PIL import Image
  8 | from utils import utils, dataset, visualize
  9 | from mrcnn.mrcnn_training import load_image_gt
 10 | import yaml
 11 | import matplotlib.pyplot as plt
 12 | from tqdm import tqdm
 13 | 
 14 | mask_rcnn = MASK_RCNN(model=config.InferenceConfig.model, classes_path = config.InferenceConfig.class_path)
 15 | class_names = mask_rcnn.get_class()
 16 | 
 17 | '''
 18 | 参考：
 19 | 1. https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/utils.py#L715
 20 | 2. https://github.com/matterport/Mask_RCNN/issues/2513
 21 | 3. https://zhuanlan.zhihu.com/p/61880018 
 22 | '''
 23 | 
 24 | class Evaluator(object):
 25 |     def __init__(self, num_class) -> None:
 26 |         super().__init__()
 27 |         self.num_class = num_class
 28 |         self.confusion_matrix = np.zeros((self.num_class, )*2)
 29 |     
 30 |     def Pixel_Accuracy(self):
 31 |         Acc = np.diag(self.confusion_matrix).sum()/self.confusion_matrix.sum()
 32 |         return Acc
 33 |     
 34 |     def Pixel_Recall(self, class_index):
 35 |         Acc = self.confusion_matrix[class_index][class_index]/self.confusion_matrix.sum(axis=0)[class_index]
 36 |         return Acc
 37 | 
 38 |     def Pixel_Accuracy_Class(self):
 39 |         Acc = np.diag(self.confusion_matrix)/self.confusion_matrix.sum(axis=1)
 40 |         Acc = np.nanmean(Acc)
 41 |         return Acc
 42 |     
 43 |     def Mean_Intersection_over_Union(self):
 44 |         MIoU = np.diag(self.confusion_matrix) / (
 45 |                     np.sum(self.confusion_matrix, axis=1) + np.sum(self.confusion_matrix, axis=0) -
 46 |                     np.diag(self.confusion_matrix))
 47 |         MIoU = np.nanmean(MIoU)
 48 |         return MIoU
 49 | 
 50 |     def Frequency_Weighted_Intersection_over_Union(self):
 51 |         freq = np.sum(self.confusion_matrix, axis=1) / np.sum(self.confusion_matrix)
 52 |         iu = np.diag(self.confusion_matrix) / (
 53 |                     np.sum(self.confusion_matrix, axis=1) + np.sum(self.confusion_matrix, axis=0) -
 54 |                     np.diag(self.confusion_matrix))
 55 | 
 56 |         FWIoU = (freq[freq > 0] * iu[freq > 0]).sum()
 57 |         return FWIoU
 58 | 
 59 |     def _generate_matrix(self, gt_image, pre_image):
 60 |         mask = (gt_image >= 0) & (gt_image < self.num_class)
 61 |         label = self.num_class * gt_image[mask].astype('int') + pre_image[mask]
 62 |         count = np.bincount(label, minlength=self.num_class**2)
 63 |         confusion_matrix = count.reshape(self.num_class, self.num_class)
 64 |         return confusion_matrix
 65 | 
 66 |     def add_batch(self, gt_image, pre_image):
 67 |         '''
 68 |         输入的图像用0,1,2,3...表示类别
 69 |         '''
 70 |         assert gt_image.shape == pre_image.shape
 71 |         self.confusion_matrix += self._generate_matrix(gt_image, pre_image)
 72 | 
 73 |     def reset(self):
 74 |         self.confusion_matrix = np.zeros((self.num_class,) * 2)
 75 | 
 76 | class TestDataset(dataset.Dataset):
 77 |     # 获取图中的实例个数
 78 |     def get_obj_index(self, image):
 79 |         n = np.max(image)
 80 |         return n
 81 |     
 82 |     def get_class(self):
 83 |         classes_path = os.path.expanduser(self.classes_path)
 84 |         with open(classes_path) as f:
 85 |             class_names = f.readlines()
 86 |         class_names = [c.strip() for c in class_names]
 87 |         class_names.insert(0,"BG")
 88 |         return class_names
 89 | 
 90 |     # 解析yaml
 91 |     def get_classes_from_yaml(self, image_id):
 92 |         info = self.image_info[image_id]
 93 |         with open(info['yaml_path']) as f:
 94 |             temp = yaml.load(f.read())
 95 |             labels = temp['label_names']
 96 |             del labels[0]
 97 |         return labels
 98 |     
 99 |     def draw_mask(self, num_obj, mask, image, image_id):
100 |         info = self.image_info[image_id]
101 |         for index in range(num_obj):
102 |             for i in range(info['width']):
103 |                 for j in range(info['height']):
104 |                     at_pixel = image.getpixel((i, j))
105 |                     if at_pixel == index + 1:
106 |                         mask[j, i, index] = 1
107 |         return mask
108 |     
109 |     def load_dataset(self, count, img_floder, mask_floder, imglist, dataset_root_path):
110 |         """
111 |         Generate the requested number of synthetic images.
112 |         count: number of images to generate.
113 |         height, width: the size of the generated images.
114 |         """
115 |         # Add classes
116 |         classes_names = config.get_class(config.InferenceConfig.class_path)
117 |         for index, item in enumerate(classes_names):
118 |             self.add_class('TestSet', index+1, item)
119 | 
120 |         for i in range(count):
121 |             # 获取图片宽和高
122 |             filestr = imglist[i].split(".")[0]
123 |             mask_path = mask_floder + "/" + filestr + ".png"
124 |             yaml_path = dataset_root_path + "/" +"yaml/" + filestr + ".yaml"
125 |             print(dataset_root_path + "labelme_json/" + filestr + "_json/img.png")
126 |             cv_img = cv2.imread(dataset_root_path + "/" +"imgs/" + filestr + ".jpg")
127 | 
128 |             self.add_image("TestSet", image_id=i, path=img_floder + "/" + imglist[i],
129 |                            width=cv_img.shape[1], height=cv_img.shape[0], mask_path=mask_path, yaml_path=yaml_path)
130 | 
131 |     def load_mask(self, image_id):
132 |         """Generate instance masks for shapes of the given image ID.
133 |         """
134 |         global iter_num
135 |         print("image_id", image_id)
136 |         info = self.image_info[image_id]
137 |         count = 1  # number of object
138 |         img = Image.open(info['mask_path'])
139 |         num_obj = self.get_obj_index(img)
140 |         mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8)
141 |         mask = self.draw_mask(num_obj, mask, img, image_id)
142 |         occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8)
143 |         for i in range(count - 2, -1, -1):
144 |             mask[:, :, i] = mask[:, :, i] * occlusion
145 | 
146 |             occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i]))
147 |         labels = []
148 |         labels = self.get_classes_from_yaml(image_id)
149 |         class_ids = np.array([self.class_names.index(s) for s in labels])
150 |         return mask, class_ids.astype(np.int32)
151 | 
152 | def text_save(filename, data):
153 |     file = open(filename,'a')
154 |     for i in range(len(data)):
155 |         s = str(data[i]).replace('[','').replace(']','')
156 |         s = s.replace("'",'').replace(',','') +'\n'   
157 |         file.write(s)
158 |     file.close()
159 |     print(f'save success:{filename}')
160 | 
161 | if __name__ == '__main__':
162 |     dataset_root_path = config.CustomerConfig.TRAIN_DATASET
163 |     img_floder =os.path.join(dataset_root_path, "imgs")
164 |     mask_floder = os.path.join(dataset_root_path, "mask")
165 |     imglist = os.listdir(img_floder)
166 |     count = len(imglist)
167 |     np.random.seed(10101)
168 |     np.random.shuffle(imglist)
169 |     train_imglist = imglist[:int(count*0.8)]
170 |     test_imglist = imglist[int(count*0.8):]
171 |     test_count = len(test_imglist)
172 | 
173 |     # 加载测试集
174 |     dataset_test = TestDataset()
175 |     dataset_test.load_dataset(test_count, img_floder, mask_floder, test_imglist, dataset_root_path)
176 |     dataset_test.prepare()
177 |     APs = []
178 |     flag = 0
179 |     for imageid in tqdm(dataset_test.image_ids[:20]):
180 |         image, image_meta, gt_class_id, gt_bbox, gt_mask = \
181 |             load_image_gt(dataset_test, config.InferenceConfig, imageid)
182 |         # 将所有ground truth载入并保存
183 |         if flag == 0:
184 |             gt_boxes, gt_class_ids, gt_masks = gt_bbox, gt_class_id, gt_mask
185 |         else:
186 |             gt_boxes = np.concatenate((gt_boxes, gt_bbox), axis=0)
187 |             gt_class_ids = np.concatenate((gt_class_ids, gt_class_id), axis=0)
188 |             gt_masks = np.concatenate((gt_masks, gt_mask), axis=2)
189 |         image = Image.fromarray(image)
190 |         r = mask_rcnn.get_detections(image=image)
191 |         if flag == 0:
192 |             pred_rois, pred_ids, pred_scores, pred_masks = r["rois"], r["class_ids"], r["scores"],  r['masks']
193 |         else:
194 |             pred_rois = np.concatenate((pred_rois, r["rois"]), axis=0)
195 |             pred_ids = np.concatenate((pred_ids, r["class_ids"]), axis=0)
196 |             pred_scores = np.concatenate((pred_scores, r["scores"]), axis=0)
197 |             pred_masks = np.concatenate((pred_masks, r['masks']), axis=2)
198 |         flag+=1
199 |         # 展示数据
200 |         drawed_image = visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], mask_rcnn.class_names, r['scores'], show_bbox=False, captions=False)
201 |         # 处理mask 文件
202 |         mask_image = np.any(r['masks'], axis=-1)
203 |         mask_image = Image.fromarray(mask_image)
204 |         drawed_image.show()
205 |         mask_image.show()
206 | 
207 |     iou_thresholds = [0.5, 0.6, 0.7, 0.8, 0.9]
208 |     # AP, precisions, recalls, overlaps =utils.compute_ap(gt_bbox, gt_class_id, gt_mask,r["rois"], r["class_ids"], r["scores"], r['masks'], iou_threshold=iou_threshold)
209 |     # 计算AP, precision, recall
210 |     for iou_threshold in iou_thresholds:
211 |         AP, precisions, recalls, overlaps = utils.compute_ap(gt_boxes, gt_class_ids, gt_masks, pred_rois, pred_ids, pred_scores, pred_masks, iou_threshold=iou_threshold)
212 |         print(f'AP@{iou_threshold}：{AP}')
213 |         print(f"mAP@{iou_threshold}: ", np.mean(AP))
214 |         # 保存precision, recall信息用于后续绘制图像
215 |     #     text_save(f'Kpreci@{iou_threshold}.txt', precisions)
216 |     #     text_save(f'Krecall@{iou_threshold}.txt', recalls)
217 |     #     text_save(f'KAP@{iou_threshold}.txt', [AP])
218 |     # plt.plot(recalls, precisions, 'b', label='PR')
219 |     # plt.title('precision-recall curve')
220 |     # plt.xlabel('Recall')
221 |     # plt.ylabel('Precision')
222 |     # plt.legend()
223 |     # plt.show()
224 |     
225 | 
226 | 
227 | 
228 | 
229 | 
230 | 
231 |         # '''
232 |         # Pixel Accuracy
233 |         # '''
234 |         # basename = os.path.splitext(imageid)[0]
235 |         # ori_img = os.path.join(img_floder, imageid)
236 |         # gt_img = os.path.join(mask_floder, basename+'.png')
237 |         # image = Image.open(ori_img)
238 |         # gt_img = Image.open(gt_img)
239 |         # n_classes = len(class_names)
240 |         # result_img, pred_img = mask_rcnn.detect_image(image=image)
241 |         # pred_img.show()
242 |         # gt_img.show()
243 |         # evaluate = Evaluator(1+1)
244 |         # evaluate.add_batch(np.array(gt_img), np.array(pred_img))
245 |         # acc = evaluate.Pixel_Accuracy()
246 |         # print('ACC:',acc)
247 |         # recall = evaluate.Pixel_Recall(0)
248 |         # print('Recall:', recall)
249 |         # basename = os.path.splitext(imageid)[0]
250 |         # image.save(os.path.join('./result', 'ori_'+basename+'.jpg'))
251 |         # pred_img.save(os.path.join('./result', 'res_'+basename+'.jpg'))
252 |         # iou计算: TODO:FIXBUG
253 |         # iou = IoU_calculate(pred_img, gt_img, 2)
254 |         # print(iou)
255 | 
256 | 
257 | 
258 | 
259 | 


--------------------------------------------------------------------------------
/inference.py:
--------------------------------------------------------------------------------
 1 | from mrcnn.mask_rcnn import MASK_RCNN
 2 | from PIL import Image
 3 | 
 4 | import tensorflow as tf
 5 | import numpy as np
 6 | from utils.anchors import get_anchors
 7 | from utils.utils import mold_inputs,unmold_detections
 8 | from utils import visualize
 9 | import os
10 | from config import InferenceConfig
11 | from glob import glob
12 | from tqdm import tqdm
13 | 
14 | 
15 | # def get_class(classes_path):
16 | #         classes_path = os.path.expanduser(classes_path)
17 | #         with open(classes_path) as f:
18 | #             class_names = f.readlines()
19 | #         class_names = [c.strip() for c in class_names]
20 | #         class_names.insert(0,"BG")
21 | #         return class_names
22 | 
23 | 
24 | # model_path = './model/building'
25 | # class_path = './data/building.names'
26 | # class_names = get_class(class_path)
27 | 
28 | # def get_config():
29 | #     class InferenceConfig(Config):
30 | #         NUM_CLASSES = len(class_names)
31 | #         GPU_COUNT = 1
32 | #         IMAGES_PER_GPU = 1
33 | #         DETECTION_MIN_CONFIDENCE = 0.7
34 | #         NAME = "Customer"
35 | #         RPN_ANCHOR_SCALES =  (16, 32, 64, 128, 256)
36 | #         IMAGE_MIN_DIM = 512
37 | #         IMAGE_MAX_DIM = 512
38 | #         IMAGE_SHAPE =  [512, 512 ,3]
39 | 
40 | #     config = InferenceConfig()
41 | #     config.display()
42 | #     return config
43 | 
44 | # InferenceConfig = get_config()
45 | # model = tf.keras.models.load_model(model_path)
46 | 
47 | # image = Image.open(img)
48 | # image = [np.array(image)]
49 | 
50 | # molded_images, image_metas, windows = mold_inputs(InferenceConfig,image)
51 | 
52 | # image_shape = molded_images[0].shape
53 | # anchors = get_anchors(InferenceConfig,image_shape)
54 | # anchors = np.broadcast_to(anchors, (1,) + anchors.shape)
55 | # detections, _, _, mrcnn_mask, _, _, _ =model.predict([molded_images, image_metas, anchors], verbose=0)
56 | # final_rois, final_class_ids, final_scores, final_masks =unmold_detections(detections[0], mrcnn_mask[0],image[0].shape, molded_images[0].shape,windows[0])
57 | 
58 | # r = {
59 | #     "rois": final_rois,
60 | #     "class_ids": final_class_ids,
61 | #     "scores": final_scores,
62 | #     "masks": final_masks,
63 | # }
64 | 
65 | 
66 | # drawed_image = visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'], 
67 | #                                     class_names, r['scores'])
68 | # drawed_image.save('6.jpg')
69 | # drawed_image.show()
70 | 
71 | images = glob('./samples/*')
72 | save_path = './result'
73 | if not os.path.exists(save_path):
74 |     os.makedirs(save_path)
75 | mask_rcnn = MASK_RCNN(model=InferenceConfig.model, classes_path=InferenceConfig.class_path, confidence=0.7)
76 | for img_name in tqdm(images):
77 |     image = Image.open(img_name).convert('RGB')
78 |     drawed_image,mask_image = mask_rcnn.detect_image(image = image)
79 |     drawed_image.show()
80 |     result_img = Image.blend(image, drawed_image, 0.5)
81 |     # result_img.show()
82 |     save_filename = os.path.join(save_path, os.path.basename(img_name))
83 |     result_img.save(save_filename)
84 |     


--------------------------------------------------------------------------------
/inference_onnxruntime.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import numpy as np
  3 | from utils.config import Config
  4 | from utils.anchors import get_anchors
  5 | from utils.utils import mold_inputs,unmold_detections
  6 | from utils.config import Config
  7 | import colorsys
  8 | import onnxruntime as ort
  9 | from PIL import Image
 10 | 
 11 | 
 12 | class InferenceConfig(Config):
 13 |     NAME = 'Customer'
 14 |     RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256)
 15 |     IMAGE_MIN_DIM = 512
 16 |     IMAGE_MAX_DIM = 512
 17 |     model = './maskrcnn_0.8.onnx'
 18 |     classes_path = './data/building.names'
 19 | 
 20 | def random_colors(N, bright=True):
 21 |     """
 22 |     生成随机颜色
 23 |     """
 24 |     brightness = 1.0 if bright else 0.7
 25 |     hsv = [(i / N, 1, brightness) for i in range(N)]
 26 |     colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
 27 |     return colors
 28 | 
 29 | def apply_mask(image, mask, color, alpha=0.5):
 30 |     """
 31 |     打上mask图标
 32 |     """
 33 |     for c in range(3):
 34 |         image[:, :, c] = np.where(mask == 1,
 35 |                                   image[:, :, c] *
 36 |                                   (1 - alpha) + alpha * color[c] * 255,
 37 |                                   image[:, :, c])
 38 |     return image
 39 | 
 40 | class MASK_RCNN(object):
 41 |     _defaults = {
 42 |         "model_path": InferenceConfig.model,
 43 |         "classes_path": InferenceConfig.classes_path,
 44 |         "confidence": 0.5,
 45 |         # 使用coco数据集检测的时候，IMAGE_MIN_DIM=1024，IMAGE_MAX_DIM=1024, RPN_ANCHOR_SCALES=(32, 64, 128, 256, 512)
 46 |         "RPN_ANCHOR_SCALES": InferenceConfig.RPN_ANCHOR_SCALES,
 47 |         "IMAGE_MIN_DIM": InferenceConfig.IMAGE_MIN_DIM,
 48 |         "IMAGE_MAX_DIM": InferenceConfig.IMAGE_MAX_DIM,
 49 |     }
 50 | 
 51 |     @classmethod
 52 |     def get_defaults(cls, n):
 53 |         if n in cls._defaults:
 54 |             return cls._defaults[n]
 55 |         else:
 56 |             return "Unrecognized attribute name '" + n + "'"
 57 | 
 58 |     def __init__(self, **kwargs):
 59 |         self.__dict__.update(self._defaults)
 60 |         self.class_names = self._get_class()
 61 |         self.config = self._get_config()
 62 |         self.generate()
 63 | 
 64 |     def _get_class(self):
 65 |         classes_path = os.path.expanduser(self.classes_path)
 66 |         with open(classes_path) as f:
 67 |             class_names = f.readlines()
 68 |         class_names = [c.strip() for c in class_names]
 69 |         class_names.insert(0,"BG")
 70 |         return class_names
 71 | 
 72 |     def _get_config(self):
 73 |         class InferenceConfig(Config):
 74 |             NUM_CLASSES = len(self.class_names)
 75 |             GPU_COUNT = 1
 76 |             IMAGES_PER_GPU = 1
 77 |             NAME = "Customer"
 78 |             RPN_ANCHOR_SCALES = self.RPN_ANCHOR_SCALES
 79 |             IMAGE_MIN_DIM = self.IMAGE_MIN_DIM
 80 |             IMAGE_MAX_DIM = self.IMAGE_MAX_DIM
 81 | 
 82 |         config = InferenceConfig()
 83 | 
 84 |         return config
 85 | 
 86 |     def generate(self):
 87 |         model_path = os.path.expanduser(self.model_path)
 88 |         
 89 |         # 计算总的种类
 90 |         self.num_classes = len(self.class_names)
 91 | 
 92 |         # 载入模型，如果原来的模型里已经包括了模型结构则直接载入。
 93 |         self.model = ort.InferenceSession(model_path)
 94 |         self.outputs_names = ['mrcnn_detection', 'mrcnn_class', 'mrcnn_bbox', 'mrcnn_mask', 'ROI', 'rpn_class', 'rpn_bbox']
 95 |     
 96 |     def detect_image(self, image):
 97 |         image = [np.array(image)]
 98 |         molded_images, image_metas, windows = mold_inputs(self.config,image)
 99 | 
100 |         image_shape = molded_images[0].shape
101 |         anchors = get_anchors(self.config,image_shape)
102 |         anchors = np.broadcast_to(anchors, (1,) + anchors.shape)
103 | 
104 |         detections, _, _, mrcnn_mask, _, _, _ =\
105 |             self.model.run(self.outputs_names, {"input_image":molded_images.astype(np.float32), "input_image_meta":image_metas.astype(np.float32), "input_anchors":anchors.astype(np.float32)})
106 | 
107 |         final_rois, final_class_ids, final_scores, final_masks =\
108 |             unmold_detections(detections[0], mrcnn_mask[0],
109 |                                     image[0].shape, molded_images[0].shape,
110 |                                     windows[0])
111 | 
112 |         r = {
113 |             "rois": final_rois,
114 |             "class_ids": final_class_ids,
115 |             "scores": final_scores,
116 |             "masks": final_masks,
117 |         }
118 |         # 生成mask图像
119 |         mask_image = np.zeros_like(image[0], np.uint8)
120 |         masks = r['masks']
121 |         N = r['rois'].shape[0]
122 |         for i in range(N):
123 |             mask = masks[:, :, i]
124 |             color = (1.0, 0.0, 0.0)
125 |             mask_image = apply_mask(mask_image, mask, color, alpha=1)
126 |             padded_mask = np.zeros(
127 |             (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
128 |             padded_mask[1:-1, 1:-1] = mask
129 |         return mask_image
130 |     
131 | if __name__ == '__main__':
132 |     mask_rcnn = MASK_RCNN()
133 |     img = './samples/20221101144640.png'
134 |     image = Image.open(img).convert('RGB')
135 |     r_image = mask_rcnn.detect_image(image)
136 |     img = Image.fromarray(r_image)
137 |     img = Image.blend(img, image, 0.7)
138 |     # img.save('./test.png')
139 |     img.show()
140 |     


--------------------------------------------------------------------------------
/largest_interior_rectangle/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/largest_interior_rectangle/__init__.py


--------------------------------------------------------------------------------
/largest_interior_rectangle/lir.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import cv2
  3 | import numba as nb
  4 | 
  5 | '''
  6 | Find the largest interior rectangle in the mask
  7 | '''
  8 | 
  9 | 
 10 | 
 11 | def largest_interior_rectangle(cells):
 12 |     h_adjacency = horizontal_adjacency(cells)
 13 |     v_adjacency = vertical_adjacency(cells)
 14 |     s_map = span_map(h_adjacency, v_adjacency)
 15 |     return biggest_span_in_span_map(s_map)
 16 | 
 17 | 
 18 | @nb.njit('uint32[:,::1](uint8[:,::1])', parallel=True, cache=True)
 19 | def horizontal_adjacency(cells):
 20 |     result = np.zeros((cells.shape[0], cells.shape[1]), dtype=np.uint32)
 21 |     for y in nb.prange(cells.shape[0]):
 22 |         span = 0
 23 |         for x in range(cells.shape[1]-1, -1, -1):
 24 |             if cells[y, x] > 0:
 25 |                 span += 1
 26 |             else:
 27 |                 span = 0
 28 |             result[y, x] = span
 29 |     return result
 30 | 
 31 | 
 32 | @nb.njit('uint32[:,::1](uint8[:,::1])', parallel=True, cache=True)
 33 | def vertical_adjacency(cells):
 34 |     result = np.zeros((cells.shape[0], cells.shape[1]), dtype=np.uint32)
 35 |     for x in nb.prange(cells.shape[1]):
 36 |         span = 0
 37 |         for y in range(cells.shape[0]-1, -1, -1):
 38 |             if cells[y, x] > 0:
 39 |                 span += 1
 40 |             else:
 41 |                 span = 0
 42 |             result[y, x] = span
 43 |     return result
 44 | 
 45 | 
 46 | @nb.njit('uint32(uint32[:])', cache=True)
 47 | def predict_vector_size(array):
 48 |     zero_indices = np.where(array == 0)[0]
 49 |     if len(zero_indices) == 0:
 50 |         if len(array) == 0:
 51 |             return 0
 52 |         return len(array)
 53 |     return zero_indices[0]
 54 | 
 55 | 
 56 | @nb.jit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
 57 | def h_vector(h_adjacency, x, y):
 58 |     vector_size = predict_vector_size(h_adjacency[y:, x])
 59 |     h_vector = np.zeros(vector_size, dtype=np.uint32)
 60 |     h = np.Inf
 61 |     for p in range(vector_size):
 62 |         h = np.minimum(h_adjacency[y+p, x], h)
 63 |         h_vector[p] = h
 64 |     h_vector = np.unique(h_vector)[::-1]
 65 |     return h_vector
 66 | 
 67 | 
 68 | @nb.jit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
 69 | def v_vector(v_adjacency, x, y):
 70 |     vector_size = predict_vector_size(v_adjacency[y, x:])
 71 |     v_vector = np.zeros(vector_size, dtype=np.uint32)
 72 |     v = np.Inf
 73 |     for q in range(vector_size):
 74 |         v = np.minimum(v_adjacency[y, x+q], v)
 75 |         v_vector[q] = v
 76 |     v_vector = np.unique(v_vector)[::-1]
 77 |     return v_vector
 78 | 
 79 | 
 80 | @nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True)
 81 | def spans(h_vector, v_vector):
 82 |     spans = np.stack((h_vector, v_vector[::-1]), axis=1)
 83 |     return spans
 84 | 
 85 | 
 86 | @nb.njit('uint32[:](uint32[:,:])', cache=True)
 87 | def biggest_span(spans):
 88 |     if len(spans) == 0:
 89 |         return np.array([0, 0], dtype=np.uint32)
 90 |     areas = spans[:, 0] * spans[:, 1]
 91 |     biggest_span_index = np.where(areas == np.amax(areas))[0][0]
 92 |     return spans[biggest_span_index]
 93 | 
 94 | 
 95 | @nb.njit('uint32[:, :, :](uint32[:,::1], uint32[:,::1])',
 96 |          parallel=True, cache=True)
 97 | def span_map(h_adjacency, v_adjacency):
 98 |     span_map = np.zeros((h_adjacency.shape[0],
 99 |                          h_adjacency.shape[1],
100 |                          2), dtype=np.uint32)
101 | 
102 |     for x in nb.prange(span_map.shape[1]):
103 |         for y in range(span_map.shape[0]):
104 |             h_vec = h_vector(h_adjacency, x, y)
105 |             v_vec = v_vector(v_adjacency, x, y)
106 |             s = spans(h_vec, v_vec)
107 |             s = biggest_span(s)
108 |             span_map[y, x, :] = s
109 | 
110 |     return span_map
111 | 
112 | 
113 | @nb.njit('uint32[:](uint32[:, :, :])', cache=True)
114 | def biggest_span_in_span_map(span_map):
115 |     areas = span_map[:, :, 0] * span_map[:, :, 1]
116 |     largest_rectangle_indices = np.where(areas == np.amax(areas))
117 |     x = largest_rectangle_indices[1][0]
118 |     y = largest_rectangle_indices[0][0]
119 |     span = span_map[y, x]
120 |     return np.array([x, y, span[0], span[1]], dtype=np.uint32)
121 | 


--------------------------------------------------------------------------------
/largest_interior_rectangle/lir_within_outline.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import numba as nb
  3 | import cv2 as cv
  4 | 
  5 | 
  6 | def largest_interior_rectangle(cells):
  7 |     outline = get_outline(cells)
  8 |     adjacencies = adjacencies_all_directions(cells)
  9 |     s_map, _, saddle_candidates_map = create_maps(outline, adjacencies)
 10 |     lir1 = biggest_span_in_span_map(s_map)
 11 | 
 12 |     candidate_cells = cells_of_interest(saddle_candidates_map)
 13 |     s_map = span_map(adjacencies[0], adjacencies[2], candidate_cells)
 14 |     lir2 = biggest_span_in_span_map(s_map)
 15 | 
 16 |     lir = biggest_rectangle(lir1, lir2)
 17 |     return lir
 18 | 
 19 | 
 20 | def get_outline(cells):
 21 |     contours, hierarchy = \
 22 |         cv.findContours(cells, cv.RETR_TREE, cv.CHAIN_APPROX_NONE)
 23 |     # TODO support multiple contours
 24 |     # test that only one regular contour exists
 25 |     assert hierarchy.shape == (1, 1, 4)
 26 |     assert np.all(hierarchy == -1)
 27 |     contour = contours[0][:, 0, :]
 28 |     x_values = contour[:, 0].astype("uint32", order="C")
 29 |     y_values = contour[:, 1].astype("uint32", order="C")
 30 |     return x_values, y_values
 31 | 
 32 | 
 33 | @nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True)
 34 | def horizontal_adjacency(cells, direction):
 35 |     result = np.zeros(cells.shape, dtype=np.uint32)
 36 |     for y in nb.prange(cells.shape[0]):
 37 |         span = 0
 38 |         if direction:
 39 |             iterator = range(cells.shape[1]-1, -1, -1)
 40 |         else:
 41 |             iterator = range(cells.shape[1])
 42 |         for x in iterator:
 43 |             if cells[y, x] > 0:
 44 |                 span += 1
 45 |             else:
 46 |                 span = 0
 47 |             result[y, x] = span
 48 |     return result
 49 | 
 50 | 
 51 | @nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True)
 52 | def vertical_adjacency(cells, direction):
 53 |     result = np.zeros(cells.shape, dtype=np.uint32)
 54 |     for x in nb.prange(cells.shape[1]):
 55 |         span = 0
 56 |         if direction:
 57 |             iterator = range(cells.shape[0]-1, -1, -1)
 58 |         else:
 59 |             iterator = range(cells.shape[0])
 60 |         for y in iterator:
 61 |             if cells[y, x] > 0:
 62 |                 span += 1
 63 |             else:
 64 |                 span = 0
 65 |             result[y, x] = span
 66 |     return result
 67 | 
 68 | 
 69 | @nb.njit(cache=True)
 70 | def adjacencies_all_directions(cells):
 71 |     h_left2right = horizontal_adjacency(cells, 1)
 72 |     h_right2left = horizontal_adjacency(cells, 0)
 73 |     v_top2bottom = vertical_adjacency(cells, 1)
 74 |     v_bottom2top = vertical_adjacency(cells, 0)
 75 |     return h_left2right, h_right2left, v_top2bottom, v_bottom2top
 76 | 
 77 | 
 78 | @nb.njit('uint32(uint32[:])', cache=True)
 79 | def predict_vector_size(array):
 80 |     zero_indices = np.where(array == 0)[0]
 81 |     if len(zero_indices) == 0:
 82 |         if len(array) == 0:
 83 |             return 0
 84 |         return len(array)
 85 |     return zero_indices[0]
 86 | 
 87 | 
 88 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
 89 | def h_vector_top2bottom(h_adjacency, x, y):
 90 |     vector_size = predict_vector_size(h_adjacency[y:, x])
 91 |     h_vector = np.zeros(vector_size, dtype=np.uint32)
 92 |     h = np.Inf
 93 |     for p in range(vector_size):
 94 |         h = np.minimum(h_adjacency[y+p, x], h)
 95 |         h_vector[p] = h
 96 |     h_vector = np.unique(h_vector)[::-1]
 97 |     return h_vector
 98 | 
 99 | 
100 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
101 | def h_vector_bottom2top(h_adjacency, x, y):
102 |     vector_size = predict_vector_size(np.flip(h_adjacency[:y+1, x]))
103 |     h_vector = np.zeros(vector_size, dtype=np.uint32)
104 |     h = np.Inf
105 |     for p in range(vector_size):
106 |         h = np.minimum(h_adjacency[y-p, x], h)
107 |         h_vector[p] = h
108 |     h_vector = np.unique(h_vector)[::-1]
109 |     return h_vector
110 | 
111 | 
112 | @nb.njit(cache=True)
113 | def h_vectors_all_directions(h_left2right, h_right2left, x, y):
114 |     h_l2r_t2b = h_vector_top2bottom(h_left2right, x, y)
115 |     h_r2l_t2b = h_vector_top2bottom(h_right2left, x, y)
116 |     h_l2r_b2t = h_vector_bottom2top(h_left2right, x, y)
117 |     h_r2l_b2t = h_vector_bottom2top(h_right2left, x, y)
118 |     return h_l2r_t2b, h_r2l_t2b, h_l2r_b2t, h_r2l_b2t
119 | 
120 | 
121 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
122 | def v_vector_left2right(v_adjacency, x, y):
123 |     vector_size = predict_vector_size(v_adjacency[y, x:])
124 |     v_vector = np.zeros(vector_size, dtype=np.uint32)
125 |     v = np.Inf
126 |     for q in range(vector_size):
127 |         v = np.minimum(v_adjacency[y, x+q], v)
128 |         v_vector[q] = v
129 |     v_vector = np.unique(v_vector)[::-1]
130 |     return v_vector
131 | 
132 | 
133 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True)
134 | def v_vector_right2left(v_adjacency, x, y):
135 |     vector_size = predict_vector_size(np.flip(v_adjacency[y, :x+1]))
136 |     v_vector = np.zeros(vector_size, dtype=np.uint32)
137 |     v = np.Inf
138 |     for q in range(vector_size):
139 |         v = np.minimum(v_adjacency[y, x-q], v)
140 |         v_vector[q] = v
141 |     v_vector = np.unique(v_vector)[::-1]
142 |     return v_vector
143 | 
144 | 
145 | @nb.njit(cache=True)
146 | def v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y):
147 |     v_l2r_t2b = v_vector_left2right(v_top2bottom, x, y)
148 |     v_r2l_t2b = v_vector_right2left(v_top2bottom, x, y)
149 |     v_l2r_b2t = v_vector_left2right(v_bottom2top, x, y)
150 |     v_r2l_b2t = v_vector_right2left(v_bottom2top, x, y)
151 |     return v_l2r_t2b, v_r2l_t2b, v_l2r_b2t, v_r2l_b2t
152 | 
153 | 
154 | @nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True)
155 | def spans(h_vector, v_vector):
156 |     spans = np.stack((h_vector, v_vector[::-1]), axis=1)
157 |     return spans
158 | 
159 | 
160 | @nb.njit('uint32[:](uint32[:,:])', cache=True)
161 | def biggest_span(spans):
162 |     if len(spans) == 0:
163 |         return np.array([0, 0], dtype=np.uint32)
164 |     areas = spans[:, 0] * spans[:, 1]
165 |     biggest_span_index = np.where(areas == np.amax(areas))[0][0]
166 |     return spans[biggest_span_index]
167 | 
168 | 
169 | @nb.njit(cache=True)
170 | def spans_all_directions(h_vectors, v_vectors):
171 |     span_l2r_t2b = spans(h_vectors[0], v_vectors[0])
172 |     span_r2l_t2b = spans(h_vectors[1], v_vectors[1])
173 |     span_l2r_b2t = spans(h_vectors[2], v_vectors[2])
174 |     span_r2l_b2t = spans(h_vectors[3], v_vectors[3])
175 |     return span_l2r_t2b, span_r2l_t2b, span_l2r_b2t, span_r2l_b2t
176 | 
177 | 
178 | @nb.njit(cache=True)
179 | def get_n_directions(spans_all_directions):
180 |     n_directions = 1
181 |     for spans in spans_all_directions:
182 |         all_x_1 = np.all(spans[:, 0] == 1)
183 |         all_y_1 = np.all(spans[:, 1] == 1)
184 |         if not all_x_1 and not all_y_1:
185 |             n_directions += 1
186 |     return n_directions
187 | 
188 | 
189 | @nb.njit(cache=True)
190 | def get_xy_array(x, y, spans, mode=0):
191 |     """0 - flip none, 1 - flip x, 2 - flip y, 3 - flip both"""
192 |     xy = spans.copy()
193 |     xy[:, 0] = x
194 |     xy[:, 1] = y
195 |     if mode == 1:
196 |         xy[:, 0] = xy[:, 0] - spans[:, 0] + 1
197 |     if mode == 2:
198 |         xy[:, 1] = xy[:, 1] - spans[:, 1] + 1
199 |     if mode == 3:
200 |         xy[:, 0] = xy[:, 0] - spans[:, 0] + 1
201 |         xy[:, 1] = xy[:, 1] - spans[:, 1] + 1
202 |     return xy
203 | 
204 | 
205 | @nb.njit(cache=True)
206 | def get_xy_arrays(x, y, spans_all_directions):
207 |     xy_l2r_t2b = get_xy_array(x, y, spans_all_directions[0], 0)
208 |     xy_r2l_t2b = get_xy_array(x, y, spans_all_directions[1], 1)
209 |     xy_l2r_b2t = get_xy_array(x, y, spans_all_directions[2], 2)
210 |     xy_r2l_b2t = get_xy_array(x, y, spans_all_directions[3], 3)
211 |     return xy_l2r_t2b, xy_r2l_t2b, xy_l2r_b2t, xy_r2l_b2t
212 | 
213 | 
214 | @nb.njit(cache=True)
215 | def check_if_point_on_outline(x, y, outline):
216 |     x_vals, y_vals = outline
217 |     x_true = x_vals == x
218 |     y_true = y_vals == y
219 |     both_true = np.logical_and(x_true, y_true)
220 |     return np.any(both_true)
221 | 
222 | 
223 | @nb.njit('Tuple((uint32[:,:,::1], uint8[:,::1], uint8[:,::1]))'
224 |          '(UniTuple(uint32[:], 2), UniTuple(uint32[:,::1], 4))', 
225 |          parallel=True, cache=True)
226 | def create_maps(outline, adjacencies):
227 |     x_values, y_values = outline
228 |     h_left2right, h_right2left, v_top2bottom, v_bottom2top = adjacencies
229 | 
230 |     shape = h_left2right.shape
231 |     span_map = np.zeros(shape + (2,), "uint32")
232 |     direction_map = np.zeros(shape, "uint8")
233 |     saddle_candidates_map = np.zeros(shape, "uint8")
234 | 
235 |     for idx in nb.prange(len(x_values)):
236 |         x, y = x_values[idx], y_values[idx]
237 |         h_vectors = h_vectors_all_directions(h_left2right, h_right2left, x, y)
238 |         v_vectors = v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y)
239 |         span_arrays = spans_all_directions(h_vectors, v_vectors)
240 |         n = get_n_directions(span_arrays)
241 |         direction_map[y, x] = n
242 |         xy_arrays = get_xy_arrays(x, y, span_arrays)
243 |         for direction_idx in range(4):
244 |             xy_array = xy_arrays[direction_idx]
245 |             span_array = span_arrays[direction_idx]
246 |             for span_idx in range(span_array.shape[0]):
247 |                 x, y = xy_array[span_idx][0], xy_array[span_idx][1]
248 |                 w, h = span_array[span_idx][0], span_array[span_idx][1]
249 |                 if w*h > span_map[y, x, 0] * span_map[y, x, 1]:
250 |                     span_map[y, x, :] = np.array([w, h], "uint32")
251 |                 if n == 3:
252 |                     if not check_if_point_on_outline(x, y, outline):
253 |                         saddle_candidates_map[y, x] = np.uint8(255)
254 | 
255 |     return span_map, direction_map, saddle_candidates_map
256 | 
257 | 
258 | def cells_of_interest(cells):
259 |     y_vals, x_vals = cells.nonzero()
260 |     x_vals = x_vals.astype("uint32", order="C")
261 |     y_vals = y_vals.astype("uint32", order="C")
262 |     return x_vals, y_vals
263 | 
264 | 
265 | @nb.njit('uint32[:, :, :]'
266 |          '(uint32[:,::1], uint32[:,::1], UniTuple(uint32[:], 2))',
267 |          parallel=True, cache=True)
268 | def span_map(h_adjacency_left2right,
269 |              v_adjacency_top2bottom,
270 |              cells_of_interest):
271 | 
272 |     x_values, y_values = cells_of_interest
273 | 
274 |     span_map = np.zeros(h_adjacency_left2right.shape + (2,), dtype=np.uint32)
275 | 
276 |     for idx in nb.prange(len(x_values)):
277 |         x, y = x_values[idx], y_values[idx]
278 |         h_vector = h_vector_top2bottom(h_adjacency_left2right, x, y)
279 |         v_vector = v_vector_left2right(v_adjacency_top2bottom, x, y)
280 |         s = spans(h_vector, v_vector)
281 |         s = biggest_span(s)
282 |         span_map[y, x, :] = s
283 | 
284 |     return span_map
285 | 
286 | 
287 | @nb.njit('uint32[:](uint32[:, :, :])', cache=True)
288 | def biggest_span_in_span_map(span_map):
289 |     areas = span_map[:, :, 0] * span_map[:, :, 1]
290 |     largest_rectangle_indices = np.where(areas == np.amax(areas))
291 |     x = largest_rectangle_indices[1][0]
292 |     y = largest_rectangle_indices[0][0]
293 |     span = span_map[y, x]
294 |     return np.array([x, y, span[0], span[1]], dtype=np.uint32)
295 | 
296 | 
297 | def biggest_rectangle(*args):
298 |     biggest_rect = np.array([0, 0, 0, 0], dtype=np.uint32)
299 |     for rect in args:
300 |         if rect[2] * rect[3] > biggest_rect[2] * biggest_rect[3]:
301 |             biggest_rect = rect
302 |     return biggest_rect


--------------------------------------------------------------------------------
/mask.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/mask.png


--------------------------------------------------------------------------------
/mask2npz.py:
--------------------------------------------------------------------------------
 1 | from config import CustomerConfig
 2 | import os
 3 | from tqdm import tqdm
 4 | 
 5 | from utils.customerDataset import CustomerDataset
 6 | 
 7 | dataset_root_path = CustomerConfig.TRAIN_DATASET
 8 | img_floder =os.path.join(dataset_root_path, "imgs")
 9 | mask_floder = os.path.join(dataset_root_path, "mask")
10 | yaml_floder = os.path.join(dataset_root_path, "yaml")
11 | imglist = os.listdir(img_floder)
12 | 
13 | config = CustomerConfig()
14 | 
15 | count = len(imglist)
16 | dataset = CustomerDataset()
17 | dataset.load_dataset(config.NAME, len(imglist), config.CLASSES, img_floder, mask_floder, imglist, yaml_floder, train_mode=False)
18 | dataset.prepare()
19 | 
20 | 
21 | # 生成imageids
22 | # TODO: 多线程多进程优化
23 | image_ids = [id for id in dataset.image_ids]
24 | for imageid in tqdm(image_ids):
25 |     dataset.load_mask(imageid, train_mode=False)
26 | 
27 | 
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/mrcnn/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/mrcnn/__init__.py


--------------------------------------------------------------------------------
/mrcnn/layers.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import tensorflow.keras as keras
  3 | import tensorflow.keras.backend as K
  4 | import tensorflow.keras.layers as KL
  5 | import tensorflow.keras.utils as KU
  6 | from tensorflow.python.eager import context
  7 | import tensorflow.keras.models as KM
  8 | import numpy as np
  9 | from utils import utils
 10 | 
 11 | # tf.compat.v1.disable_eager_execution()
 12 | 
 13 | #----------------------------------------------------------#
 14 | #   Proposal Layer
 15 | #   该部分代码用于将先验框转化成建议框
 16 | #----------------------------------------------------------#
 17 | 
 18 | def apply_box_deltas_graph(boxes, deltas):
 19 |     # 计算先验框的中心和宽高
 20 |     height = boxes[:, 2] - boxes[:, 0]
 21 |     width = boxes[:, 3] - boxes[:, 1]
 22 |     center_y = boxes[:, 0] + 0.5 * height
 23 |     center_x = boxes[:, 1] + 0.5 * width
 24 |     # 计算出调整后的先验框的中心和宽高
 25 |     center_y += deltas[:, 0] * height
 26 |     center_x += deltas[:, 1] * width
 27 |     height *= tf.exp(deltas[:, 2])
 28 |     width *= tf.exp(deltas[:, 3])
 29 |     # 计算左上角和右下角的点的坐标
 30 |     y1 = center_y - 0.5 * height
 31 |     x1 = center_x - 0.5 * width
 32 |     y2 = y1 + height
 33 |     x2 = x1 + width
 34 |     result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out")
 35 |     return result
 36 | 
 37 | 
 38 | def clip_boxes_graph(boxes, window):
 39 |     """
 40 |     boxes: [N, (y1, x1, y2, x2)]
 41 |     window: [4] in the form y1, x1, y2, x2
 42 |     """
 43 |     # Split
 44 |     wy1, wx1, wy2, wx2 = tf.split(window, 4)
 45 |     y1, x1, y2, x2 = tf.split(boxes, 4, axis=1)
 46 |     # Clip
 47 |     y1 = tf.maximum(tf.minimum(y1, wy2), wy1)
 48 |     x1 = tf.maximum(tf.minimum(x1, wx2), wx1)
 49 |     y2 = tf.maximum(tf.minimum(y2, wy2), wy1)
 50 |     x2 = tf.maximum(tf.minimum(x2, wx2), wx1)
 51 |     clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes")
 52 |     clipped.set_shape((clipped.shape[0], 4))
 53 |     return clipped
 54 | 
 55 | class ProposalLayer(KL.Layer):
 56 |     '''
 57 |     1. 根据rpn网络，获取score靠前的前6000个anchor
 58 |     2. 利用rpn_bbox对anchors进行修正
 59 |     3. 舍弃掉修正后边框超过图像大小的anchor
 60 |     4. 利用非极大值的方法获取最后的anchor
 61 |     '''
 62 | 
 63 |     def __init__(self, proposal_count, nms_threshold, config=None, **kwargs):
 64 |         super(ProposalLayer, self).__init__(**kwargs)
 65 |         self.config = config
 66 |         self.proposal_count = proposal_count
 67 |         self.nms_threshold = nms_threshold
 68 |     # [rpn_class, rpn_bbox, anchors]
 69 |     def call(self, inputs):
 70 |         # 代表这个先验框内部是否有物体[batch, num_rois, 1]
 71 |         scores = inputs[0][:, :, 1]
 72 |         # 代表这个先验框的调整参数[batch, num_rois, 4]
 73 |         deltas = inputs[1]
 74 |         # [0.1 0.1 0.2 0.2]，改变数量级
 75 |         deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4])
 76 |         # Anchors
 77 |         anchors = inputs[2]
 78 |         # 筛选出得分前6000个的框
 79 |         pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1])
 80 |         # 获得这些框的索引
 81 |         ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True,
 82 |                          name="top_anchors").indices
 83 |         # 获得这些框的得分
 84 |         scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y),
 85 |                                    self.config.IMAGES_PER_GPU)
 86 |         # 获得这些框的调整参数
 87 |         deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y),
 88 |                                    self.config.IMAGES_PER_GPU)
 89 |         # 获得这些框对应的先验框
 90 |         pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x),
 91 |                                     self.config.IMAGES_PER_GPU,
 92 |                                     names=["pre_nms_anchors"])
 93 | 
 94 |         # [batch, N, (y1, x1, y2, x2)]
 95 |         # 对先验框进行解码
 96 |         boxes = utils.batch_slice([pre_nms_anchors, deltas],
 97 |                                   lambda x, y: apply_box_deltas_graph(x, y),
 98 |                                   self.config.IMAGES_PER_GPU,
 99 |                                   names=["refined_anchors"])
100 | 
101 |         # [batch, N, (y1, x1, y2, x2)]
102 |         # 防止超出图片范围
103 |         window = np.array([0, 0, 1, 1], dtype=np.float32)
104 |         boxes = utils.batch_slice(boxes,
105 |                                   lambda x: clip_boxes_graph(x, window),
106 |                                   self.config.IMAGES_PER_GPU,
107 |                                   names=["refined_anchors_clipped"])
108 | 
109 | 
110 |         # 非极大抑制
111 |         def nms(boxes, scores):
112 |             indices = tf.image.non_max_suppression(
113 |                 boxes, scores, self.proposal_count,
114 |                 self.nms_threshold, name="rpn_non_max_suppression")
115 |             proposals = tf.gather(boxes, indices)
116 |             # 如果数量达不到设置的建议框数量的话
117 |             # 就padding
118 |             padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0)
119 |             proposals = tf.pad(proposals, [(0, padding), (0, 0)])
120 |             return proposals
121 | 
122 |         proposals = utils.batch_slice([boxes, scores], nms,
123 |                                       self.config.IMAGES_PER_GPU)
124 |         if not context.executing_eagerly():
125 |             # Infer the static output shape:
126 |             out_shape = self.compute_output_shape(None)
127 |             proposals.set_shape(out_shape)
128 |         return proposals
129 | 
130 |     def compute_output_shape(self, input_shape):
131 |         return (None, self.proposal_count, 4)
132 | 
133 | 
134 | 
135 | 
136 | #----------------------------------------------------------#
137 | #   ROIAlign Layer
138 | #   利用建议框在特征层上截取内容
139 | #----------------------------------------------------------#
140 | 
141 | def log2_graph(x):
142 |     return tf.math.log(x) / tf.math.log(2.0)
143 | 
144 | def parse_image_meta_graph(meta):
145 |     """
146 |     将meta里面的参数进行分割
147 |     """
148 |     image_id = meta[:, 0]
149 |     original_image_shape = meta[:, 1:4]
150 |     image_shape = meta[:, 4:7]
151 |     window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in in pixels
152 |     scale = meta[:, 11]
153 |     active_class_ids = meta[:, 12:]
154 |     return {
155 |         "image_id": image_id,
156 |         "original_image_shape": original_image_shape,
157 |         "image_shape": image_shape,
158 |         "window": window,
159 |         "scale": scale,
160 |         "active_class_ids": active_class_ids,
161 |     }
162 | 
163 | class PyramidROIAlign(KL.Layer):
164 |     def __init__(self, pool_shape, **kwargs):
165 |         super(PyramidROIAlign, self).__init__(**kwargs)
166 |         self.pool_shape = tuple(pool_shape)
167 | 
168 |     def call(self, inputs):
169 |         # 建议框的位置
170 |         boxes = inputs[0]
171 | 
172 |         # image_meta包含了一些必要的图片信息
173 |         image_meta = inputs[1]
174 | 
175 |         # 取出所有的特征层[batch, height, width, channels]
176 |         feature_maps = inputs[2:]
177 | 
178 |         y1, x1, y2, x2 = tf.split(boxes, 4, axis=2)
179 |         h = y2 - y1
180 |         w = x2 - x1
181 | 
182 |         # 获得输入进来的图像的大小
183 |         image_shape = parse_image_meta_graph(image_meta)['image_shape'][0]
184 |         
185 |         # 通过建议框的大小找到这个建议框属于哪个特征层
186 |         image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32)
187 |         roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area)))
188 |         roi_level = tf.minimum(5, tf.maximum(
189 |             2, 4 + tf.cast(tf.round(roi_level), tf.int32)))
190 |         # batch_size, box_num
191 |         roi_level = tf.squeeze(roi_level, 2)
192 | 
193 |         # Loop through levels and apply ROI pooling to each. P2 to P5.
194 |         pooled = []
195 |         box_to_level = []
196 |         # 分别在P2-P5中进行截取
197 |         for i, level in enumerate(range(2, 6)):
198 |             # 找到每个特征层对应box
199 |             ix = tf.compat.v1.where(tf.equal(roi_level, level))
200 |             level_boxes = tf.gather_nd(boxes, ix)
201 |             box_to_level.append(ix)
202 | 
203 |             # 获得这些box所属的图片
204 |             box_indices = tf.cast(ix[:, 0], tf.int32)
205 | 
206 |             # 停止梯度下降
207 |             level_boxes = tf.stop_gradient(level_boxes)
208 |             box_indices = tf.stop_gradient(box_indices)
209 | 
210 |             # Result: [batch * num_boxes, pool_height, pool_width, channels]
211 |             pooled.append(tf.image.crop_and_resize(
212 |                 feature_maps[i], level_boxes, box_indices, self.pool_shape,
213 |                 method="bilinear"))
214 | 
215 |         pooled = tf.concat(pooled, axis=0)
216 | 
217 |         # 将顺序和所属的图片进行堆叠
218 |         box_to_level = tf.concat(box_to_level, axis=0)
219 |         box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1)
220 |         box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range],
221 |                                  axis=1)
222 | 
223 |         # box_to_level[:, 0]表示第几张图
224 |         # box_to_level[:, 1]表示第几张图里的第几个框
225 |         sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1]
226 |         # 进行排序，将同一张图里的某一些聚集在一起
227 |         ix = tf.nn.top_k(sorting_tensor, k=tf.shape(
228 |             box_to_level)[0]).indices[::-1]
229 | 
230 |         # 按顺序获得图片的索引
231 |         ix = tf.gather(box_to_level[:, 2], ix)
232 |         pooled = tf.gather(pooled, ix)
233 | 
234 |         # 重新reshape为原来的格式
235 |         # 也就是
236 |         # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
237 |         shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0)
238 |         pooled = tf.reshape(pooled, shape)
239 |         return pooled
240 | 
241 |     def compute_output_shape(self, input_shape):
242 |         return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1], )
243 | 
244 | 
245 | #----------------------------------------------------------#
246 | #   Detection Layer
247 | #   
248 | #----------------------------------------------------------#
249 | 
250 | def refine_detections_graph(rois, probs, deltas, window, config):
251 |     """细化分类建议并过滤重叠部分并返回最终结果探测。
252 |     Inputs:
253 |         rois: [N, (y1, x1, y2, x2)] in normalized coordinates
254 |         probs: [N, num_classes]. Class probabilities.
255 |         deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific
256 |                 bounding box deltas.
257 |         window: (y1, x1, y2, x2) in normalized coordinates. The part of the image
258 |             that contains the image excluding the padding.
259 |     Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where
260 |         coordinates are normalized.
261 |     """
262 |     # 找到得分最高的类
263 |     class_ids = tf.argmax(probs, axis=1, output_type=tf.int32)
264 |     # 序号+类
265 |     indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1)
266 |     # 取出成绩
267 |     class_scores = tf.gather_nd(probs, indices)
268 |     # 还有框的调整参数
269 |     deltas_specific = tf.gather_nd(deltas, indices)
270 |     # 进行解码
271 |     # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates
272 |     refined_rois = apply_box_deltas_graph(
273 |         rois, deltas_specific * config.BBOX_STD_DEV)
274 |     # 防止超出0-1
275 |     refined_rois = clip_boxes_graph(refined_rois, window)
276 | 
277 |     # 去除背景
278 |     keep = tf.compat.v1.where(class_ids > 0)[:, 0]
279 |     # 去除背景和得分小的区域
280 |     if config.DETECTION_MIN_CONFIDENCE:
281 |         conf_keep = tf.compat.v1.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0]
282 |         keep = tf.sets.intersection(tf.expand_dims(keep, 0),
283 |                                         tf.expand_dims(conf_keep, 0))
284 |         keep = tf.sparse.to_dense(keep)[0]
285 | 
286 |     # 获得除去背景并且得分较高的框还有种类与得分
287 |     # 1. Prepare variables
288 |     pre_nms_class_ids = tf.gather(class_ids, keep)
289 |     pre_nms_scores = tf.gather(class_scores, keep)
290 |     pre_nms_rois = tf.gather(refined_rois,   keep)
291 |     unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0]
292 | 
293 |     def nms_keep_map(class_id):
294 | 
295 |         ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0]
296 | 
297 |         class_keep = tf.image.non_max_suppression(
298 |                 tf.gather(pre_nms_rois, ixs),
299 |                 tf.gather(pre_nms_scores, ixs),
300 |                 max_output_size=config.DETECTION_MAX_INSTANCES,
301 |                 iou_threshold=config.DETECTION_NMS_THRESHOLD)
302 | 
303 |         class_keep = tf.gather(keep, tf.gather(ixs, class_keep))
304 | 
305 |         gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0]
306 |         class_keep = tf.pad(class_keep, [(0, gap)],
307 |                             mode='CONSTANT', constant_values=-1)
308 | 
309 |         class_keep.set_shape([config.DETECTION_MAX_INSTANCES])
310 |         return class_keep
311 | 
312 |     # 2. 进行非极大抑制
313 |     nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids,
314 |                          dtype=tf.int64)
315 |     # 3. 找到符合要求的需要被保留的建议框
316 |     nms_keep = tf.reshape(nms_keep, [-1])
317 |     nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0])
318 |     # 4. Compute intersection between keep and nms_keep
319 |     keep = tf.sets.intersection(tf.expand_dims(keep, 0),
320 |                                         tf.expand_dims(nms_keep, 0))
321 |     keep = tf.sparse.to_dense(keep)[0]
322 | 
323 |     # 寻找得分最高的num_keep个框
324 |     roi_count = config.DETECTION_MAX_INSTANCES
325 |     class_scores_keep = tf.gather(class_scores, keep)
326 |     num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count)
327 |     top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1]
328 |     keep = tf.gather(keep, top_ids)
329 | 
330 |     # Arrange output as [N, (y1, x1, y2, x2, class_id, score)]
331 |     detections = tf.concat([
332 |         tf.gather(refined_rois, keep),
333 |         tf.dtypes.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis],
334 |         tf.gather(class_scores, keep)[..., tf.newaxis]
335 |         ], axis=1)
336 | 
337 |     # 如果达不到数量的话就padding
338 |     gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0]
339 |     detections = tf.pad(tensor=detections, paddings=[(0, gap), (0, 0)], mode="CONSTANT")
340 |     return detections
341 | 
342 | def norm_boxes_graph(boxes, shape):
343 |     h, w = tf.split(tf.cast(shape, tf.float32), 2)
344 |     scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
345 |     shift = tf.constant([0., 0., 1., 1.])
346 |     return tf.divide(boxes - shift, scale)
347 | 
348 | class DetectionLayer(KL.Layer):
349 | 
350 |     def __init__(self, config=None, **kwargs):
351 |         super(DetectionLayer, self).__init__(**kwargs)
352 |         self.config = config
353 | 
354 |     def call(self, inputs):
355 |         rois = inputs[0]
356 |         mrcnn_class = inputs[1]
357 |         mrcnn_bbox = inputs[2]
358 |         image_meta = inputs[3]
359 | 
360 |         # 找到window的小数形式
361 |         m = parse_image_meta_graph(image_meta)
362 |         image_shape = m['image_shape'][0]
363 |         window = norm_boxes_graph(m['window'], image_shape[:2])
364 | 
365 |         # Run detection refinement graph on each item in the batch. 在此设定相关的阈值：refine_detections_graph
366 |         detections_batch = utils.batch_slice(
367 |             [rois, mrcnn_class, mrcnn_bbox, window],
368 |             lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config),
369 |             self.config.IMAGES_PER_GPU)
370 | 
371 |         # Reshape output
372 |         # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in
373 |         # normalized coordinates
374 |         return tf.reshape(
375 |             detections_batch,
376 |             [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6])
377 | 
378 |     def compute_output_shape(self, input_shape):
379 |         return (None, self.config.DETECTION_MAX_INSTANCES, 6)
380 | 
381 | 
382 | #----------------------------------------------------------#
383 | #   Detection Target Layer
384 | #   该部分代码会输入建议框
385 | #   判断建议框和真实框的重合情况
386 | #   筛选出内部包含物体的建议框
387 | #   利用建议框和真实框编码
388 | #   调整mask的格式使得其和预测格式相同
389 | #----------------------------------------------------------#
390 | 
391 | def overlaps_graph(boxes1, boxes2):
392 |     """
393 |     用于计算boxes1和boxes2的重合程度
394 |     boxes1, boxes2: [N, (y1, x1, y2, x2)].
395 |     返回 [len(boxes1), len(boxes2)]
396 |     """
397 |     b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1),
398 |                             [1, 1, tf.shape(boxes2)[0]]), [-1, 4])
399 |     b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1])
400 |     b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1)
401 |     b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1)
402 |     y1 = tf.maximum(b1_y1, b2_y1)
403 |     x1 = tf.maximum(b1_x1, b2_x1)
404 |     y2 = tf.minimum(b1_y2, b2_y2)
405 |     x2 = tf.minimum(b1_x2, b2_x2)
406 |     intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0)
407 |     b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1)
408 |     b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1)
409 |     union = b1_area + b2_area - intersection
410 |     iou = intersection / union
411 |     overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]])
412 |     return overlaps
413 | 
414 | 
415 | def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config):
416 |     asserts = [
417 |         tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals],
418 |                   name="roi_assertion"),
419 |     ]
420 |     with tf.control_dependencies(asserts):
421 |         proposals = tf.identity(proposals)
422 | 
423 |     # 移除之前获得的padding的部分
424 |     proposals, _ = trim_zeros_graph(proposals, name="trim_proposals")
425 |     gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes")
426 |     gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros,
427 |                                    name="trim_gt_class_ids")
428 |     gt_masks = tf.gather(gt_masks, tf.compat.v1.where(non_zeros)[:, 0], axis=2,
429 |                          name="trim_gt_masks")
430 | 
431 |     # Handle COCO crowds
432 |     # A crowd box in COCO is a bounding box around several instances. Exclude
433 |     # them from training. A crowd box is given a negative class ID.
434 |     crowd_ix = tf.compat.v1.where(gt_class_ids < 0)[:, 0]
435 |     non_crowd_ix = tf.compat.v1.where(gt_class_ids > 0)[:, 0]
436 |     crowd_boxes = tf.gather(gt_boxes, crowd_ix)
437 |     gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix)
438 |     gt_boxes = tf.gather(gt_boxes, non_crowd_ix)
439 |     gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2)
440 | 
441 |     # 计算建议框和所有真实框的重合程度 [proposals, gt_boxes]
442 |     overlaps = overlaps_graph(proposals, gt_boxes)
443 | 
444 |     # 计算和 crowd boxes 的重合程度 [proposals, crowd_boxes]
445 |     crowd_overlaps = overlaps_graph(proposals, crowd_boxes)
446 |     crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1)
447 |     no_crowd_bool = (crowd_iou_max < 0.001)
448 | 
449 |     # Determine positive and negative ROIs
450 |     roi_iou_max = tf.reduce_max(overlaps, axis=1)
451 |     # 1. 正样本建议框和真实框的重合程度大于0.5
452 |     positive_roi_bool = (roi_iou_max >= 0.5)
453 |     positive_indices = tf.where(positive_roi_bool)[:, 0]
454 |     # 2. 负样本建议框和真实框的重合程度小于0.5，Skip crowds.
455 |     negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0]
456 | 
457 |     # Subsample ROIs. Aim for 33% positive
458 |     # 进行正负样本的平衡
459 |     # 取出最大33%的正样本
460 |     positive_count = int(config.TRAIN_ROIS_PER_IMAGE *
461 |                          config.ROI_POSITIVE_RATIO)
462 |     positive_indices = tf.random.shuffle(positive_indices)[:positive_count]
463 |     positive_count = tf.shape(positive_indices)[0]
464 |     # 保持正负样本比例
465 |     r = 1.0 / config.ROI_POSITIVE_RATIO
466 |     negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count
467 |     negative_indices = tf.random.shuffle(negative_indices)[:negative_count]
468 |     # 获得正样本和负样本
469 |     positive_rois = tf.gather(proposals, positive_indices)
470 |     negative_rois = tf.gather(proposals, negative_indices)
471 | 
472 |     # 获取建议框和真实框重合程度
473 |     positive_overlaps = tf.gather(overlaps, positive_indices)
474 |     
475 |     # 判断是否有真实框
476 |     roi_gt_box_assignment = tf.cond(
477 |         tf.greater(tf.shape(positive_overlaps)[1], 0),
478 |         true_fn = lambda: tf.argmax(positive_overlaps, axis=1),
479 |         false_fn = lambda: tf.cast(tf.constant([]),tf.int64)
480 |     )
481 |     # 找到每一个建议框对应的真实框和种类
482 |     roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment)
483 |     roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment)
484 | 
485 |     # 解码获得网络应该有得预测结果
486 |     deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes)
487 |     deltas /= config.BBOX_STD_DEV
488 | 
489 |     # 切换mask的形式[N, height, width, 1]
490 |     transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1)
491 |     
492 |     # 取出对应的层
493 |     roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment)
494 | 
495 |     # Compute mask targets
496 |     boxes = positive_rois
497 |     if config.USE_MINI_MASK:
498 |         # Transform ROI coordinates from normalized image space
499 |         # to normalized mini-mask space.
500 |         y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1)
501 |         gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1)
502 |         gt_h = gt_y2 - gt_y1
503 |         gt_w = gt_x2 - gt_x1
504 |         y1 = (y1 - gt_y1) / gt_h
505 |         x1 = (x1 - gt_x1) / gt_w
506 |         y2 = (y2 - gt_y1) / gt_h
507 |         x2 = (x2 - gt_x1) / gt_w
508 |         boxes = tf.concat([y1, x1, y2, x2], 1)
509 |     box_ids = tf.range(0, tf.shape(roi_masks)[0])
510 |     masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes,
511 |                                      box_ids,
512 |                                      config.MASK_SHAPE)
513 |     # Remove the extra dimension from masks.
514 |     masks = tf.squeeze(masks, axis=3)
515 | 
516 |     # 防止resize后的结果不是1或者0
517 |     masks = tf.round(masks)
518 | 
519 |     # 一般传入config.TRAIN_ROIS_PER_IMAGE个建议框进行训练，
520 |     # 如果数量不够则padding
521 |     rois = tf.concat([positive_rois, negative_rois], axis=0)
522 |     N = tf.shape(negative_rois)[0]
523 |     P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0)
524 |     rois = tf.pad(rois, [(0, P), (0, 0)])
525 |     roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)])
526 |     roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)])
527 |     deltas = tf.pad(deltas, [(0, N + P), (0, 0)])
528 |     masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)])
529 | 
530 |     return rois, roi_gt_class_ids, deltas, masks
531 | 
532 | def trim_zeros_graph(boxes, name='trim_zeros'):
533 |     """
534 |     如果前一步没有满POST_NMS_ROIS_TRAINING个建议框，会有padding
535 |     要去掉padding
536 |     """
537 |     non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
538 |     boxes = tf.boolean_mask(boxes, non_zeros, name=name)
539 |     return boxes, non_zeros
540 | 
541 | class DetectionTargetLayer(KL.Layer):
542 |     """找到建议框的ground_truth
543 |     Inputs:
544 |     proposals: [batch, N, (y1, x1, y2, x2)]建议框
545 |     gt_class_ids: [batch, MAX_GT_INSTANCES]每个真实框对应的类
546 |     gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]真实框的位置
547 |     gt_masks: [batch, height, width, MAX_GT_INSTANCES]真实框的语义分割情况
548 |     Returns: 
549 |     rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]内部真实存在目标的建议框
550 |     target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]每个建议框对应的类
551 |     target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]每个建议框应该有的调整参数
552 |     target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]每个建议框语义分割情况， resize成28*28
553 |     """
554 | 
555 |     def __init__(self, config, **kwargs):
556 |         super(DetectionTargetLayer, self).__init__(**kwargs)
557 |         self.config = config
558 | 
559 |     def call(self, inputs):
560 |         proposals = inputs[0]
561 |         gt_class_ids = inputs[1]
562 |         gt_boxes = inputs[2]
563 |         gt_masks = inputs[3]
564 | 
565 |         # 对真实框进行编码
566 |         names = ["rois", "target_class_ids", "target_bbox", "target_mask"]
567 |         outputs = utils.batch_slice(
568 |             [proposals, gt_class_ids, gt_boxes, gt_masks],
569 |             lambda w, x, y, z: detection_targets_graph(
570 |                 w, x, y, z, self.config),
571 |             self.config.IMAGES_PER_GPU, names=names)
572 |         return outputs
573 | 
574 |     def compute_output_shape(self, input_shape):
575 |         return [
576 |             (None, self.config.TRAIN_ROIS_PER_IMAGE, 4),  # rois
577 |             (None, self.config.TRAIN_ROIS_PER_IMAGE),  # class_ids
578 |             (None, self.config.TRAIN_ROIS_PER_IMAGE, 4),  # deltas
579 |             (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0],
580 |              self.config.MASK_SHAPE[1])  # masks
581 |         ]
582 | 
583 |     def compute_mask(self, inputs, mask=None):
584 |         return [None, None, None, None]


--------------------------------------------------------------------------------
/mrcnn/mask_rcnn.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from PIL import Image
  3 | import numpy as np
  4 | from .mrcnn import get_model
  5 | from utils.config import Config
  6 | from utils.anchors import get_anchors
  7 | from utils.utils import mold_inputs,unmold_detections
  8 | from utils import visualize
  9 | import tensorflow as tf
 10 | from config import InferenceConfig
 11 | 
 12 | # tf.compat.v1.disable_eager_execution()
 13 | 
 14 | class MASK_RCNN(object):
 15 | 
 16 |     def __init__(self, **kwargs):
 17 |         self.model_path = kwargs['model']
 18 |         self.classes_path= kwargs['classes_path']
 19 |         self.confidence = kwargs['confidence']
 20 | 
 21 |         # 使用coco数据集检测的时候，IMAGE_MIN_DIM=1024，IMAGE_MAX_DIM=1024, RPN_ANCHOR_SCALES=(32, 64, 128, 256, 512)
 22 |         self.RPN_ANCHOR_SCALES = InferenceConfig.RPN_ANCHOR_SCALES
 23 |         self.IMAGE_MIN_DIM = InferenceConfig.IMAGE_MIN_DIM
 24 |         self.IMAGE_MAX_DIM = InferenceConfig.IMAGE_MAX_DIM
 25 | 
 26 |         self.class_names = self.get_class()
 27 |         self.config = self._get_config()
 28 |         self.generate()
 29 | 
 30 |     def get_class(self):
 31 |         classes_path = os.path.expanduser(self.classes_path)
 32 |         with open(classes_path) as f:
 33 |             class_names = f.readlines()
 34 |         class_names = [c.strip() for c in class_names]
 35 |         class_names.insert(0,"BG")
 36 |         return class_names
 37 | 
 38 |     def _get_config(self):
 39 |         class InferenceConfig(Config):
 40 |             NUM_CLASSES = len(self.class_names)
 41 |             GPU_COUNT = 1
 42 |             IMAGES_PER_GPU = 1
 43 |             DETECTION_MIN_CONFIDENCE = self.confidence
 44 |             NAME = "Customer"
 45 |             RPN_ANCHOR_SCALES = self.RPN_ANCHOR_SCALES
 46 |             IMAGE_MIN_DIM = self.IMAGE_MIN_DIM
 47 |             IMAGE_MAX_DIM = self.IMAGE_MAX_DIM
 48 | 
 49 |         config = InferenceConfig()
 50 |         config.display()
 51 |         return config
 52 | 
 53 | 
 54 |     def generate(self):
 55 |         model_path = os.path.expanduser(self.model_path)
 56 |         assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
 57 |         
 58 |         # 计算总的种类
 59 |         self.num_classes = len(self.class_names)
 60 | 
 61 |         # 载入模型
 62 |         self.model = get_model(self.config, training=False)
 63 |         self.model.load_weights(self.model_path,by_name=True)
 64 |         # self.model.save('./maskrcnn_coco', save_format='tf')
 65 |     
 66 | 
 67 |     def detect_image(self, image, showBox = False, show_caption=False):
 68 |         image = [np.array(image)]
 69 |         molded_images, image_metas, windows = mold_inputs(self.config,image)
 70 | 
 71 |         image_shape = molded_images[0].shape
 72 |         anchors = get_anchors(self.config,image_shape)
 73 |         anchors = np.broadcast_to(anchors, (1,) + anchors.shape)
 74 | 
 75 |         detections, _, _, mrcnn_mask, _, _, _ =\
 76 |             self.model.predict([molded_images, image_metas, anchors], verbose=0)
 77 | 
 78 |         final_rois, final_class_ids, final_scores, final_masks =\
 79 |             unmold_detections(detections[0], mrcnn_mask[0],
 80 |                                     image[0].shape, molded_images[0].shape,
 81 |                                     windows[0])
 82 | 
 83 |         r = {
 84 |             "rois": final_rois,
 85 |             "class_ids": final_class_ids,
 86 |             "scores": final_scores,
 87 |             "masks": final_masks,
 88 |         }
 89 | 
 90 |         # 想要保存处理后的图片请查询plt保存图片的方法。
 91 |         drawed_image = visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'], 
 92 |                                     self.class_names, r['scores'], show_bbox = showBox, captions=show_caption)
 93 |         # 处理mask 文件
 94 |         mask_image = np.any(r['masks'], axis=-1)
 95 |         mask_image = Image.fromarray(mask_image)
 96 |         return drawed_image, mask_image
 97 |     
 98 |     def get_detections(self, image):
 99 |         image = [np.array(image)]
100 |         molded_images, image_metas, windows = mold_inputs(self.config,image)
101 | 
102 |         image_shape = molded_images[0].shape
103 |         anchors = get_anchors(self.config,image_shape)
104 |         anchors = np.broadcast_to(anchors, (1,) + anchors.shape)
105 | 
106 |         detections, _, _, mrcnn_mask, _, _, _ =\
107 |             self.model.predict([molded_images, image_metas, anchors], verbose=0)
108 | 
109 |         final_rois, final_class_ids, final_scores, final_masks =\
110 |             unmold_detections(detections[0], mrcnn_mask[0],
111 |                                     image[0].shape, molded_images[0].shape,
112 |                                     windows[0])
113 | 
114 |         r = {
115 |             "rois": final_rois,
116 |             "class_ids": final_class_ids,
117 |             "scores": final_scores,
118 |             "masks": final_masks,
119 |         }
120 |         return r
121 |         
122 |     def close_session(self):
123 |         self.sess.close()


--------------------------------------------------------------------------------
/mrcnn/mrcnn.py:
--------------------------------------------------------------------------------
  1 | from .layers import ProposalLayer,PyramidROIAlign,DetectionLayer,DetectionTargetLayer
  2 | from .mrcnn_training import *
  3 | from utils.anchors import get_anchors
  4 | from utils.utils import norm_boxes_graph,parse_image_meta_graph
  5 | import numpy as np
  6 | import tensorflow as tf
  7 | import tensorflow.keras as keras
  8 | import tensorflow.keras.backend as K
  9 | import tensorflow.keras.layers as KL
 10 | import tensorflow.keras.utils as KU
 11 | from tensorflow.python.eager import context
 12 | import tensorflow.keras.models as KM
 13 | from mrcnn.restnet import get_resnet
 14 | 
 15 | 
 16 | # tf.compat.v1.disable_eager_execution()
 17 | 
 18 | 
 19 | def rpn_graph(feature_map, anchors_per_location, anchor_stride):
 20 |     
 21 |     shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu',strides=anchor_stride,
 22 |                        name='rpn_conv_shared')(feature_map)
 23 |     
 24 |     x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid',
 25 |                   activation='linear', name='rpn_class_raw')(shared)
 26 |     # batch_size,num_anchors,2
 27 |     # 代表这个先验框对应的类
 28 |     rpn_class_logits = KL.Reshape([-1,2])(x)
 29 | 
 30 |     rpn_probs = KL.Activation(
 31 |         "softmax", name="rpn_class_xxx")(rpn_class_logits)
 32 |     
 33 |     x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid",
 34 |                   activation='linear', name='rpn_bbox_pred')(shared)
 35 |     # batch_size,num_anchors,4
 36 |     # 这个先验框的调整参数
 37 |     rpn_bbox = KL.Reshape([-1,4])(x)
 38 | 
 39 |     return [rpn_class_logits, rpn_probs, rpn_bbox]
 40 | 
 41 | 
 42 | def build_rpn_model(anchor_stride, anchors_per_location, depth):
 43 |     """Builds a Keras model of the Region Proposal Network.
 44 |     It wraps the RPN graph so it can be used multiple times with shared
 45 |     weights.
 46 | 
 47 |     anchors_per_location: number of anchors per pixel in the feature map
 48 |     anchor_stride: Controls the density of anchors. Typically 1 (anchors for
 49 |                    every pixel in the feature map), or 2 (every other pixel).
 50 |     depth: Depth of the backbone feature map.
 51 | 
 52 |     Returns a Keras Model object. The model outputs, when called, are:
 53 |     rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax)
 54 |     rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities.
 55 |     rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be
 56 |                 applied to anchors.
 57 |     """
 58 |     input_feature_map = KL.Input(shape=[None, None, depth],
 59 |                                  name="input_rpn_feature_map")
 60 |     outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride)
 61 |     return KM.Model([input_feature_map], outputs, name="rpn_model")
 62 | 
 63 | 
 64 | 
 65 | def fpn_classifier_graph(rois, feature_maps, image_meta,
 66 |                          pool_size, num_classes, train_bn=True,
 67 |                          fc_layers_size=1024):
 68 |     # ROI Pooling，利用建议框在特征层上进行截取
 69 |     # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels]
 70 |     x = PyramidROIAlign([pool_size, pool_size],
 71 |                         name="roi_align_classifier")([rois, image_meta] + feature_maps)
 72 | 
 73 |     # Shape: [batch, num_rois, 1, 1, fc_layers_size]，相当于两次全连接
 74 |     x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"),
 75 |                            name="mrcnn_class_conv1")(x)
 76 |     x = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_class_bn1')(x, training=train_bn)
 77 |     x = KL.Activation('relu')(x)
 78 | 
 79 |     # Shape: [batch, num_rois, 1, 1, fc_layers_size]
 80 |     x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)),
 81 |                            name="mrcnn_class_conv2")(x)
 82 |     x = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_class_bn2')(x, training=train_bn)
 83 |     x = KL.Activation('relu')(x)
 84 | 
 85 |     # Shape: [batch, num_rois, fc_layers_size]
 86 |     shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2),
 87 |                        name="pool_squeeze")(x)
 88 | 
 89 |     # Classifier head
 90 |     # 这个的预测结果代表这个先验框内部的物体的种类
 91 |     mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes),
 92 |                                             name='mrcnn_class_logits')(shared)
 93 |     mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"),
 94 |                                      name="mrcnn_class")(mrcnn_class_logits)
 95 | 
 96 | 
 97 |     # BBox head
 98 |     # 这个的预测结果会对先验框进行调整
 99 |     # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))]
100 |     x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'),
101 |                            name='mrcnn_bbox_fc')(shared)
102 |     # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))]
103 |     s = K.int_shape(x)
104 |     if s[1] is None:
105 |         mrcnn_bbox = KL.Reshape((-1, num_classes, 4), name="mrcnn_bbox")(x)
106 |     else:
107 |         mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x)
108 | 
109 |     return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox
110 | 
111 | 
112 | 
113 | def build_fpn_mask_graph(rois, feature_maps, image_meta,
114 |                          pool_size, num_classes, train_bn=True):
115 |     # ROI Pooling，利用建议框在特征层上进行截取
116 |     # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
117 |     x = PyramidROIAlign([pool_size, pool_size],
118 |                         name="roi_align_mask")([rois, image_meta] + feature_maps)
119 | 
120 |     # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
121 |     x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
122 |                            name="mrcnn_mask_conv1")(x)
123 |     x = KL.TimeDistributed(KL.BatchNormalization(),
124 |                            name='mrcnn_mask_bn1')(x, training=train_bn)
125 |     x = KL.Activation('relu')(x)
126 | 
127 |     # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
128 |     x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
129 |                            name="mrcnn_mask_conv2")(x)
130 |     x = KL.TimeDistributed(KL.BatchNormalization(),
131 |                            name='mrcnn_mask_bn2')(x, training=train_bn)
132 |     x = KL.Activation('relu')(x)
133 | 
134 |     # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
135 |     x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
136 |                            name="mrcnn_mask_conv3")(x)
137 |     x = KL.TimeDistributed(KL.BatchNormalization(),
138 |                            name='mrcnn_mask_bn3')(x, training=train_bn)
139 |     x = KL.Activation('relu')(x)
140 | 
141 |     # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels]
142 |     x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"),
143 |                            name="mrcnn_mask_conv4")(x)
144 |     x = KL.TimeDistributed(KL.BatchNormalization(),
145 |                            name='mrcnn_mask_bn4')(x, training=train_bn)
146 |     x = KL.Activation('relu')(x)
147 | 
148 |     # Shape: [batch, num_rois, 2xMASK_POOL_SIZE, 2xMASK_POOL_SIZE, channels]
149 |     x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"),
150 |                            name="mrcnn_mask_deconv")(x)
151 |     # 反卷积后再次进行一个1x1卷积调整通道，使其最终数量为numclasses，代表分的类
152 |     x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"),
153 |                            name="mrcnn_mask")(x)
154 |     return x
155 | 
156 | 
157 | def get_model(config, training):
158 |     # Image size must be dividable by 2 multiple times
159 |     h, w = config.IMAGE_SHAPE[:2]
160 |     if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6):
161 |         raise Exception("Image size must be dividable by 2 at least 6 times "
162 |                             "to avoid fractions when downscaling and upscaling."
163 |                             "For example, use 256, 320, 384, 448, 512, ... etc. ")
164 | 
165 |     # Inputs
166 |     input_image = KL.Input(
167 |             shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image")
168 |     input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE],
169 |                                     name="input_image_meta")
170 | 
171 |     if training:
172 |         input_rpn_match = KL.Input(
173 |                 shape=[None, 1], name="input_rpn_match", dtype=tf.int32)
174 |         input_rpn_bbox = KL.Input(
175 |                 shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32)
176 | 
177 |         # Detection GT (class IDs, bounding boxes, and masks)
178 |         # 1. GT Class IDs (zero padded)
179 |         input_gt_class_ids = KL.Input(
180 |                 shape=[None], name="input_gt_class_ids", dtype=tf.int32)
181 |         # 2. GT Boxes in pixels (zero padded)
182 |         # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates
183 |         input_gt_boxes = KL.Input(
184 |                 shape=[None, 4], name="input_gt_boxes", dtype=tf.float32)
185 |         # Normalize coordinates
186 |         gt_boxes = KL.Lambda(lambda x: norm_boxes_graph(
187 |                 x, K.shape(input_image)[1:3]))(input_gt_boxes)
188 | 
189 |         # mask语义分析信息
190 |         # [batch, height, width, MAX_GT_INSTANCES]
191 |         if config.USE_MINI_MASK:
192 |             input_gt_masks = KL.Input(shape=[config.MINI_MASK_SHAPE[0],config.MINI_MASK_SHAPE[1], None],name="input_gt_masks", dtype=bool)
193 |         else:
194 |             input_gt_masks = KL.Input(shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],name="input_gt_masks", dtype=bool)
195 |         # 设置anchor
196 |         anchors = get_anchors(config,config.IMAGE_SHAPE)
197 |         # 拓展anchors的shape，第一个维度拓展为batch_size
198 |         anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape)
199 |         # 将anchors转化成tensor的形式
200 |         class ConstLayer(tf.keras.layers.Layer):
201 |             def __init__(self, x, name=None):
202 |                 super(ConstLayer, self).__init__(name=name)
203 |                 self.x = tf.Variable(x)
204 | 
205 |             def call(self, input):
206 |                 return self.x
207 | 
208 |         anchors = ConstLayer(anchors, name="anchors")(input_image)
209 | 
210 |     else:
211 |         input_anchors = KL.Input(shape=[None, 4], name="input_anchors")
212 |         anchors = input_anchors
213 | 
214 |     # 获得Resnet里的压缩程度不同的一些层
215 |     _, C2, C3, C4, C5 = get_resnet(input_image, stage5=True, train_bn=config.TRAIN_BN)
216 | 
217 |     # 组合成特征金字塔的结构
218 |     # P5长宽共压缩了5次
219 |     # Height/32,Width/32,256
220 |     P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5)
221 |     # P4长宽共压缩了4次
222 |     # Height/16,Width/16,256
223 |     P4 = KL.Add(name="fpn_p4add")([
224 |         KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5),
225 |         KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)])
226 |     # P4长宽共压缩了3次
227 |     # Height/8,Width/8,256
228 |     P3 = KL.Add(name="fpn_p3add")([
229 |         KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4),
230 |         KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)])
231 |     # P4长宽共压缩了2次
232 |     # Height/4,Width/4,256
233 |     P2 = KL.Add(name="fpn_p2add")([
234 |         KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3),
235 |         KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)])
236 |         
237 |     # 各自进行一次256通道的卷积，此时P2、P3、P4、P5通道数相同
238 |     # Height/4,Width/4,256
239 |     P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2)
240 |     # Height/8,Width/8,256
241 |     P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3)
242 |     # Height/16,Width/16,256
243 |     P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4)
244 |     # Height/32,Width/32,256
245 |     P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5)
246 |     # 在建议框网络里面还有一个P6用于获取建议框
247 |     # Height/64,Width/64,256
248 |     P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5)
249 | 
250 |     # P2, P3, P4, P5, P6可以用于获取建议框
251 |     rpn_feature_maps = [P2, P3, P4, P5, P6]
252 |     # P2, P3, P4, P5用于获取mask信息
253 |     mrcnn_feature_maps = [P2, P3, P4, P5]
254 | 
255 |     
256 |     
257 |     # anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image)
258 |     # 建立RPN模型
259 |     rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE)
260 | 
261 |     if training:
262 |         # Loop through pyramid layers
263 |         layer_outputs = []  # list of lists
264 |         for p in rpn_feature_maps:
265 |             layer_outputs.append(rpn([p]))
266 | 
267 |         # 获得RPN网络的预测结果，进行格式调整，把五个特征层的结果进行堆叠
268 |         output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"]
269 |         outputs = list(zip(*layer_outputs))
270 |         outputs = [KL.Concatenate(axis=1, name=n)(list(o))
271 |                        for o, n in zip(outputs, output_names)]
272 | 
273 |         rpn_class_logits, rpn_class, rpn_bbox = outputs
274 |     else:
275 |         rpn_class_logits, rpn_class, rpn_bbox = [],[],[]
276 | 
277 |         # 获得RPN网络的预测结果，进行格式调整，把五个特征层的结果进行堆叠
278 |         for p in rpn_feature_maps:
279 |             logits,classes,bbox = rpn([p])
280 |             rpn_class_logits.append(logits)
281 |             rpn_class.append(classes)
282 |             rpn_bbox.append(bbox)
283 | 
284 |         rpn_class_logits = KL.Concatenate(axis=1,name="rpn_class_logits")(rpn_class_logits)
285 |         rpn_class =KL.Concatenate(axis=1,name="rpn_class")(rpn_class)
286 |         rpn_bbox = KL.Concatenate(axis=1,name="rpn_bbox")(rpn_bbox)
287 | 
288 |     # 此时获得的rpn_class_logits、rpn_class、rpn_bbox的维度是
289 |     # rpn_class_logits : Batch_size, num_anchors, 2
290 |     # rpn_class : Batch_size, num_anchors, 2
291 |     # rpn_bbox : Batch_size, num_anchors, 4
292 |     proposal_count = config.POST_NMS_ROIS_TRAINING
293 | 
294 |     # Batch_size, proposal_count, 4
295 |     rpn_rois = ProposalLayer(
296 |             proposal_count=proposal_count,
297 |             nms_threshold=config.RPN_NMS_THRESHOLD,
298 |             name="ROI",
299 |             config=config)([rpn_class, rpn_bbox, anchors])
300 | 
301 |     if not training:
302 |         mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
303 |         fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta,
304 |                                 config.POOL_SIZE, config.NUM_CLASSES,
305 |                                 train_bn=config.TRAIN_BN,
306 |                                 fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
307 |     
308 |         detections = DetectionLayer(config, name="mrcnn_detection")(
309 |                     [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta])
310 |                 
311 |                 
312 |         detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections)
313 |         # 获得mask的结果
314 |         mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps,
315 |                                     input_image_meta,
316 |                                     config.MASK_POOL_SIZE,
317 |                                     config.NUM_CLASSES,
318 |                                     train_bn=config.TRAIN_BN)
319 | 
320 |         # 作为输出
321 |         model = KM.Model([input_image, input_image_meta, input_anchors],
322 |                         [detections, mrcnn_class, mrcnn_bbox,
323 |                             mrcnn_mask, rpn_rois, rpn_class, rpn_bbox],
324 |                         name='mask_rcnn')
325 |         return model
326 | 
327 |     active_class_ids = KL.Lambda(
328 |         lambda x: parse_image_meta_graph(x)["active_class_ids"]
329 |         )(input_image_meta)
330 | 
331 |     if not config.USE_RPN_ROIS:
332 |         # 使用外部输入的建议框
333 |         input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4],
334 |                                 name="input_roi", dtype=np.int32)
335 |         # Normalize coordinates
336 |         target_rois = KL.Lambda(lambda x: norm_boxes_graph(
337 |             x, K.shape(input_image)[1:3]))(input_rois)
338 |     else:
339 |         # 利用预测到的建议框进行下一步的操作
340 |         target_rois = rpn_rois
341 | 
342 |     """找到建议框的ground_truth
343 |     Inputs:
344 |     proposals: [batch, N, (y1, x1, y2, x2)]建议框
345 |     gt_class_ids: [batch, MAX_GT_INSTANCES]每个真实框对应的类
346 |     gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]真实框的位置
347 |     gt_masks: [batch, height, width, MAX_GT_INSTANCES]真实框的语义分割情况
348 |     Returns: 
349 |     rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]内部真实存在目标的建议框
350 |     target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]每个建议框对应的类
351 |     target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]每个建议框应该有的调整参数
352 |     target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]每个建议框语义分割情况
353 |     """
354 |     rois, target_class_ids, target_bbox, target_mask =\
355 |         DetectionTargetLayer(config, name="proposal_targets")([
356 |             target_rois, input_gt_class_ids, gt_boxes, input_gt_masks])
357 | 
358 |     # 找到合适的建议框的classifier预测结果
359 |     mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\
360 |         fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta,
361 |                                 config.POOL_SIZE, config.NUM_CLASSES,
362 |                                 train_bn=config.TRAIN_BN,
363 |                                 fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE)
364 |     # 找到合适的建议框的mask预测结果
365 |     mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps,
366 |                                         input_image_meta,
367 |                                         config.MASK_POOL_SIZE,
368 |                                         config.NUM_CLASSES,
369 |                                         train_bn=config.TRAIN_BN)
370 | 
371 |     output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois)
372 | 
373 |     # Losses
374 |     rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")(
375 |         [input_rpn_match, rpn_class_logits])
376 |     rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")(
377 |         [input_rpn_bbox, input_rpn_match, rpn_bbox])
378 |     class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")(
379 |         [target_class_ids, mrcnn_class_logits, active_class_ids])
380 |     bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")(
381 |         [target_bbox, target_class_ids, mrcnn_bbox])
382 |     mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")(
383 |         [target_mask, target_class_ids, mrcnn_mask])
384 | 
385 |     # Model
386 |     inputs = [input_image, input_image_meta,
387 |                 input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks]
388 |                 
389 |     if not config.USE_RPN_ROIS:
390 |         inputs.append(input_rois)
391 |     outputs = [rpn_class_logits, rpn_class, rpn_bbox,
392 |                 mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask,
393 |                 rpn_rois, output_rois,
394 |                 rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss]
395 |     model = KM.Model(inputs, outputs, name='mask_rcnn')
396 |     return model


--------------------------------------------------------------------------------
/mrcnn/mrcnn_training.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import tensorflow.keras.backend as K
  3 | import tensorflow.keras.layers as KL
  4 | import tensorflow.keras.utils as KU
  5 | from tensorflow.python.eager import context
  6 | import random
  7 | import numpy as np
  8 | import logging
  9 | from utils import utils
 10 | from utils.anchors import compute_backbone_shapes,generate_pyramid_anchors
 11 | 
 12 | # tf.compat.v1.disable_eager_execution()
 13 | 
 14 | def batch_pack_graph(x, counts, num_rows):
 15 |     outputs = []
 16 |     for i in range(num_rows):
 17 |         outputs.append(x[i, :counts[i]])
 18 |     return tf.concat(outputs, axis=0)
 19 | 
 20 | def smooth_l1_loss(y_true, y_pred):
 21 |     """
 22 |     smmoth_l1 损失函数
 23 |     """
 24 |     diff = K.abs(y_true - y_pred)
 25 |     less_than_one = K.cast(K.less(diff, 1.0), "float32")
 26 |     loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5)
 27 |     return loss
 28 | 
 29 | def rpn_class_loss_graph(rpn_match, rpn_class_logits):
 30 |     """
 31 |     建议框分类损失函数
 32 |     """
 33 |     rpn_match = tf.squeeze(rpn_match, -1)
 34 |     anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32)
 35 |     indices = tf.where(K.not_equal(rpn_match, 0))
 36 |     rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
 37 |     anchor_class = tf.gather_nd(anchor_class, indices)
 38 |     loss = K.sparse_categorical_crossentropy(target=anchor_class,
 39 |                                              output=rpn_class_logits,
 40 |                                              from_logits=True)
 41 |     loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
 42 |     loss = K.switch(tf.math.is_nan(loss), tf.constant([0.0]), loss)
 43 |     return loss
 44 | 
 45 | def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
 46 |     """
 47 |     建议框回归损失
 48 |     """
 49 |     rpn_match = K.squeeze(rpn_match, -1)
 50 |     indices = tf.where(K.equal(rpn_match, 1))
 51 |     rpn_bbox = tf.gather_nd(rpn_bbox, indices)
 52 |     batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1)
 53 |     target_bbox = batch_pack_graph(target_bbox, batch_counts,
 54 |                                    config.IMAGES_PER_GPU)
 55 |     # 计算smooth_l1损失函数
 56 |     loss = smooth_l1_loss(target_bbox, rpn_bbox)
 57 |     
 58 |     loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
 59 |     loss = K.switch(tf.math.is_nan(loss), tf.constant([0.0]), loss)
 60 |     return loss
 61 | 
 62 | def mrcnn_class_loss_graph(target_class_ids, pred_class_logits,
 63 |                            active_class_ids):
 64 |     """
 65 |     classifier的分类损失函数
 66 |     """
 67 |     target_class_ids = tf.cast(target_class_ids, 'int64')
 68 |     pred_class_ids = tf.argmax(pred_class_logits, axis=2)
 69 |     pred_active = tf.gather(active_class_ids[0], pred_class_ids)
 70 |     loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
 71 |         labels=target_class_ids, logits=pred_class_logits)
 72 | 
 73 |     loss = loss * pred_active
 74 |     loss = tf.reduce_sum(loss) / tf.maximum(tf.reduce_sum(pred_active), 1)
 75 |     return loss
 76 | 
 77 | def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
 78 |     """
 79 |     classifier的回归损失函数
 80 |     """
 81 |     # Reshape
 82 |     target_class_ids = K.reshape(target_class_ids, (-1,))
 83 |     target_bbox = K.reshape(target_bbox, (-1, 4))
 84 |     pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4))
 85 | 
 86 |     # 只有属于正样本的建议框用于训练
 87 |     positive_roi_ix = tf.where(target_class_ids > 0)[:, 0]
 88 |     positive_roi_class_ids = tf.cast(tf.gather(target_class_ids, positive_roi_ix), tf.int64)
 89 |     indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1)
 90 | 
 91 |     # 获得对应预测结果与实际结果
 92 |     target_bbox = tf.gather(target_bbox, positive_roi_ix)
 93 |     pred_bbox = tf.gather_nd(pred_bbox, indices)
 94 | 
 95 |     # Smooth-L1 Loss
 96 |     loss = K.switch(tf.size(target_bbox) > 0,
 97 |                     smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox),
 98 |                     tf.constant(0.0))
 99 |     loss = K.mean(loss)
100 |     return loss
101 | 
102 | def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks):
103 |     """
104 |     交叉熵损失
105 |     """
106 |     target_class_ids = K.reshape(target_class_ids, (-1,))
107 |     # 实际结果
108 |     mask_shape = tf.shape(target_masks)
109 |     target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3]))
110 | 
111 |     # 预测结果
112 |     pred_shape = tf.shape(pred_masks)
113 |     pred_masks = K.reshape(pred_masks, (-1, pred_shape[2], pred_shape[3], pred_shape[4]))
114 | 
115 |     # 进行维度变换 [N, num_classes, height, width]
116 |     pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2])
117 | 
118 |     # 只有正样本有效
119 |     positive_ix = tf.where(target_class_ids > 0)[:, 0]
120 |     positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix), tf.int64)
121 |     indices = tf.stack([positive_ix, positive_class_ids], axis=1)
122 | 
123 |     # 获得实际结果与预测结果
124 |     y_true = tf.gather(target_masks, positive_ix)
125 |     y_pred = tf.gather_nd(pred_masks, indices)
126 | 
127 |     # shape: [batch, roi, num_classes]
128 |     loss = K.switch(tf.size(y_true) > 0,
129 |                     K.binary_crossentropy(target=y_true, output=y_pred),
130 |                     tf.constant(0.0))
131 |     loss = K.mean(loss)
132 |     return loss
133 | 
134 | def load_image_gt(dataset, config, image_id, augment=False, augmentation=None,
135 |                   use_mini_mask=False):
136 |     # 载入图片和语义分割效果
137 |     image = dataset.load_image(image_id)
138 |     mask, class_ids = dataset.load_mask(image_id)
139 |     # print("\nbefore:",image_id,np.shape(mask),np.shape(class_ids))
140 |     original_shape = image.shape
141 |     image, window, scale, padding, crop = utils.resize_image(
142 |         image,
143 |         min_dim=config.IMAGE_MIN_DIM,
144 |         min_scale=config.IMAGE_MIN_SCALE,
145 |         max_dim=config.IMAGE_MAX_DIM,
146 |         mode=config.IMAGE_RESIZE_MODE)
147 |     mask = utils.resize_mask(mask, scale, padding, crop)
148 |     # 图像翻转
149 |     if augment:
150 |         logging.warning("'augment' is deprecated. Use 'augmentation' instead.")
151 |         if random.randint(0, 1):
152 |             image = np.fliplr(image)
153 |             mask = np.fliplr(mask)
154 | 
155 |     if augmentation:
156 |         import imgaug
157 |         # 图像增强
158 |         MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes",
159 |                            "Fliplr", "Flipud", "CropAndPad",
160 |                            "Affine", "PiecewiseAffine"]
161 | 
162 |         def hook(images, augmenter, parents, default):
163 |             """Determines which augmenters to apply to masks."""
164 |             return augmenter.__class__.__name__ in MASK_AUGMENTERS
165 | 
166 |         image_shape = image.shape
167 |         mask_shape = mask.shape
168 |         det = augmentation.to_deterministic()
169 |         image = det.augment_image(image)
170 |         mask = det.augment_image(mask.astype(np.uint8),
171 |                                  hooks=imgaug.HooksImages(activator=hook))
172 |         assert image.shape == image_shape, "Augmentation shouldn't change image size"
173 |         assert mask.shape == mask_shape, "Augmentation shouldn't change mask size"
174 |         mask = mask.astype(np.bool)
175 |     # 检漏，防止某些层内部实际上不存在语义分割情况
176 |     _idx = np.sum(mask, axis=(0, 1)) > 0
177 |     
178 |     # print("\nafterer:",np.shape(mask),np.shape(_idx))
179 |     mask = mask[:, :, _idx]
180 |     class_ids = class_ids[_idx]
181 |     # 找到mask对应的box
182 |     bbox = utils.extract_bboxes(mask)
183 | 
184 |     active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32)
185 |     source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]]
186 |     active_class_ids[source_class_ids] = 1
187 | 
188 |     if use_mini_mask:
189 |         mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE)
190 | 
191 |     # 生成Image_meta
192 |     image_meta = utils.compose_image_meta(image_id, original_shape, image.shape,
193 |                                     window, scale, active_class_ids)
194 | 
195 |     return image, image_meta, class_ids, bbox, mask
196 | 
197 | 
198 | 
199 | def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config):
200 |     # 1代表正样本
201 |     # -1代表负样本
202 |     # 0代表忽略
203 |     rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32)
204 |     # 创建该部分内容利用先验框和真实框进行编码
205 |     rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4))
206 | 
207 |     '''
208 |     iscrowd=0的时候，表示这是一个单独的物体，轮廓用Polygon(多边形的点)表示，
209 |     iscrowd=1的时候表示两个没有分开的物体，轮廓用RLE编码表示
210 |     '''
211 |     crowd_ix = np.where(gt_class_ids < 0)[0]
212 |     if crowd_ix.shape[0] > 0:
213 |         non_crowd_ix = np.where(gt_class_ids > 0)[0]
214 |         crowd_boxes = gt_boxes[crowd_ix]
215 |         gt_class_ids = gt_class_ids[non_crowd_ix]
216 |         gt_boxes = gt_boxes[non_crowd_ix]
217 |         crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes)
218 |         crowd_iou_max = np.amax(crowd_overlaps, axis=1)
219 |         no_crowd_bool = (crowd_iou_max < 0.001)
220 |     else:
221 |         no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool)
222 | 
223 |     # 计算先验框和真实框的重合程度 [num_anchors, num_gt_boxes]
224 |     overlaps = utils.compute_overlaps(anchors, gt_boxes)
225 | 
226 |     # 1. 重合程度小于0.3则代表为负样本
227 |     anchor_iou_argmax = np.argmax(overlaps, axis=1)
228 |     anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax]
229 |     rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1
230 |     # 2. 每个真实框重合度最大的先验框是正样本
231 |     gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0]
232 |     rpn_match[gt_iou_argmax] = 1
233 |     # 3. 重合度大于0.7则代表为正样本
234 |     rpn_match[anchor_iou_max >= 0.7] = 1
235 | 
236 |     # 正负样本平衡
237 |     # 找到正样本的索引
238 |     ids = np.where(rpn_match == 1)[0]
239 |     # 如果大于(config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2)则删掉一些
240 |     extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2)
241 |     if extra > 0:
242 |         ids = np.random.choice(ids, extra, replace=False)
243 |         rpn_match[ids] = 0
244 |     # 找到负样本的索引
245 |     ids = np.where(rpn_match == -1)[0]
246 |     # 使得总数为config.RPN_TRAIN_ANCHORS_PER_IMAGE
247 |     extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE -
248 |                         np.sum(rpn_match == 1))
249 |     if extra > 0:
250 |         # Rest the extra ones to neutral
251 |         ids = np.random.choice(ids, extra, replace=False)
252 |         rpn_match[ids] = 0
253 | 
254 |     # 找到内部真实存在物体的先验框，进行编码
255 |     ids = np.where(rpn_match == 1)[0]
256 |     ix = 0 
257 |     for i, a in zip(ids, anchors[ids]):
258 |         gt = gt_boxes[anchor_iou_argmax[i]]
259 |         # 计算真实框的中心，高宽
260 |         gt_h = gt[2] - gt[0]
261 |         gt_w = gt[3] - gt[1]
262 |         gt_center_y = gt[0] + 0.5 * gt_h
263 |         gt_center_x = gt[1] + 0.5 * gt_w
264 |         # 计算先验框中心，高宽
265 |         a_h = a[2] - a[0]
266 |         a_w = a[3] - a[1]
267 |         a_center_y = a[0] + 0.5 * a_h
268 |         a_center_x = a[1] + 0.5 * a_w
269 |         # 编码运算
270 |         rpn_bbox[ix] = [
271 |             (gt_center_y - a_center_y) / np.maximum(a_h, 1),
272 |             (gt_center_x - a_center_x) / np.maximum(a_w, 1),
273 |             np.log(np.maximum(gt_h / np.maximum(a_h, 1), 1e-5)),
274 |             np.log(np.maximum(gt_w / np.maximum(a_w, 1), 1e-5)),
275 |         ]
276 |         # 改变数量级
277 |         rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV
278 |         ix += 1
279 |     return rpn_match, rpn_bbox
280 | 
281 | 
282 | 
283 | 
284 | def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None,
285 |                    batch_size=1, detection_targets=False,
286 |                    no_augmentation_sources=None):
287 |     """
288 |     网络输入清单
289 |     - images: [batch, H, W, C]
290 |     - image_meta: [batch, (meta data)] 图像详细信息。
291 |     - rpn_match: [batch, N] 代表建议框的匹配情况 (1=正样本, -1=负样本, 0=中性)
292 |     - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] 建议框网络应该有的预测结果.
293 |     - gt_class_ids: [batch, MAX_GT_INSTANCES] 种类ID
294 |     - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]
295 |     - gt_masks: [batch, height, width, MAX_GT_INSTANCES].
296 |     网络输出清单:
297 |         在常规训练中通常是空的。
298 |     """
299 |     b = 0  # batch item index
300 |     image_index = -1
301 |     image_ids = np.copy(dataset.image_ids)
302 |     no_augmentation_sources = no_augmentation_sources or []
303 | 
304 |     # [anchor_count, (y1, x1, y2, x2)]
305 |     # 计算获得先验框
306 |     backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE)
307 |     anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES,
308 |                                              config.RPN_ANCHOR_RATIOS,
309 |                                              backbone_shapes,
310 |                                              config.BACKBONE_STRIDES,
311 |                                              config.RPN_ANCHOR_STRIDE)
312 | 
313 |     while True:
314 | 
315 |         image_index = (image_index + 1) % len(image_ids)
316 |         if shuffle and image_index == 0:
317 |             np.random.shuffle(image_ids)
318 | 
319 |         # 获得id
320 |         image_id = image_ids[image_index]
321 | 
322 |         # 获得图片，真实框，语义分割结果等
323 |         if dataset.image_info[image_id]['source'] in no_augmentation_sources:
324 |             image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
325 |             load_image_gt(dataset, config, image_id, augment=augment,
326 |                             augmentation=None,
327 |                             use_mini_mask=config.USE_MINI_MASK)
328 |         else:
329 |             image, image_meta, gt_class_ids, gt_boxes, gt_masks = \
330 |                 load_image_gt(dataset, config, image_id, augment=augment,
331 |                             augmentation=augmentation,
332 |                             use_mini_mask=config.USE_MINI_MASK)
333 | 
334 |         if not np.any(gt_class_ids > 0):
335 |             continue
336 | 
337 |         # RPN Targets
338 |         rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors,
339 |                                                 gt_class_ids, gt_boxes, config)
340 | 
341 |         # 如果某张图片里面物体的数量大于最大值的话，则进行筛选，防止过大
342 |         if gt_boxes.shape[0] > config.MAX_GT_INSTANCES:
343 |             ids = np.random.choice(
344 |                 np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False)
345 |             gt_class_ids = gt_class_ids[ids]
346 |             gt_boxes = gt_boxes[ids]
347 |             gt_masks = gt_masks[:, :, ids]
348 | 
349 |         # 初始化用于训练的内容
350 |         if b == 0:
351 |             batch_image_meta = np.zeros(
352 |                 (batch_size,) + image_meta.shape, dtype=image_meta.dtype)
353 |             batch_rpn_match = np.zeros(
354 |                 [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype)
355 |             batch_rpn_bbox = np.zeros(
356 |                 [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype)
357 |             batch_images = np.zeros(
358 |                 (batch_size,) + image.shape, dtype=np.float32)
359 |             batch_gt_class_ids = np.zeros(
360 |                 (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32)
361 |             batch_gt_boxes = np.zeros(
362 |                 (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32)
363 |             batch_gt_masks = np.zeros(
364 |                 (batch_size, gt_masks.shape[0], gt_masks.shape[1],
365 |                     config.MAX_GT_INSTANCES), dtype=gt_masks.dtype)
366 |         
367 |         # 将当前信息加载进batch
368 |         batch_image_meta[b] = image_meta
369 |         batch_rpn_match[b] = rpn_match[:, np.newaxis]
370 |         batch_rpn_bbox[b] = rpn_bbox
371 |         batch_images[b] = utils.mold_image(image.astype(np.float32), config)
372 |         batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids
373 |         batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes
374 |         batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks 
375 | 
376 |         b += 1
377 |         
378 |         # 判断是否已经将batch_size全部载入
379 |         if b >= batch_size:
380 |             inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox,
381 |                         batch_gt_class_ids, batch_gt_boxes, batch_gt_masks]
382 |             outputs = []
383 | 
384 |             yield inputs, outputs
385 |             # 开始一个新的batch_size
386 |             b = 0


--------------------------------------------------------------------------------
/mrcnn/restnet.py:
--------------------------------------------------------------------------------
 1 | from tensorflow.keras.layers import ZeroPadding2D,Conv2D,MaxPooling2D,BatchNormalization,Activation,Add
 2 | def identity_block(input_tensor, kernel_size, filters, stage, block,
 3 |                    use_bias=True, train_bn=True):
 4 |     nb_filter1, nb_filter2, nb_filter3 = filters
 5 |     conv_name_base = 'res' + str(stage) + block + '_branch'
 6 |     bn_name_base = 'bn' + str(stage) + block + '_branch'
 7 | 
 8 |     x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a',
 9 |                   use_bias=use_bias)(input_tensor)
10 |     x = BatchNormalization(name=bn_name_base + '2a')(x, training=train_bn)
11 |     x = Activation('relu')(x)
12 | 
13 |     x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same',
14 |                   name=conv_name_base + '2b', use_bias=use_bias)(x)
15 |     x = BatchNormalization(name=bn_name_base + '2b')(x, training=train_bn)
16 |     x = Activation('relu')(x)
17 | 
18 |     x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c',
19 |                   use_bias=use_bias)(x)
20 |     x = BatchNormalization(name=bn_name_base + '2c')(x, training=train_bn)
21 | 
22 |     x = Add()([x, input_tensor])
23 |     x = Activation('relu', name='res' + str(stage) + block + '_out')(x)
24 |     return x
25 | 
26 | def conv_block(input_tensor, kernel_size, filters, stage, block,
27 |                strides=(2, 2), use_bias=True, train_bn=True):
28 | 
29 |     nb_filter1, nb_filter2, nb_filter3 = filters
30 |     conv_name_base = 'res' + str(stage) + block + '_branch'
31 |     bn_name_base = 'bn' + str(stage) + block + '_branch'
32 | 
33 |     x = Conv2D(nb_filter1, (1, 1), strides=strides,
34 |                   name=conv_name_base + '2a', use_bias=use_bias)(input_tensor)
35 |     x = BatchNormalization(name=bn_name_base + '2a')(x, training=train_bn)
36 |     x = Activation('relu')(x)
37 | 
38 |     x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same',
39 |                   name=conv_name_base + '2b', use_bias=use_bias)(x)
40 |     x = BatchNormalization(name=bn_name_base + '2b')(x, training=train_bn)
41 |     x = Activation('relu')(x)
42 | 
43 |     x = Conv2D(nb_filter3, (1, 1), name=conv_name_base +
44 |                   '2c', use_bias=use_bias)(x)
45 |     x = BatchNormalization(name=bn_name_base + '2c')(x, training=train_bn)
46 | 
47 |     shortcut = Conv2D(nb_filter3, (1, 1), strides=strides,
48 |                          name=conv_name_base + '1', use_bias=use_bias)(input_tensor)
49 |     shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut, training=train_bn)
50 | 
51 |     x = Add()([x, shortcut])
52 |     x = Activation('relu', name='res' + str(stage) + block + '_out')(x)
53 |     return x
54 | 
55 | def get_resnet(input_image,stage5=False, train_bn=True):
56 |     # Stage 1
57 |     x = ZeroPadding2D((3, 3))(input_image)
58 |     x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x)
59 |     x = BatchNormalization(name='bn_conv1')(x, training=train_bn)
60 |     x = Activation('relu')(x)
61 |     # Height/4,Width/4,64
62 |     C1 = x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x)
63 |     # Stage 2
64 |     x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), train_bn=train_bn)
65 |     x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', train_bn=train_bn)
66 |     # Height/4,Width/4,256
67 |     C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', train_bn=train_bn)
68 |     # Stage 3
69 |     x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', train_bn=train_bn)
70 |     x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', train_bn=train_bn)
71 |     x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', train_bn=train_bn)
72 |     # Height/8,Width/8,512
73 |     C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn)
74 |     # Stage 4
75 |     x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', train_bn=train_bn)
76 |     block_count = 22
77 |     for i in range(block_count):
78 |         x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn)
79 |     # Height/16,Width/16,1024
80 |     C4 = x
81 |     # Stage 5
82 |     if stage5:
83 |         x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', train_bn=train_bn)
84 |         x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', train_bn=train_bn)
85 |         # Height/32,Width/32,2048
86 |         C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn)
87 |     else:
88 |         C5 = None
89 |     return [C1, C2, C3, C4, C5]


--------------------------------------------------------------------------------
/parallel_model.py:
--------------------------------------------------------------------------------
  1 | '''
  2 | Mask RCNN
  3 | Multi-GPU Support for tensorflow
  4 | 
  5 | Ideas and a small code snippets from these sources:
  6 | https://github.com/fchollet/keras/issues/2436
  7 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
  8 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
  9 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
 10 | '''
 11 | 
 12 | import tensorflow as tf
 13 | import tensorflow.keras.backend as K
 14 | import tensorflow.keras.layers as KL
 15 | import tensorflow.keras.models as KM
 16 | from zmq import device
 17 | 
 18 | class ParallelModel(KM.Model):
 19 |     '''
 20 |     Subclasses the standard Keras Model and adds multi-GPU support.
 21 |     It works by creating a copy of the model on each GPU. Then it slices the inputs and 
 22 |     sends a slice to eack copy of the model, and then merges the outpus together and 
 23 |     applies the loss on the combined outputs.
 24 |     '''
 25 | 
 26 |     def __init__(self, mask_rcnn_model, gpu_count):
 27 |         '''
 28 |         Class constructor
 29 |         mask rcnn model: The model to patallelize
 30 |         gpu_count: Number of GPUS, must be >1
 31 |         '''
 32 |         self.inner_model = mask_rcnn_model
 33 |         self.gpu_count = gpu_count
 34 |         merge_outputs = self.make_parallel()
 35 |         super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, outputs=merge_outputs)
 36 |     
 37 | 
 38 |     def __getattribute__(self, attrname):
 39 |         '''
 40 |         Redirect loading and saving methods to the inner model. That's where the weights are stored.
 41 |         '''
 42 |         if 'load' in attrname or 'save' in attrname:
 43 |             return getattr(self.inner_model, attrname)
 44 |         return super(ParallelModel, self).__getattribute__(attrname)
 45 | 
 46 |     def summary(self, *args, **kwargs):
 47 |         '''
 48 |         Override summary() to display summaries of both, the wrapper and inner models
 49 |         '''
 50 |         super(ParallelModel, self).summary(*args, **kwargs)
 51 |         self.inner_model.summary(*args, **kwargs)
 52 |     
 53 |     def make_parallel(self):
 54 |         '''
 55 |         Creates a new wrapper model that consists of multiple replicas of the original model placed on different GPUs.
 56 |         '''
 57 |         # Slice inputs. Slice inputs on the CPU to avoid sending a copy
 58 |         # of the full inputs to all GPUs. Saves on bandwidth and memory
 59 |         input_slices = {name:tf.split(x, self.gpu_count) for name, x in zip(self.inner_model.input_names,self.inner_model.inputs)}
 60 |         output_names = self.inner_model.output_names
 61 |         outputs_all = []
 62 |         for i in range(len(self.inner_model.outputs)):
 63 |             outputs_all.append([])
 64 |         
 65 |         # Run the model call() on each GPU to place the ops there
 66 |         for i in range(self.gpu_count):
 67 |             with tf.device('/gpu:%d' % i):
 68 |                 with tf.name_scope('tower_%d' % i):
 69 |                     # Run a slice of inputs through this replica
 70 |                     zipped_inputs = zip(self.inner_model.input_names,
 71 |                                         self.inner_model.inputs)
 72 |                     inputs = [
 73 |                         KL.Lambda(lambda s: input_slices[name][i],
 74 |                                   output_shape=lambda s: (None,) + s[1:])(tensor)
 75 |                         for name, tensor in zipped_inputs]
 76 |                     # Create the model replica and get the outputs
 77 |                     outputs = self.inner_model(inputs)
 78 |                     if not isinstance(outputs, list):
 79 |                         outputs = [outputs]
 80 |                     # Save the outputs for merging back together later
 81 |                     for l, o in enumerate(outputs):
 82 |                         outputs_all[l].append(o)
 83 |             
 84 |         # Merge outputs on CPU
 85 |         with tf.device('/cpu:0'):
 86 |             merged = []
 87 |             for outputs, name in zip(outputs_all, output_names):
 88 |                 # Concatenate or average outputs?
 89 |                 # Outputs usually have a batch dimension and we concatenate
 90 |                 # across it. If they don't, then the output is likely a loss
 91 |                 # or a metric value that gets averaged across the batch.
 92 |                 # Keras expects losses and metrics to be scalars.
 93 |                 if K.int_shape(outputs[0]) == ():
 94 |                     # Average
 95 |                     m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs)
 96 |                 else:
 97 |                     # Concatenate
 98 |                     m = KL.Concatenate(axis=0, name=name)(outputs)
 99 |                 merged.append(m)
100 |         return merged
101 | 
102 | if __name__ == "__main__":
103 |     # Testing code below. It creates a simple model to train on MNIST and
104 |     # tries to run it on 2 GPUs. It saves the graph so it can be viewed
105 |     # in TensorBoard. Run it as:
106 |     #
107 |     # python3 parallel_model.py
108 | 
109 |     import os
110 |     import numpy as np
111 |     import keras.optimizers
112 |     from keras.datasets import mnist
113 |     from keras.preprocessing.image import ImageDataGenerator
114 | 
115 |     GPU_COUNT = 2
116 | 
117 |     # Root directory of the project
118 |     ROOT_DIR = os.path.abspath("../")
119 | 
120 |     # Directory to save logs and trained model
121 |     MODEL_DIR = os.path.join(ROOT_DIR, "logs")
122 | 
123 |     def build_model(x_train, num_classes):
124 |         # Reset default graph. Keras leaves old ops in the graph,
125 |         # which are ignored for execution but clutter graph
126 |         # visualization in TensorBoard.
127 |         tf.reset_default_graph()
128 | 
129 |         inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
130 |         x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
131 |                       name="conv1")(inputs)
132 |         x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
133 |                       name="conv2")(x)
134 |         x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
135 |         x = KL.Flatten(name="flat1")(x)
136 |         x = KL.Dense(128, activation='relu', name="dense1")(x)
137 |         x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
138 | 
139 |         return KM.Model(inputs, x, "digit_classifier_model")
140 | 
141 |     # Load MNIST Data
142 |     (x_train, y_train), (x_test, y_test) = mnist.load_data()
143 |     x_train = np.expand_dims(x_train, -1).astype('float32') / 255
144 |     x_test = np.expand_dims(x_test, -1).astype('float32') / 255
145 | 
146 |     print('x_train shape:', x_train.shape)
147 |     print('x_test shape:', x_test.shape)
148 | 
149 |     # Build data generator and model
150 |     datagen = ImageDataGenerator()
151 |     model = build_model(x_train, 10)
152 | 
153 |     # Add multi-GPU support.
154 |     model = ParallelModel(model, GPU_COUNT)
155 | 
156 |     optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
157 | 
158 |     model.compile(loss='sparse_categorical_crossentropy',
159 |                   optimizer=optimizer, metrics=['accuracy'])
160 | 
161 |     model.summary()
162 | 
163 |     # Train
164 |     model.fit_generator(
165 |         datagen.flow(x_train, y_train, batch_size=64),
166 |         steps_per_epoch=50, epochs=10, verbose=1,
167 |         validation_data=(x_test, y_test),
168 |         callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
169 |                                                write_graph=True)]
170 |     )
171 | 
172 | 
173 | 


--------------------------------------------------------------------------------
/regularization.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import cv2
 3 | from contourprocess import regularization
 4 | 
 5 | 
 6 | ori_img1 = cv2.imread('./test.jpg')
 7 | # 中值滤波，去噪
 8 | ori_img = cv2.medianBlur(ori_img1, 5)
 9 | ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
10 | ret, ori_img = cv2.threshold(ori_img, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
11 | # 连通域分析
12 | num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8)
13 | 
14 | regularization_contours = []
15 | # 遍历联通域
16 | for i in range(1, num_labels):
17 |     img = np.zeros_like(labels)
18 |     index = np.where(labels==i)
19 |     img[index] = 255
20 |     img = np.array(img, dtype=np.uint8)
21 | 
22 |     regularization_contour =regularization.boundary_regularization(img).astype(np.int32)
23 |     regularization_contours.append(regularization_contour)
24 |     
25 |     single_out = np.zeros_like(ori_img1)
26 |     cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=3)
27 |     cv2.imwrite('./result/single_out_{}.jpg'.format(i), single_out)
28 | 
29 | 
30 | 
31 | cv2.polylines(img=ori_img1, pts=regularization_contours, isClosed=True, color=(255, 0, 0), thickness=3)
32 | cv2.imwrite('all_out.jpg', ori_img1)


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | numpy
 2 | scipy
 3 | Pillow
 4 | cython
 5 | matplotlib
 6 | scikit-image
 7 | tensorflow>=1.3.0
 8 | keras>=2.0.8
 9 | opencv-python
10 | h5py
11 | imgaug


--------------------------------------------------------------------------------
/train.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | from PIL import Image
  3 | import numpy as np
  4 | import random
  5 | import tensorflow as tf
  6 | from utils import visualize
  7 | from utils.anchors import get_anchors
  8 | from utils.utils import mold_inputs,unmold_detections
  9 | from mrcnn.mrcnn import get_model
 10 | from mrcnn.mrcnn_training import data_generator
 11 | from utils.customerDataset import CustomerDataset
 12 | from config import CustomerConfig
 13 | 
 14 | # tf.compat.v1.disable_eager_execution()
 15 | 
 16 | def log(text, array=None):
 17 |     if array is not None:
 18 |         text = text.ljust(25)
 19 |         text += ("shape: {:20}  ".format(str(array.shape)))
 20 |         if array.size:
 21 |             text += ("min: {:10.5f}  max: {:10.5f}".format(array.min(),array.max()))
 22 |         else:
 23 |             text += ("min: {:10}  max: {:10}".format("",""))
 24 |         text += "  {}".format(array.dtype)
 25 |     print(text)
 26 | 
 27 | 
 28 | 
 29 | if __name__ == "__main__":
 30 |     learning_rate = CustomerConfig.LEARNING_RATE
 31 |     init_epoch = 0
 32 |     epoch = CustomerConfig.EPOCH
 33 |     
 34 |     dataset_root_path=CustomerConfig.TRAIN_DATASET
 35 |     img_floder =os.path.join(dataset_root_path, "imgs")
 36 |     mask_floder = os.path.join(dataset_root_path, "mask")
 37 |     yaml_floder = os.path.join(dataset_root_path, "yaml")
 38 |     imglist = os.listdir(img_floder)
 39 | 
 40 |     count = len(imglist)
 41 |     np.random.seed(10101)
 42 |     np.random.shuffle(imglist)
 43 |     train_imglist = imglist[:int(count*0.9)]
 44 |     val_imglist = imglist[int(count*0.9):]
 45 | 
 46 |     MODEL_DIR = "logs"
 47 | 
 48 |     COCO_MODEL_PATH = CustomerConfig.PRETRAIN_MODEL
 49 |     config = CustomerConfig()
 50 |     # 计算训练集和验证集长度
 51 |     config.STEPS_PER_EPOCH = len(train_imglist)//config.IMAGES_PER_GPU
 52 |     config.VALIDATION_STEPS = len(val_imglist)//config.IMAGES_PER_GPU
 53 |     config.display()
 54 | 
 55 |     # 训练数据集准备
 56 |     dataset_train = CustomerDataset()
 57 |     dataset_train.load_dataset(config.NAME,len(train_imglist), config.CLASSES, img_floder, mask_floder, train_imglist, yaml_floder, train_mode=True)
 58 |     dataset_train.prepare()
 59 | 
 60 |     # 验证数据集准备
 61 |     dataset_val = CustomerDataset()
 62 |     dataset_val.load_dataset(config.NAME,len(val_imglist), config.CLASSES, img_floder, mask_floder, val_imglist, yaml_floder, train_mode=True)
 63 |     dataset_val.prepare()
 64 | 
 65 |     # 获得训练模型
 66 |     model = get_model(config, training=True)
 67 |     model.summary()
 68 |     model.load_weights(COCO_MODEL_PATH,by_name=True,skip_mismatch=True)
 69 | 
 70 |     # 数据生成器
 71 |     train_generator = data_generator(dataset_train, config, shuffle=True,
 72 |                                         batch_size=config.BATCH_SIZE)
 73 |     val_generator = data_generator(dataset_val, config, shuffle=True,
 74 |                                     batch_size=config.BATCH_SIZE)
 75 | 
 76 |     # 设置callbacks
 77 |     tensorboard = tf.keras.callbacks.TensorBoard(log_dir=MODEL_DIR,histogram_freq=0, write_graph=True, write_images=False)
 78 |     model_ckp= tf.keras.callbacks.ModelCheckpoint(os.path.join(MODEL_DIR, "building_new.h5"),verbose=0, save_weights_only=True)
 79 |     early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1)
 80 |     learning_rate_reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, verbose=1)
 81 |     callbacks = [tensorboard, model_ckp, early_stop, learning_rate_reduce]
 82 |     
 83 |     # callbacks = [
 84 |     #     tf.keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
 85 |     #                                 histogram_freq=0, write_graph=True, write_images=False),
 86 |     #     tf.keras.callbacks.ModelCheckpoint(os.path.join(MODEL_DIR, "epoch{epoch:03d}_loss{loss:.3f}_val_loss{val_loss:.3f}.h5"),
 87 |     #                                     verbose=0, save_weights_only=True),
 88 |     # ]
 89 | 
 90 | 
 91 |     if True:
 92 |         log("\nStarting at epoch {}. LR={}\n".format(init_epoch, learning_rate))
 93 |         log("Checkpoint Path: {}".format(MODEL_DIR))
 94 | 
 95 |         # 使用的优化器是
 96 |         optimizer = tf.keras.optimizers.Adam(lr=learning_rate, clipnorm=config.GRADIENT_CLIP_NORM)
 97 | 
 98 |         # 设置一下loss信息
 99 |         loss_names = [
100 |             "rpn_class_loss",  "rpn_bbox_loss",
101 |             "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"]
102 |         for name in loss_names:
103 |             layer = model.get_layer(name)
104 |             if layer.output in model.losses:
105 |                 continue
106 |             loss = (
107 |                 tf.reduce_mean(input_tensor=layer.output, keepdims=True)
108 |                 * config.LOSS_WEIGHTS.get(name, 1.))
109 |             model.add_loss(loss)
110 |         
111 |         # Add L2 Regularization
112 |         # Skip gamma and beta weights of batch normalization layers.
113 |         reg_losses = [
114 |             tf.keras.regularizers.l2(config.WEIGHT_DECAY)(w) / tf.cast(tf.size(input=w), tf.float32)
115 |             for w in model.trainable_weights
116 |             if 'gamma' not in w.name and 'beta' not in w.name]
117 |         model.add_loss(tf.add_n(reg_losses))
118 | 
119 | 
120 |         # 进行编译
121 |         model.compile(
122 |             optimizer=optimizer,
123 |             loss=[None] * len(model.outputs)
124 |         )
125 | 
126 |         # 用于显示训练情况
127 |         for name in loss_names:
128 |             if name in model.metrics_names:
129 |                 print(name)
130 |                 continue
131 |             layer = model.get_layer(name)
132 |             model.metrics_names.append(name)
133 |             loss = (
134 |                 tf.reduce_mean(input_tensor=layer.output, keepdims=True)
135 |                 * config.LOSS_WEIGHTS.get(name, 1.))
136 |             model.add_metric(loss, name=name, aggregation='mean')
137 | 
138 | 
139 |         model.fit_generator(
140 |             train_generator,
141 |             initial_epoch=init_epoch,
142 |             epochs=epoch,
143 |             steps_per_epoch=config.STEPS_PER_EPOCH,
144 |             callbacks=callbacks,
145 |             validation_data=val_generator,
146 |             validation_steps=config.VALIDATION_STEPS,
147 |             max_queue_size=100
148 |         )
149 | 


--------------------------------------------------------------------------------
/utils/anchors.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import math
 3 | from utils.utils import norm_boxes
 4 | #----------------------------------------------------------#
 5 | #  Anchors
 6 | #----------------------------------------------------------#
 7 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
 8 |     # 获得所有框的长度和比例的组合
 9 |     scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
10 |     scales = scales.flatten()
11 |     ratios = ratios.flatten()
12 |     heights = scales / np.sqrt(ratios)
13 |     widths = scales * np.sqrt(ratios)
14 | 
15 |     # 生成网格中心
16 |     shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
17 |     shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
18 |     shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
19 | 
20 |     # 获得先验框的中心和宽高
21 |     box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
22 |     box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
23 | 
24 |     # 更变格式
25 |     box_centers = np.stack(
26 |         [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
27 |     box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
28 | 
29 |     # 计算出(y1, x1, y2, x2)
30 |     boxes = np.concatenate([box_centers - 0.5 * box_sizes,
31 |                             box_centers + 0.5 * box_sizes], axis=1)
32 |     return boxes
33 | 
34 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
35 |                              anchor_stride):
36 |     """
37 |     生成不同特征层的anchors，并利用concatenate进行堆叠
38 |     """
39 |     # Anchors
40 |     # [anchor_count, (y1, x1, y2, x2)]
41 |     # P2对应的scale是32
42 |     # P3对应的scale是64
43 |     # P4对应的scale是128
44 |     # P5对应的scale是256
45 |     # P6对应的scale是512
46 |     anchors = []
47 |     for i in range(len(scales)):
48 |         anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
49 |                                         feature_strides[i], anchor_stride))
50 |         
51 |     return np.concatenate(anchors, axis=0)
52 | 
53 | def compute_backbone_shapes(config, image_shape):
54 |     # 用于计算主干特征提取网络的shape
55 |     if callable(config.BACKBONE):
56 |         return config.COMPUTE_BACKBONE_SHAPE(image_shape)
57 |     # 其实就是计算P2、P3、P4、P5、P6这些特征层的宽和高
58 |     assert config.BACKBONE in ["resnet50", "resnet101"]
59 |     return np.array(
60 |         [[int(math.ceil(image_shape[0] / stride)),
61 |             int(math.ceil(image_shape[1] / stride))]
62 |             for stride in config.BACKBONE_STRIDES])
63 | 
64 | def get_anchors(config, image_shape):
65 |     backbone_shapes = compute_backbone_shapes(config, image_shape)
66 |     anchor_cache = {}
67 |     if not tuple(image_shape) in anchor_cache:
68 |         a = generate_pyramid_anchors(
69 |             config.RPN_ANCHOR_SCALES,
70 |             config.RPN_ANCHOR_RATIOS,
71 |             backbone_shapes,
72 |             config.BACKBONE_STRIDES,
73 |             config.RPN_ANCHOR_STRIDE)
74 |         anchor_cache[tuple(image_shape)] = norm_boxes(a, image_shape[:2])
75 |     return anchor_cache[tuple(image_shape)]


--------------------------------------------------------------------------------
/utils/config.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | 
  3 | class Config(object):
  4 |     """
  5 |     基本配置类。对于自定义配置，请创建
  6 |     继承自该类并重写属性的子类
  7 |     """
  8 |     # 名称
  9 |     NAME = None 
 10 | 
 11 |     # GPU数量
 12 |     GPU_COUNT = 1
 13 | 
 14 |     # 每个GPU的图片数量
 15 |     IMAGES_PER_GPU = 2
 16 | 
 17 |     # 每个世代的步长
 18 |     STEPS_PER_EPOCH = 1000
 19 | 
 20 |     # 验证集长度
 21 |     VALIDATION_STEPS = 50
 22 | 
 23 |     COMPUTE_BACKBONE_SHAPE = None
 24 | 
 25 |     # 特征金字塔的步长
 26 |     BACKBONE_STRIDES = [4, 8, 16, 32, 64]
 27 | 
 28 |     # 分类图中完全连接层的大小
 29 |     FPN_CLASSIF_FC_LAYERS_SIZE = 1024
 30 | 
 31 |     # 用于构建特征金字塔的自上而下层的大小
 32 |     TOP_DOWN_PYRAMID_SIZE = 256
 33 | 
 34 |     # 分类类别数（包括背景）
 35 |     NUM_CLASSES = 1 
 36 | 
 37 |     # 建议框的先验框的长度（像素）
 38 |     RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
 39 |     # 先验框的变化比率
 40 |     RPN_ANCHOR_RATIOS = [0.5, 1, 2]
 41 | 
 42 |     # 建议框步长
 43 |     RPN_ANCHOR_STRIDE = 1
 44 | 
 45 |     # 建议框的非极大抑制的值
 46 |     RPN_NMS_THRESHOLD = 0.7
 47 | 
 48 |     # 每个图像有多少先验框用于RPN培训
 49 |     RPN_TRAIN_ANCHORS_PER_IMAGE = 256
 50 | 
 51 |     # 非极大抑制前的框的数量
 52 |     PRE_NMS_LIMIT = 6000
 53 | 
 54 |     # 非最大抑制后保持的ROI（训练和推理）
 55 |     POST_NMS_ROIS_TRAINING = 2000
 56 |     POST_NMS_ROIS_INFERENCE = 1000
 57 | 
 58 |     # 是否使用Mini Mask
 59 |     USE_MINI_MASK = True
 60 |     MINI_MASK_SHAPE = (56, 56)  # (height, width)
 61 | 
 62 |     BACKBONE = "resnet101"
 63 |     # 可选择的
 64 |     # square: 调整大小并用零填充以获得大小的方形图像 [max_dim, max_dim].
 65 |     # pad64:  如果IMAGE_MIN_DIM或IMAGE_MIN_SCALE 不是“无”，则在填充之前它会先放大。
 66 |     #         在此中忽略图像最大亮度模式需要64的倍数，以确保在FPN金字塔的6个级别上下平滑地缩放特征地图（2**6=64）。
 67 |     # crop:   从图像中随机选取作物。首先，根据图像亮度和图像灰度对图像进行缩放，
 68 |     #         然后随机选取一个大小为image_MIN_DIM x image_MIN_DIM的裁剪。只能用于培训。此模式下不使用图像最大亮度。
 69 |     IMAGE_RESIZE_MODE = "square"
 70 |     IMAGE_MIN_DIM = 800
 71 |     IMAGE_MAX_DIM = 1024
 72 |     # 最小比例。在IMAGE_MIN_DIM后检查，可以强制进一步放大。例如，如果设置为2，
 73 |     # 则图像将缩放为宽度和高度的两倍或更多，即使MIN_IMAGE_DIM不需要它。然而，在“正方形”模式下，它可能会被图像_MAX_DIM否决。
 74 |     IMAGE_MIN_SCALE = 0
 75 |     # RGB = 3, grayscale = 1, RGB-D = 4
 76 |     IMAGE_CHANNEL_COUNT = 3
 77 | 
 78 |     # Image mean (RGB)
 79 |     MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
 80 | 
 81 |     # 训练用的ROIS数量
 82 |     TRAIN_ROIS_PER_IMAGE = 200
 83 | 
 84 |     # 正样本比例
 85 |     ROI_POSITIVE_RATIO = 0.33
 86 | 
 87 |     # 池化方式
 88 |     POOL_SIZE = 7
 89 |     MASK_POOL_SIZE = 14
 90 | 
 91 |     # Mask
 92 |     MASK_SHAPE = [28, 28]
 93 | 
 94 |     MAX_GT_INSTANCES = 100
 95 | 
 96 |     # 标准化比率
 97 |     RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
 98 |     BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
 99 | 
100 |     DETECTION_MAX_INSTANCES = 100
101 | 
102 |     # 置信度
103 |     DETECTION_MIN_CONFIDENCE = 0.7
104 | 
105 |     # 非极大抑制
106 |     DETECTION_NMS_THRESHOLD = 0.3
107 | 
108 |     WEIGHT_DECAY = 0.0001
109 | 
110 |     # 损失的比重
111 |     LOSS_WEIGHTS = {
112 |         "rpn_class_loss": 1.,
113 |         "rpn_bbox_loss": 1.,
114 |         "mrcnn_class_loss": 1.,
115 |         "mrcnn_bbox_loss": 1.,
116 |         "mrcnn_mask_loss": 1.
117 |     }
118 | 
119 | 
120 |     USE_RPN_ROIS = True
121 | 
122 |     # 是否冻结BN层
123 |     TRAIN_BN = False 
124 | 
125 |     GRADIENT_CLIP_NORM = 5.0
126 | 
127 |     def __init__(self):
128 |         # 计算BATCH
129 |         self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
130 | 
131 |         if self.IMAGE_RESIZE_MODE == "crop":
132 |             self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
133 |                 self.IMAGE_CHANNEL_COUNT])
134 |         else:
135 |             self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
136 |                 self.IMAGE_CHANNEL_COUNT])
137 | 
138 |         self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
139 | 
140 |     def display(self):
141 |         print("\nConfigurations:")
142 |         for a in dir(self):
143 |             if not a.startswith("__") and not callable(getattr(self, a)):
144 |                 print("{:30} {}".format(a, getattr(self, a)))
145 |         print("\n")


--------------------------------------------------------------------------------
/utils/customerDataset.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import numpy as np
 3 | from PIL import Image
 4 | import yaml
 5 | from .dataset import Dataset
 6 | from .utils import non_max_suppression
 7 | 
 8 | class CustomerDataset(Dataset):
 9 |     #得到该图中有多少个实例（物体）
10 |     def get_obj_index(self, image):
11 |         n = np.max(image)
12 |         return n
13 |     #解析labelme中得到的yaml文件，从而得到mask每一层对应的实例标签
14 |     def from_yaml_get_class(self,image_id):
15 |         info=self.image_info[image_id]
16 |         with open(info['yaml_path']) as f:
17 |             temp=yaml.load(f.read(), Loader=yaml.FullLoader)
18 |             labels=temp['label_names']
19 |             del labels[0]
20 |         return labels
21 | 
22 |     def draw_mask(self, num_obj, mask, image, image_id):
23 |         npz_save_name =os.path.join(save_path, base_name)
24 |         # 已经生成npz文件则跳过
25 |         if os.path.exists(npz_save_name):
26 |             return None
27 |         info = self.image_info[image_id]
28 |         for index in range(num_obj):
29 |             for i in range(np.shape(mask)[1]):
30 |                 for j in range(np.shape(mask)[0]):
31 |                     at_pixel = image.getpixel((i, j))
32 |                     if at_pixel == index + 1:
33 |                         mask[j, i, index] =1 
34 |         save_path = os.path.dirname(info['mask_path'])
35 |         base_name = os.path.basename(info['path'])
36 |         base_name = os.path.splitext(base_name)[0]
37 |         np.savez_compressed(npz_save_name, mask)
38 |         return mask
39 | 
40 |     #并在self.image_info信息中添加了path、mask_path 、yaml_path
41 |     def load_dataset(self, shape_name, count, classes, img_floder, mask_floder, imglist, yaml_floder, train_mode = True):
42 |         for index, item in enumerate(classes):
43 |             self.add_class(shape_name, index+1, item)
44 |         for i in range(count):
45 |             img = imglist[i]
46 |             if img.endswith(".jpg"):
47 |                 img_name = img.split(".")[0]
48 |                 img_path = os.path.join(img_floder, img)
49 |                 if train_mode:
50 |                     mask_path = os.path.join(mask_floder, img_name + ".npz")
51 |                 else:
52 |                     # npz文件加载
53 |                     mask_path = os.path.join(mask_floder, img_name + ".png")
54 |                 yaml_path = os.path.join(yaml_floder, img_name + ".yaml")
55 |                 self.add_image(shape_name, image_id=i, path=img_path, mask_path=mask_path,yaml_path=yaml_path)
56 |     #重写load_mask
57 |     def load_mask(self, image_id, train_mode = True):
58 |         info = self.image_info[image_id]
59 |         if train_mode:
60 |             # 训练模式下加载npz数据
61 |             mask = np.load(info['mask_path'])['arr_0']
62 |         else:
63 |             # 生成npz文件
64 |             img = Image.open(info['mask_path'])
65 |             num_obj = self.get_obj_index(img)
66 |             mask = np.zeros([np.shape(img)[0], np.shape(img)[1], num_obj], dtype=np.uint8)
67 |             mask = self.draw_mask(num_obj, mask, img, image_id)
68 |         labels=[]
69 |         labels=self.from_yaml_get_class(image_id)
70 |         class_ids = np.array([self.class_names.index(s) for s in labels])
71 |         return mask, class_ids.astype(np.int32)


--------------------------------------------------------------------------------
/utils/dataset.py:
--------------------------------------------------------------------------------
  1 | import numpy as np
  2 | import skimage
  3 | import logging
  4 | import skimage.color
  5 | import skimage.io
  6 | import skimage.transform
  7 | #----------------------------------------------------------#
  8 | #  Dataset
  9 | #----------------------------------------------------------#
 10 | class Dataset(object):
 11 |     # 数据集训练的基本格式
 12 |     def __init__(self, class_map=None):
 13 |         self._image_ids = []
 14 |         self.image_info = []
 15 |         # 背景作为第一分类
 16 |         self.class_info = [{"source": "", "id": 0, "name": "BG"}]
 17 |         self.source_class_ids = {}
 18 | 
 19 |     def add_class(self, source, class_id, class_name):
 20 |         assert "." not in source, "Source name cannot contain a dot"
 21 |         # 用于增加新的类
 22 |         for info in self.class_info:
 23 |             if info['source'] == source and info["id"] == class_id:
 24 |                 return
 25 |         self.class_info.append({
 26 |             "source": source,
 27 |             "id": class_id,
 28 |             "name": class_name,
 29 |         })
 30 | 
 31 |     def add_image(self, source, image_id, path, **kwargs):
 32 |         # 用于增加用于训练的图片
 33 |         image_info = {
 34 |             "id": image_id,
 35 |             "source": source,
 36 |             "path": path,
 37 |         }
 38 |         image_info.update(kwargs)
 39 |         self.image_info.append(image_info)
 40 | 
 41 |     def image_reference(self, image_id):
 42 |         return ""
 43 | 
 44 |     def prepare(self, class_map=None):
 45 |         # 准备数据
 46 |         def clean_name(name):
 47 |             """Returns a shorter version of object names for cleaner display."""
 48 |             return ",".join(name.split(",")[:1])
 49 |         # 类别数
 50 |         self.num_classes = len(self.class_info)
 51 |         self.class_ids = np.arange(self.num_classes)
 52 |         self.class_names = [clean_name(c["name"]) for c in self.class_info]
 53 | 
 54 |         self.num_images = len(self.image_info)
 55 | 
 56 |         self._image_ids = np.arange(self.num_images)
 57 | 
 58 |         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
 59 |                                       for info, id in zip(self.class_info, self.class_ids)}
 60 |         self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
 61 |                                       for info, id in zip(self.image_info, self.image_ids)}
 62 | 
 63 |         # 建立sources
 64 |         self.sources = list(set([i['source'] for i in self.class_info]))
 65 |         self.source_class_ids = {}
 66 |         # Loop over datasets
 67 |         for source in self.sources:
 68 |             self.source_class_ids[source] = []
 69 |             # Find classes that belong to this dataset
 70 |             for i, info in enumerate(self.class_info):
 71 |                 # Include BG class in all datasets
 72 |                 if i == 0 or source == info['source']:
 73 |                     self.source_class_ids[source].append(i)
 74 | 
 75 |     def map_source_class_id(self, source_class_id):
 76 |         """Takes a source class ID and returns the int class ID assigned to it.
 77 |         For example:
 78 |         dataset.map_source_class_id("coco.12") -> 23
 79 |         """
 80 |         return self.class_from_source_map[source_class_id]
 81 | 
 82 |     def get_source_class_id(self, class_id, source):
 83 |         """Map an internal class ID to the corresponding class ID in the source dataset."""
 84 |         info = self.class_info[class_id]
 85 |         assert info['source'] == source
 86 |         return info['id']
 87 | 
 88 |     @property
 89 |     def image_ids(self):
 90 |         return self._image_ids
 91 | 
 92 |     def source_image_link(self, image_id):
 93 |         return self.image_info[image_id]["path"]
 94 | 
 95 |     def load_image(self, image_id):
 96 |         """
 97 |             载入图片
 98 |         """
 99 |         # Load image
100 |         image = skimage.io.imread(self.image_info[image_id]['path'])
101 |         if image.ndim != 3:
102 |             image = skimage.color.gray2rgb(image)
103 |         if image.shape[-1] == 4:
104 |             image = image[..., :3]
105 |         return image
106 | 
107 |     def load_mask(self, image_id):
108 |         '''
109 |             载入语义分割内容
110 |         '''
111 |         logging.warning("You are using the default load_mask(), maybe you need to define your own one.")
112 |         mask = np.empty([0, 0, 0])
113 |         class_ids = np.empty([0], np.int32)
114 |         return mask, class_ids


--------------------------------------------------------------------------------
/utils/utils.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import os
  3 | import logging
  4 | import math
  5 | import random
  6 | import skimage
  7 | import skimage.transform
  8 | import numpy as np
  9 | import tensorflow as tf
 10 | import scipy
 11 | import urllib.request
 12 | import shutil
 13 | import warnings
 14 | from distutils.version import LooseVersion
 15 | 
 16 | # tf.compat.v1.disable_eager_execution()
 17 | # URL from which to download the latest COCO trained weights
 18 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
 19 | 
 20 | 
 21 | #----------------------------------------------------------#
 22 | #  Bounding Boxes
 23 | #----------------------------------------------------------#
 24 | 
 25 | def extract_bboxes(mask):
 26 |     # 利用语义分割的mask找到包围它的框
 27 |     boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
 28 |     for i in range(mask.shape[-1]):
 29 |         m = mask[:, :, i]
 30 |         horizontal_indicies = np.where(np.any(m, axis=0))[0]
 31 |         vertical_indicies = np.where(np.any(m, axis=1))[0]
 32 |         if horizontal_indicies.shape[0]:
 33 |             x1, x2 = horizontal_indicies[[0, -1]]
 34 |             y1, y2 = vertical_indicies[[0, -1]]
 35 |             x2 += 1
 36 |             y2 += 1
 37 |         else:
 38 |             x1, x2, y1, y2 = 0, 0, 0, 0
 39 |         boxes[i] = np.array([y1, x1, y2, x2])
 40 |     return boxes.astype(np.int32)
 41 | 
 42 | 
 43 | def compute_iou(box, boxes, box_area, boxes_area):
 44 |     """Calculates IoU of the given box with the array of the given boxes.
 45 |     box: 1D vector [y1, x1, y2, x2]
 46 |     boxes: [boxes_count, (y1, x1, y2, x2)]
 47 |     box_area: float. the area of 'box'
 48 |     boxes_area: array of length boxes_count.
 49 |     Note: the areas are passed in rather than calculated here for
 50 |     efficiency. Calculate once in the caller to avoid duplicate work.
 51 |     """
 52 |     # Calculate intersection areas
 53 |     y1 = np.maximum(box[0], boxes[:, 0])
 54 |     y2 = np.minimum(box[2], boxes[:, 2])
 55 |     x1 = np.maximum(box[1], boxes[:, 1])
 56 |     x2 = np.minimum(box[3], boxes[:, 3])
 57 |     intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
 58 |     union = box_area + boxes_area[:] - intersection[:]
 59 |     iou = intersection / union
 60 |     return iou
 61 | 
 62 | 
 63 | def compute_overlaps(boxes1, boxes2):
 64 |     """Computes IoU overlaps between two sets of boxes.
 65 |     boxes1, boxes2: [N, (y1, x1, y2, x2)].
 66 |     For better performance, pass the largest set first and the smaller second.
 67 |     """
 68 |     # Areas of anchors and GT boxes
 69 |     area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
 70 |     area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
 71 | 
 72 |     # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
 73 |     # Each cell contains the IoU value.
 74 |     overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
 75 |     for i in range(overlaps.shape[1]):
 76 |         box2 = boxes2[i]
 77 |         overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
 78 |     return overlaps
 79 | 
 80 | 
 81 | '''
 82 | compute_overlaps_masks() takes two arguments: 
 83 | mask1 and mask2. Assume mask1 has m instances, and mask2 has n instances, 
 84 | the function return an m*n array A. 
 85 | A[i, j] represents the IoU of ith instances of mask1 and jth instances of mask2.
 86 | '''
 87 | def compute_overlaps_masks(masks1, masks2):
 88 |     """Computes IoU overlaps between two sets of masks.
 89 |     masks1, masks2: [Height, Width, instances]
 90 |     """
 91 |     
 92 |     # If either set of masks is empty return empty result
 93 |     if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
 94 |         return np.zeros((masks1.shape[-1], masks2.shape[-1]))
 95 |     # flatten masks and compute their areas
 96 |     masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
 97 |     masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
 98 |     area1 = np.sum(masks1, axis=0)
 99 |     area2 = np.sum(masks2, axis=0)
100 | 
101 |     # intersections and union
102 |     intersections = np.dot(masks1.T, masks2)
103 |     union = area1[:, None] + area2[None, :] - intersections
104 |     overlaps = intersections / union
105 | 
106 |     return overlaps
107 | 
108 | 
109 | def non_max_suppression(boxes, scores, threshold):
110 |     """Performs non-maximum suppression and returns indices of kept boxes.
111 |     boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box.
112 |     scores: 1-D array of box scores.
113 |     threshold: Float. IoU threshold to use for filtering.
114 |     """
115 |     assert boxes.shape[0] > 0
116 |     if boxes.dtype.kind != "f":
117 |         boxes = boxes.astype(np.float32)
118 | 
119 |     # Compute box areas
120 |     y1 = boxes[:, 0]
121 |     x1 = boxes[:, 1]
122 |     y2 = boxes[:, 2]
123 |     x2 = boxes[:, 3]
124 |     area = (y2 - y1) * (x2 - x1)
125 | 
126 |     # Get indicies of boxes sorted by scores (highest first)
127 |     ixs = scores.argsort()[::-1]
128 | 
129 |     pick = []
130 |     while len(ixs) > 0:
131 |         # Pick top box and add its index to the list
132 |         i = ixs[0]
133 |         pick.append(i)
134 |         # Compute IoU of the picked box with the rest
135 |         iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
136 |         # Identify boxes with IoU over the threshold. This
137 |         # returns indices into ixs[1:], so add 1 to get
138 |         # indices into ixs.
139 |         remove_ixs = np.where(iou > threshold)[0] + 1
140 |         # Remove indices of the picked and overlapped boxes.
141 |         ixs = np.delete(ixs, remove_ixs)
142 |         ixs = np.delete(ixs, 0)
143 |     return np.array(pick, dtype=np.int32)
144 | 
145 | 
146 | def apply_box_deltas(boxes, deltas):
147 |     """Applies the given deltas to the given boxes.
148 |     boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
149 |     deltas: [N, (dy, dx, log(dh), log(dw))]
150 |     """
151 |     boxes = boxes.astype(np.float32)
152 |     # Convert to y, x, h, w
153 |     height = boxes[:, 2] - boxes[:, 0]
154 |     width = boxes[:, 3] - boxes[:, 1]
155 |     center_y = boxes[:, 0] + 0.5 * height
156 |     center_x = boxes[:, 1] + 0.5 * width
157 |     # Apply deltas
158 |     center_y += deltas[:, 0] * height
159 |     center_x += deltas[:, 1] * width
160 |     height *= np.exp(deltas[:, 2])
161 |     width *= np.exp(deltas[:, 3])
162 |     # Convert back to y1, x1, y2, x2
163 |     y1 = center_y - 0.5 * height
164 |     x1 = center_x - 0.5 * width
165 |     y2 = y1 + height
166 |     x2 = x1 + width
167 |     return np.stack([y1, x1, y2, x2], axis=1)
168 | 
169 | 
170 | def box_refinement_graph(box, gt_box):
171 |     """
172 |         编码运算
173 |     """
174 |     box = tf.cast(box, tf.float32)
175 |     gt_box = tf.cast(gt_box, tf.float32)
176 | 
177 |     height = box[:, 2] - box[:, 0]
178 |     width = box[:, 3] - box[:, 1]
179 |     center_y = box[:, 0] + 0.5 * height
180 |     center_x = box[:, 1] + 0.5 * width
181 | 
182 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
183 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
184 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
185 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
186 | 
187 |     dy = (gt_center_y - center_y) / height
188 |     dx = (gt_center_x - center_x) / width
189 |     dh = tf.math.log(gt_height / height)
190 |     dw = tf.math.log(gt_width / width)
191 | 
192 |     result = tf.stack([dy, dx, dh, dw], axis=1)
193 |     return result
194 | 
195 | 
196 | def box_refinement(box, gt_box):
197 |     """
198 |         编码运算
199 |     """
200 |     box = box.astype(np.float32)
201 |     gt_box = gt_box.astype(np.float32)
202 | 
203 |     height = box[:, 2] - box[:, 0]
204 |     width = box[:, 3] - box[:, 1]
205 |     center_y = box[:, 0] + 0.5 * height
206 |     center_x = box[:, 1] + 0.5 * width
207 | 
208 |     gt_height = gt_box[:, 2] - gt_box[:, 0]
209 |     gt_width = gt_box[:, 3] - gt_box[:, 1]
210 |     gt_center_y = gt_box[:, 0] + 0.5 * gt_height
211 |     gt_center_x = gt_box[:, 1] + 0.5 * gt_width
212 | 
213 |     dy = (gt_center_y - center_y) / height
214 |     dx = (gt_center_x - center_x) / width
215 |     dh = np.log(gt_height / height)
216 |     dw = np.log(gt_width / width)
217 | 
218 |     return np.stack([dy, dx, dh, dw], axis=1)
219 | 
220 | 
221 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
222 |     # 保持原有的image
223 |     image_dtype = image.dtype
224 |     # 初始化参数
225 |     h, w = image.shape[:2]
226 |     window = (0, 0, h, w)
227 |     scale = 1
228 |     padding = [(0, 0), (0, 0), (0, 0)]
229 |     crop = None
230 | 
231 |     if mode == "none":
232 |         return image, window, scale, padding, crop
233 | 
234 |     # 计算变化的尺度
235 |     if min_dim:
236 |         scale = max(1, min_dim / min(h, w))
237 |     if min_scale and scale < min_scale:
238 |         scale = min_scale
239 | 
240 |     # 判断按照原来的尺寸缩放是否会超过最大边长
241 |     if max_dim and mode == "square":
242 |         image_max = max(h, w)
243 |         if round(image_max * scale) > max_dim:
244 |             scale = max_dim / image_max
245 | 
246 |     # 对图片进行resize
247 |     if scale != 1:
248 |         image = resize(image, (round(h * scale), round(w * scale)),
249 |                        preserve_range=True)
250 | 
251 |     # 是否需要padding填充
252 |     if mode == "square":
253 |         # 计算四周padding的情况
254 |         h, w = image.shape[:2]
255 | 
256 |         top_pad = (max_dim - h) // 2
257 |         bottom_pad = max_dim - h - top_pad
258 |         left_pad = (max_dim - w) // 2
259 |         right_pad = max_dim - w - left_pad
260 | 
261 |         # 向四周进行填充
262 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0,0)]
263 |         image = np.pad(image, padding, mode='constant', constant_values=0)
264 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
265 |     elif mode == "pad64":
266 |         h, w = image.shape[:2]
267 |         # Both sides must be divisible by 64
268 |         assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
269 |         # Height
270 |         if h % 64 > 0:
271 |             max_h = h - (h % 64) + 64
272 |             top_pad = (max_h - h) // 2
273 |             bottom_pad = max_h - h - top_pad
274 |         else:
275 |             top_pad = bottom_pad = 0
276 |         # Width
277 |         if w % 64 > 0:
278 |             max_w = w - (w % 64) + 64
279 |             left_pad = (max_w - w) // 2
280 |             right_pad = max_w - w - left_pad
281 |         else:
282 |             left_pad = right_pad = 0
283 |         padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
284 |         image = np.pad(image, padding, mode='constant', constant_values=0)
285 |         window = (top_pad, left_pad, h + top_pad, w + left_pad)
286 |     elif mode == "crop":
287 |         # Pick a random crop
288 |         h, w = image.shape[:2]
289 |         y = random.randint(0, (h - min_dim))
290 |         x = random.randint(0, (w - min_dim))
291 |         crop = (y, x, min_dim, min_dim)
292 |         image = image[y:y + min_dim, x:x + min_dim]
293 |         window = (0, 0, min_dim, min_dim)
294 |     else:
295 |         raise Exception("Mode {} not supported".format(mode))
296 |     return image.astype(image_dtype), window, scale, padding, crop
297 | 
298 | 
299 | def resize_mask(mask, scale, padding, crop=None):
300 |     # 将mask按照scale放大缩小后
301 |     with warnings.catch_warnings():
302 |         warnings.simplefilter("ignore")
303 |         mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
304 |     if crop is not None:
305 |         y, x, h, w = crop
306 |         mask = mask[y:y + h, x:x + w]
307 |     else:
308 |         mask = np.pad(mask, padding, mode='constant', constant_values=0)
309 |     return mask
310 | 
311 | 
312 | def minimize_mask(bbox, mask, mini_shape):
313 |     """
314 |     减少语义分割载入时的size
315 |     """
316 |     mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
317 |     for i in range(mask.shape[-1]):
318 |         # Pick slice and cast to bool in case load_mask() returned wrong dtype
319 |         m = mask[:, :, i].astype(bool)
320 |         y1, x1, y2, x2 = bbox[i][:4]
321 |         m = m[y1:y2, x1:x2]
322 |         if m.size == 0:
323 |             raise Exception("Invalid bounding box with area of zero")
324 |         # Resize with bilinear interpolation
325 |         m = resize(m, mini_shape)
326 |         mini_mask[:, :, i] = np.around(m).astype(np.bool)
327 |     return mini_mask
328 | 
329 | 
330 | def expand_mask(bbox, mini_mask, image_shape):
331 |     """Resizes mini masks back to image size. Reverses the change
332 |     of minimize_mask().
333 |     See inspect_data.ipynb notebook for more details.
334 |     """
335 |     mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
336 |     for i in range(mask.shape[-1]):
337 |         m = mini_mask[:, :, i]
338 |         y1, x1, y2, x2 = bbox[i][:4]
339 |         h = y2 - y1
340 |         w = x2 - x1
341 |         # Resize with bilinear interpolation
342 |         m = resize(m, (h, w))
343 |         mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool)
344 |     return mask
345 | 
346 | 
347 | # TODO: Build and use this function to reduce code duplication
348 | def mold_mask(mask, config):
349 |     pass
350 | 
351 | 
352 | def unmold_mask(mask, bbox, image_shape):
353 |     """Converts a mask generated by the neural network to a format similar
354 |     to its original shape.
355 |     mask: [height, width] of type float. A small, typically 28x28 mask.
356 |     bbox: [y1, x1, y2, x2]. The box to fit the mask in.
357 |     Returns a binary mask with the same size as the original image.
358 |     """
359 |     threshold = 0.5
360 |     y1, x1, y2, x2 = bbox
361 |     mask = resize(mask, (y2 - y1, x2 - x1))
362 |     mask = np.where(mask >= threshold, 1, 0).astype(np.bool)
363 | 
364 |     # Put the mask in the right location.
365 |     full_mask = np.zeros(image_shape[:2], dtype=np.bool)
366 |     full_mask[y1:y2, x1:x2] = mask
367 |     return full_mask
368 | 
369 | #----------------------------------------------------------#
370 | #  Miscellaneous
371 | #----------------------------------------------------------#
372 | 
373 | def trim_zeros(x):
374 |     """It's common to have tensors larger than the available data and
375 |     pad with zeros. This function removes rows that are all zeros.
376 |     x: [rows, columns].
377 |     """
378 |     assert len(x.shape) == 2
379 |     return x[~np.all(x == 0, axis=1)]
380 | 
381 | 
382 | def compute_matches(gt_boxes, gt_class_ids, gt_masks,
383 |                     pred_boxes, pred_class_ids, pred_scores, pred_masks,
384 |                     iou_threshold=0.5, score_threshold=0.0):
385 |     """Finds matches between prediction and ground truth instances.
386 |     Returns:
387 |         gt_match: 1-D array. For each GT box it has the index of the matched
388 |                   predicted box.
389 |         pred_match: 1-D array. For each predicted box, it has the index of
390 |                     the matched ground truth box.
391 |         overlaps: [pred_boxes, gt_boxes] IoU overlaps.
392 |     """
393 |     # Trim zero padding
394 |     # TODO: cleaner to do zero unpadding upstream
395 |     gt_boxes = trim_zeros(gt_boxes)
396 |     gt_masks = gt_masks[..., :gt_boxes.shape[0]]
397 |     pred_boxes = trim_zeros(pred_boxes)
398 |     pred_scores = pred_scores[:pred_boxes.shape[0]]
399 |     # Sort predictions by score from high to low
400 |     indices = np.argsort(pred_scores)[::-1]
401 |     pred_boxes = pred_boxes[indices]
402 |     pred_class_ids = pred_class_ids[indices]
403 |     pred_scores = pred_scores[indices]
404 |     pred_masks = pred_masks[..., indices]
405 | 
406 |     # Compute IoU overlaps [pred_masks, gt_masks]
407 |     overlaps = compute_overlaps_masks(pred_masks, gt_masks)
408 | 
409 |     # Loop through predictions and find matching ground truth boxes
410 |     match_count = 0
411 |     pred_match = -1 * np.ones([pred_boxes.shape[0]])
412 |     gt_match = -1 * np.ones([gt_boxes.shape[0]])
413 |     for i in range(len(pred_boxes)):
414 |         # Find best matching ground truth box
415 |         # 1. Sort matches by score
416 |         sorted_ixs = np.argsort(overlaps[i])[::-1]
417 |         # 2. Remove low scores
418 |         low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
419 |         if low_score_idx.size > 0:
420 |             sorted_ixs = sorted_ixs[:low_score_idx[0]]
421 |         # 3. Find the match
422 |         for j in sorted_ixs:
423 |             # If ground truth box is already matched, go to next one
424 |             if gt_match[j] > -1:
425 |                 continue
426 |             # If we reach IoU smaller than the threshold, end the loop
427 |             iou = overlaps[i, j]
428 |             if iou < iou_threshold:
429 |                 break
430 |             # Do we have a match?
431 |             if pred_class_ids[i] == gt_class_ids[j]:
432 |                 match_count += 1
433 |                 gt_match[j] = i
434 |                 pred_match[i] = j
435 |                 break
436 | 
437 |     return gt_match, pred_match, overlaps
438 | 
439 | 
440 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
441 |                pred_boxes, pred_class_ids, pred_scores, pred_masks,
442 |                iou_threshold=0.5):
443 |     """Compute Average Precision at a set IoU threshold (default 0.5).
444 |     Returns:
445 |     mAP: Mean Average Precision
446 |     precisions: List of precisions at different class score thresholds.
447 |     recalls: List of recall values at different class score thresholds.
448 |     overlaps: [pred_boxes, gt_boxes] IoU overlaps.
449 |     """
450 |     # Get matches and overlaps
451 |     gt_match, pred_match, overlaps = compute_matches(
452 |         gt_boxes, gt_class_ids, gt_masks,
453 |         pred_boxes, pred_class_ids, pred_scores, pred_masks,
454 |         iou_threshold)
455 | 
456 |     # Compute precision and recall at each prediction box step
457 |     precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
458 |     recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)
459 | 
460 |     # Pad with start and end values to simplify the math
461 |     precisions = np.concatenate([[0], precisions, [0]])
462 |     recalls = np.concatenate([[0], recalls, [1]])
463 | 
464 |     # Ensure precision values decrease but don't increase. This way, the
465 |     # precision value at each recall threshold is the maximum it can be
466 |     # for all following recall thresholds, as specified by the VOC paper.
467 |     for i in range(len(precisions) - 2, -1, -1):
468 |         precisions[i] = np.maximum(precisions[i], precisions[i + 1])
469 | 
470 |     # Compute mean AP over recall range
471 |     indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
472 |     mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
473 |                  precisions[indices])
474 | 
475 |     return mAP, precisions, recalls, overlaps
476 | 
477 | 
478 | def compute_ap_range(gt_box, gt_class_id, gt_mask,
479 |                      pred_box, pred_class_id, pred_score, pred_mask,
480 |                      iou_thresholds=None, verbose=1):
481 |     """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95."""
482 |     # Default is 0.5 to 0.95 with increments of 0.05
483 |     iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05)
484 |     
485 |     # Compute AP over range of IoU thresholds
486 |     AP = []
487 |     for iou_threshold in iou_thresholds:
488 |         ap, precisions, recalls, overlaps =\
489 |             compute_ap(gt_box, gt_class_id, gt_mask,
490 |                         pred_box, pred_class_id, pred_score, pred_mask,
491 |                         iou_threshold=iou_threshold)
492 |         if verbose:
493 |             print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
494 |         AP.append(ap)
495 |     AP = np.array(AP).mean()
496 |     if verbose:
497 |         print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
498 |             iou_thresholds[0], iou_thresholds[-1], AP))
499 |     return AP
500 | 
501 | 
502 | def compute_recall(pred_boxes, gt_boxes, iou):
503 |     """Compute the recall at the given IoU threshold. It's an indication
504 |     of how many GT boxes were found by the given prediction boxes.
505 |     pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
506 |     gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
507 |     """
508 |     # Measure overlaps
509 |     overlaps = compute_overlaps(pred_boxes, gt_boxes)
510 |     iou_max = np.max(overlaps, axis=1)
511 |     iou_argmax = np.argmax(overlaps, axis=1)
512 |     positive_ids = np.where(iou_max >= iou)[0]
513 |     matched_gt_boxes = iou_argmax[positive_ids]
514 | 
515 |     recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
516 |     return recall, positive_ids
517 | 
518 | 
519 | # ## Batch Slicing
520 | # Some custom layers support a batch size of 1 only, and require a lot of work
521 | # to support batches greater than 1. This function slices an input tensor
522 | # across the batch dimension and feeds batches of size 1. Effectively,
523 | # an easy way to support batches > 1 quickly with little code modification.
524 | # In the long run, it's more efficient to modify the code to support large
525 | # batches and getting rid of this function. Consider this a temporary solution
526 | def batch_slice(inputs, graph_fn, batch_size, names=None):
527 |     """Splits inputs into slices and feeds each slice to a copy of the given
528 |     computation graph and then combines the results. It allows you to run a
529 |     graph on a batch of inputs even if the graph is written to support one
530 |     instance only.
531 |     inputs: list of tensors. All must have the same first dimension length
532 |     graph_fn: A function that returns a TF tensor that's part of a graph.
533 |     batch_size: number of slices to divide the data into.
534 |     names: If provided, assigns names to the resulting tensors.
535 |     """
536 |     if not isinstance(inputs, list):
537 |         inputs = [inputs]
538 | 
539 |     outputs = []
540 |     for i in range(batch_size):
541 |         inputs_slice = [x[i] for x in inputs]
542 |         output_slice = graph_fn(*inputs_slice)
543 |         if not isinstance(output_slice, (tuple, list)):
544 |             output_slice = [output_slice]
545 |         outputs.append(output_slice)
546 |     # Change outputs from a list of slices where each is
547 |     # a list of outputs to a list of outputs and each has
548 |     # a list of slices
549 |     outputs = list(zip(*outputs))
550 | 
551 |     if names is None:
552 |         names = [None] * len(outputs)
553 | 
554 |     result = [tf.stack(o, axis=0, name=n)
555 |               for o, n in zip(outputs, names)]
556 |     if len(result) == 1:
557 |         result = result[0]
558 | 
559 |     return result
560 | 
561 | 
562 | def download_trained_weights(coco_model_path, verbose=1):
563 |     """Download COCO trained weights from Releases.
564 |     coco_model_path: local path of COCO trained weights
565 |     """
566 |     if verbose > 0:
567 |         print("Downloading pretrained model to " + coco_model_path + " ...")
568 |     with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
569 |         shutil.copyfileobj(resp, out)
570 |     if verbose > 0:
571 |         print("... done downloading pretrained model!")
572 | 
573 | 
574 | def norm_boxes(boxes, shape):
575 |     """Converts boxes from pixel coordinates to normalized coordinates.
576 |     boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
577 |     shape: [..., (height, width)] in pixels
578 |     Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
579 |     coordinates it's inside the box.
580 |     Returns:
581 |         [N, (y1, x1, y2, x2)] in normalized coordinates
582 |     """
583 |     h, w = shape
584 |     scale = np.array([h - 1, w - 1, h - 1, w - 1])
585 |     shift = np.array([0, 0, 1, 1])
586 |     return np.divide((boxes - shift), scale).astype(np.float32)
587 | 
588 | 
589 | def denorm_boxes(boxes, shape):
590 |     """Converts boxes from normalized coordinates to pixel coordinates.
591 |     boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
592 |     shape: [..., (height, width)] in pixels
593 |     Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
594 |     coordinates it's inside the box.
595 |     Returns:
596 |         [N, (y1, x1, y2, x2)] in pixel coordinates
597 |     """
598 |     h, w = shape
599 |     scale = np.array([h - 1, w - 1, h - 1, w - 1])
600 |     shift = np.array([0, 0, 1, 1])
601 |     return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
602 | 
603 | 
604 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
605 |            preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
606 |     """A wrapper for Scikit-Image resize().
607 |     Scikit-Image generates warnings on every call to resize() if it doesn't
608 |     receive the right parameters. The right parameters depend on the version
609 |     of skimage. This solves the problem by using different parameters per
610 |     version. And it provides a central place to control resizing defaults.
611 |     """
612 |     if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
613 |         # New in 0.14: anti_aliasing. Default it to False for backward
614 |         # compatibility with skimage 0.13.
615 |         return skimage.transform.resize(
616 |             image, output_shape,
617 |             order=order, mode=mode, cval=cval, clip=clip,
618 |             preserve_range=preserve_range, anti_aliasing=anti_aliasing,
619 |             anti_aliasing_sigma=anti_aliasing_sigma)
620 |     else:
621 |         return skimage.transform.resize(
622 |             image, output_shape,
623 |             order=order, mode=mode, cval=cval, clip=clip,
624 |             preserve_range=preserve_range)
625 | 
626 | 
627 | def mold_image(images, config):
628 |     """Expects an RGB image (or array of images) and subtracts
629 |     the mean pixel and converts it to float. Expects image
630 |     colors in RGB order.
631 |     """
632 |     return images.astype(np.float32) - config.MEAN_PIXEL
633 | 
634 | def compose_image_meta(image_id, original_image_shape, image_shape,
635 |                        window, scale, active_class_ids):
636 |     """Takes attributes of an image and puts them in one 1D array.
637 |     image_id: An int ID of the image. Useful for debugging.
638 |     original_image_shape: [H, W, C] before resizing or padding.
639 |     image_shape: [H, W, C] after resizing and padding
640 |     window: (y1, x1, y2, x2) in pixels. The area of the image where the real
641 |             image is (excluding the padding)
642 |     scale: The scaling factor applied to the original image (float32)
643 |     active_class_ids: List of class_ids available in the dataset from which
644 |         the image came. Useful if training on images from multiple datasets
645 |         where not all classes are present in all datasets.
646 |     """
647 |     meta = np.array(
648 |         [image_id] +                  # size=1
649 |         list(original_image_shape) +  # size=3
650 |         list(image_shape) +           # size=3
651 |         list(window) +                # size=4 (y1, x1, y2, x2) in image cooredinates
652 |         [scale] +                     # size=1
653 |         list(active_class_ids)        # size=num_classes
654 |     )
655 |     return meta
656 | 
657 | def mold_inputs(config, images):
658 |     molded_images = []
659 |     image_metas = []
660 |     windows = []
661 |     for image in images:
662 |         # Resize image
663 |         # TODO: move resizing to mold_image()
664 |         molded_image, window, scale, padding, crop = resize_image(
665 |             image,
666 |             min_dim=config.IMAGE_MIN_DIM,
667 |             min_scale=config.IMAGE_MIN_SCALE,
668 |             max_dim=config.IMAGE_MAX_DIM,
669 |             mode=config.IMAGE_RESIZE_MODE)
670 |         # print(np.shape(molded_image))
671 |         molded_image = mold_image(molded_image, config)
672 |         # Build image_meta
673 |         image_meta = compose_image_meta(
674 |             0, image.shape, molded_image.shape, window, scale,
675 |             np.zeros([config.NUM_CLASSES], dtype=np.int32))
676 |         # Append
677 |         molded_images.append(molded_image)
678 |         windows.append(window)
679 |         image_metas.append(image_meta)
680 |     # Pack into arrays
681 |     molded_images = np.stack(molded_images)
682 |     image_metas = np.stack(image_metas)
683 |     windows = np.stack(windows)
684 |     return molded_images, image_metas, windows
685 | 
686 | 
687 | def unmold_detections(detections, mrcnn_mask, original_image_shape,
688 |                         image_shape, window):
689 |     '''
690 |     Reformats the detection of one image from the format of the neural network output to a format suitable for use in thre rest of the application
691 |     params:
692 |         detections: [N, (y1, x1, y2, x2, class_id, score)] in normalize coordinates
693 |         mrcnn_mask: [N, height, wdidht, num_classes]
694 |         original_image_shape: [H, W, C] Original image shape before resizing
695 |         image_shape: [H, W, C] Shape of the image after resizing and padding
696 |         window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real image is excluding the padding
697 | 
698 |     Returns:
699 |         boxes:[N, (y1, x1, y2, x2)] Bounding boxes in pixels
700 |         class_ids: [N] Integer class IDs for each bounding box
701 |         scores: [N] Float probability scores of the class_id
702 |         masks: [height, width, num_instances] Instance masks
703 |     '''
704 |     # how many detection do we have? Detections array is padded with zeros. Find the first class_id==0
705 |     zero_ix = np.where(detections[:, 4] == 0)[0]
706 |     # N就是在待检测的图像中检测到多少个对象
707 |     N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]
708 | 
709 |     boxes = detections[:N, :4]
710 |     class_ids = detections[:N, 4].astype(np.int32)
711 |     scores = detections[:N, 5]
712 |     masks = mrcnn_mask[np.arange(N), :, :, class_ids]
713 |     # translate normalized coordinates in the resized image to pixel, coordinates in the original image before resizing
714 |     window = norm_boxes(window, image_shape[:2])
715 | 
716 |     wy1, wx1, wy2, wx2 = window
717 |     shift = np.array([wy1, wx1, wy1, wx1])
718 |     wh = wy2 - wy1  # window height
719 |     ww = wx2 - wx1  # window width
720 | 
721 |     scale = np.array([wh, ww, wh, ww])
722 |     boxes = np.divide(boxes - shift, scale)
723 |     boxes = denorm_boxes(boxes, original_image_shape[:2])
724 | 
725 |     exclude_ix = np.where(
726 |         (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
727 |     if exclude_ix.shape[0] > 0:
728 |         boxes = np.delete(boxes, exclude_ix, axis=0)
729 |         class_ids = np.delete(class_ids, exclude_ix, axis=0)
730 |         scores = np.delete(scores, exclude_ix, axis=0)
731 |         masks = np.delete(masks, exclude_ix, axis=0)
732 |         N = class_ids.shape[0]
733 |     # Resize masks to original image size and set boundary threshold.
734 |     full_masks = []
735 |     for i in range(N):
736 | 
737 |         full_mask = unmold_mask(masks[i], boxes[i], original_image_shape)
738 |         full_masks.append(full_mask)
739 | 
740 |     full_masks = np.stack(full_masks, axis=-1)\
741 |         if full_masks else np.empty(original_image_shape[:2] + (0,))
742 | 
743 |     return boxes, class_ids, scores, full_masks
744 | 
745 | 
746 | 
747 | def norm_boxes_graph(boxes, shape):
748 |     """Converts boxes from pixel coordinates to normalized coordinates.
749 |     boxes: [..., (y1, x1, y2, x2)] in pixel coordinates
750 |     shape: [..., (height, width)] in pixels
751 | 
752 |     Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
753 |     coordinates it's inside the box.
754 | 
755 |     Returns:
756 |         [..., (y1, x1, y2, x2)] in normalized coordinates
757 |     """
758 |     h, w = tf.split(tf.cast(shape, tf.float32), 2)
759 |     scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
760 |     shift = tf.constant([0., 0., 1., 1.])
761 |     return tf.divide(boxes - shift, scale)
762 | 
763 | 
764 | def parse_image_meta_graph(meta):
765 |     """
766 |         对输入的meta进行拆解
767 |         将包含图像属性的张量解析为其组件。
768 |         返回解析的张量的dict。
769 |     """
770 |     image_id = meta[:, 0] # 图片的id
771 |     original_image_shape = meta[:, 1:4] # 原始的图片的大小
772 |     image_shape = meta[:, 4:7]  # resize后图片的大小
773 |     window = meta[:, 7:11]  # (y1, x1, y2, x2)有效的区域在图片中的位置
774 |     scale = meta[:, 11]     # 长宽的变化状况
775 |     active_class_ids = meta[:, 12:] 
776 |     return {
777 |         "image_id": image_id,
778 |         "original_image_shape": original_image_shape,
779 |         "image_shape": image_shape,
780 |         "window": window,
781 |         "scale": scale,
782 |         "active_class_ids": active_class_ids,
783 |     }


--------------------------------------------------------------------------------
/utils/visualize.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | import random
 4 | import itertools
 5 | import colorsys
 6 | import numpy as np
 7 | 
 8 | from skimage.measure import find_contours
 9 | from PIL import Image
10 | import cv2
11 | ROOT_DIR = os.path.abspath("../")
12 | 
13 | sys.path.append(ROOT_DIR)
14 | 
15 | #---------------------------------------------------------#
16 | #  Visualization
17 | #---------------------------------------------------------#
18 | def random_colors(N, bright=True):
19 |     """
20 |     生成随机颜色
21 |     """
22 |     brightness = 1.0 if bright else 0.7
23 |     hsv = [(i / N, 1, brightness) for i in range(N)]
24 |     colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
25 |     return colors
26 | 
27 | 
28 | def apply_mask(image, mask, color, alpha=0.5):
29 |     """
30 |     打上mask图标
31 |     """
32 |     for c in range(3):
33 |         image[:, :, c] = np.where(mask == 1,
34 |                                   image[:, :, c] *
35 |                                   (1 - alpha) + alpha * color[c] * 255,
36 |                                   image[:, :, c])
37 |     return image
38 | 
39 | 
40 | def display_instances(image, boxes, masks, class_ids, class_names,scores=None,show_mask=True, show_bbox=True,colors=None, captions=True):
41 |     # instance的数量
42 |     N = boxes.shape[0]
43 |     if not N:
44 |         print("\n*** No instances to display *** \n")
45 |     else:
46 |         assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
47 |     colors = colors or random_colors(N)
48 | 
49 |     # 当masked_image为原图时是在原图上绘制
50 |     # 如果不想在原图上绘制，可以把masked_image设置成等大小的全0矩阵
51 |     # masked_image = np.array(image,np.uint8)
52 |     masked_image = np.zeros_like(image,np.uint8)
53 |     for i in range(N):
54 |         color = colors[i]
55 | 
56 |         # display bounding box
57 |         if not np.any(boxes[i]):
58 |             continue
59 |         y1, x1, y2, x2 = boxes[i]
60 |         if show_bbox:
61 |             cv2.rectangle(masked_image, (x1, y1), (x2, y2), (color[0] * 255,color[1] * 255,color[2] * 255), 2)
62 | 
63 |         # display labels and captions
64 |         if captions:
65 |             class_id = class_ids[i]
66 |             score = scores[i] if scores is not None else None
67 |             label = class_names[class_id]
68 |             caption = "{} {:.3f}".format(label, score) if score else label
69 |             font = cv2.FONT_HERSHEY_SIMPLEX
70 |             cv2.putText(masked_image, caption, (x1, y1 + 8), font, 1, (255, 255, 255), 2)
71 |         
72 |         # display masks
73 |         mask = masks[:, :, i]
74 |         if show_mask:
75 |             masked_image = apply_mask(masked_image, mask, color)
76 | 
77 |         # 画出语义分割的范围
78 |         padded_mask = np.zeros(
79 |             (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
80 |         padded_mask[1:-1, 1:-1] = mask
81 |         contours = find_contours(padded_mask, 0.5)
82 |         for verts in contours:
83 |             verts = np.fliplr(verts) - 1
84 |             cv2.polylines(masked_image, [np.array([verts],np.int)], 1, (color[0] * 255,color[1] * 255,color[2] * 255), 2)
85 | 
86 |     img = Image.fromarray(np.uint8(masked_image))
87 |     return img


--------------------------------------------------------------------------------