├── .gitignore ├── DouglasPeuker.py ├── README.md ├── auto_annotation.py ├── config.py ├── contourprocess ├── __init__.py ├── cal_dist_ang.py ├── line_intersection.py ├── rdp_alg.py ├── regularization.py └── rotate_ang.py ├── convert.py ├── data ├── building.names ├── coco_classes.txt └── shapes.names ├── evaluate.py ├── inference.py ├── inference_onnxruntime.py ├── largest_interior_rectangle ├── __init__.py ├── lir.py └── lir_within_outline.py ├── mask.png ├── mask2npz.py ├── mrcnn ├── __init__.py ├── layers.py ├── mask_rcnn.py ├── mrcnn.py ├── mrcnn_training.py └── restnet.py ├── parallel_model.py ├── regularization.py ├── requirements.txt ├── train.py └── utils ├── anchors.py ├── config.py ├── customerDataset.py ├── dataset.py ├── utils.py └── visualize.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. 
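
A minimal usage sketch for the DouglasPeuker class above (hypothetical coordinates; assumes the repo root is on PYTHONPATH). main() thins the polyline and prints the surviving points and their count:

    from DouglasPeuker import DouglasPeuker

    points = [(116.3000, 39.9000), (116.3001, 39.9001), (116.3100, 39.9000), (116.3200, 39.9100)]
    DouglasPeuker().main(points)
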
github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | *.h5 132 | *.onnx 133 | 134 | .history 135 | 136 | logs 137 | 138 | train_data 139 | 140 | result 141 | samples -------------------------------------------------------------------------------- /DouglasPeuker.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | ''' 4 | 道格拉斯-普克抽稀算法 5 | 垂距限值抽稀算法 6 | ''' 7 | # 设定阈值 8 | THRESHOLD = 0.0001 9 | 10 | # 计算点到直线的距离 11 | def point2Line(point_a, point_b, point_c): 12 | ''' 13 | 计算点a到b, c之间的距离 14 | point_a: (x_a, y_a) 15 | point_b: (x_b, y_b) 16 | point_c: (x_c, y_c) 17 | ''' 18 | # 计算bc的直线 19 | if point_b[0] == point_c[0]: 20 | return 9999 21 | # y = kx+b 22 | k = (point_b[1]-point_c[1])/(point_b[0]-point_c[0]) 23 | b = point_c[1]-k*point_c[0] 24 | 25 | # 计算点到直线的距离 26 | distance = abs(k*point_a[0]-point_a[1]+b)/math.sqrt(1+k**2) 27 | return distance 28 | 29 | 30 | class DouglasPeuker(object): 31 | 32 | def __init__(self): 33 | self._threshold=THRESHOLD 34 | self._qualify_list = [] 35 | self._disqualify_list = [] 36 | 37 | def diluting(self, point_list): 38 | ''' 39 | 抽稀算法 40 | : param point_list: 二维点列表 41 | : return 42 | ''' 43 | if len(point_list)<3: 44 | self._qualify_list.extend(point_list[::-1]) 45 | else: 46 | # 找到首尾相连的两点 47 | max_distance_index, max_distance = 0, 0 48 | for index, point in enumerate(point_list): 49 | if index in [0, len(point_list) - 1]: 50 | continue 51 | distance = point2Line(point, point_list[0], point_list[-1]) 52 | if distance > max_distance: 53 | max_distance_index = index 54 | max_distance = distance 55 | 56 | # 若最大距离小于阈值,则去掉所有中间点。 反之,则将曲线按最大距离点分割 57 | if max_distance < self._threshold: 58 | self._qualify_list.append(point_list[-1]) 59 | self._qualify_list.append(point_list[0]) 60 | else: 61 | # 将曲线按最大距离的点分割成两段 62 | sequence_a = point_list[:max_distance_index] 63 | sequence_b = point_list[max_distance_index:] 64 | 65 | for sequence in [sequence_a, sequence_b]: 66 | if len(sequence) < 3 and sequence == sequence_b: 67 | self._qualify_list.extend(sequence[::-1]) 68 | else: 69 | self._disqualify_list.append(sequence) 70 | def main(self, point_list): 71 | self.diluting(point_list) 72 | while len(self._disqualify_list) > 0: 73 | self.diluting(self._disqualify_list.pop()) 74 | print(self._qualify_list) 75 | print(len(self._qualify_list)) 76 | 77 | class LimitVerticalDistance(object): 78 | def __init__(self): 79 | self._threshold = THRESHOLD 80 | self._qualify_list = [] 81 | 82 | def diluting(self, point_list): 83 | self._qualify_list.append(point_list[0]) 84 | check_index = 1 85 | while check_index y2: 45 | ang = math.atan((y1 - y2) / (x2 - x1)) 46 | ang = ang * 180 / math.pi 47 | return 90 + (90 - ang) 48 | elif y1==y2: 49 | return 0 50 | elif x1 > x2: 51 | if y1 < y2: 52 | ang = math.atan((y2-y1)/(x1-x2)) 53 | ang = ang*180/math.pi 54 | return 90+(90-ang) 55 | elif y1 > y2: 56 | ang = math.atan((y1-y2)/(x1-x2)) 57 | ang = ang * 180 / math.pi 58 | return ang 59 | elif 
y1==y2: 60 | return 0 61 | 62 | elif x1==x2: 63 | return 90 64 | 65 | if __name__ == '__main__': 66 | pass -------------------------------------------------------------------------------- /contourprocess/line_intersection.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | # 线生成函数 5 | def line(p1, p2): 6 | A = (p1[1] - p2[1]) 7 | B = (p2[0] - p1[0]) 8 | C = (p1[0]*p2[1] - p2[0]*p1[1]) 9 | return A, B, -C 10 | 11 | 12 | # 计算两条直线之间的交点 13 | def intersection(L1, L2): 14 | D = L1[0] * L2[1] - L1[1] * L2[0] 15 | Dx = L1[2] * L2[1] - L1[1] * L2[2] 16 | Dy = L1[0] * L2[2] - L1[2] * L2[0] 17 | if D != 0: 18 | x = Dx / D 19 | y = Dy / D 20 | return x, y 21 | else: 22 | return False 23 | 24 | 25 | # 计算两个平行线之间的距离 26 | def par_line_dist(L1, L2): 27 | A1, B1, C1 = L1 28 | A2, B2, C2 = L2 29 | 30 | new_A1 = 1 31 | new_B1 = B1 / A1 32 | new_C1 = C1 / A1 33 | 34 | new_A2 = 1 35 | new_B2 = B2 / A2 36 | new_C2 = C2 / A2 37 | 38 | dist = (np.abs(new_C1-new_C2))/(np.sqrt(new_A2*new_A2+new_B2*new_B2)) 39 | return dist 40 | 41 | 42 | # 计算点在直线的投影位置 43 | def point_in_line(m, n, x1, y1, x2, y2): 44 | x = (m * (x2 - x1) * (x2 - x1) + n * (y2 - y1) * (x2 - x1) + (x1 * y2 - x2 * y1) * (y2 - y1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) 45 | y = (m * (x2 - x1) * (y2 - y1) + n * (y2 - y1) * (y2 - y1) + (x2 * y1 - x1 * y2) * (x2 - x1)) / ((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1)) 46 | return (x, y) -------------------------------------------------------------------------------- /contourprocess/rdp_alg.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def pldist(x0, x1, x2): 5 | """ 6 | Calculates the distance from the point ``x0`` to the line given 7 | by the points ``x1`` and ``x2``. 8 | :param x0: a point 9 | :type x0: a 2x1 numpy array 10 | :param x1: a point of the line 11 | :type x1: 2x1 numpy array 12 | :param x2: another point of the line 13 | :type x2: 2x1 numpy array 14 | """ 15 | x0, x1, x2 = x0[:2], x1[:2], x2[:2] # discard timestamp 16 | if x1[0] == x2[0]: 17 | return np.abs(x0[0] - x1[0]) 18 | 19 | return np.divide(np.linalg.norm(np.linalg.det([x2 - x1, x1 - x0])), 20 | np.linalg.norm(x2 - x1)) 21 | 22 | 23 | def _rdp(M, epsilon, dist): 24 | """ 25 | Simplifies a given array of points. 26 | :param M: an array 27 | :type M: Nx2 numpy array 28 | :param epsilon: epsilon in the rdp algorithm 29 | :type epsilon: float 30 | :param dist: distance function 31 | :type dist: function with signature ``f(x1, x2, x3)`` 32 | """ 33 | dmax = 0.0 34 | index = -1 35 | 36 | for i in range(1, M.shape[0]): 37 | d = dist(M[i], M[0], M[-1]) 38 | 39 | if d > dmax: 40 | index = i 41 | dmax = d 42 | 43 | if dmax > epsilon: 44 | r1 = _rdp(M[:index + 1], epsilon, dist) 45 | r2 = _rdp(M[index:], epsilon, dist) 46 | 47 | return np.vstack((r1[:-1], r2)) 48 | else: 49 | return np.vstack((M[0], M[-1])) 50 | 51 | 52 | def _rdp_nn(seq, epsilon, dist): 53 | """ 54 | Simplifies a given array of points. 55 | :param seq: a series of points 56 | :type seq: sequence of 2-tuples 57 | :param epsilon: epsilon in the rdp algorithm 58 | :type epsilon: float 59 | :param dist: distance function 60 | :type dist: function with signature ``f(x1, x2, x3)`` 61 | """ 62 | return _rdp(np.array(seq), epsilon, dist).tolist() 63 | 64 | 65 | def rdp(M, epsilon=0, dist=pldist): 66 | """ 67 | Simplifies a given array of points. 
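
A quick check of the helpers in contourprocess/line_intersection.py above (hand-verifiable values; intersection() returns False when the lines are parallel, i.e. D == 0):

    from contourprocess.line_intersection import line, intersection, par_line_dist

    L1 = line((0, 0), (4, 4))                       # the diagonal y = x
    L2 = line((0, 4), (4, 0))                       # the anti-diagonal y = 4 - x
    print(intersection(L1, L2))                     # (2.0, 2.0)
    print(intersection(L1, line((1, 1), (5, 5))))   # False: parallel lines
    print(par_line_dist(line((0, 0), (1, 1)), line((0, 2), (1, 3))))  # sqrt(2), the gap between y=x and y=x+2
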
68 | :param M: a series of points 69 | :type M: either a Nx2 numpy array or sequence of 2-tuples 70 | :param epsilon: epsilon in the rdp algorithm 71 | :type epsilon: float 72 | :param dist: distance function 73 | :type dist: function with signature ``f(x1, x2, x3)`` 74 | """ 75 | if "numpy" in str(type(M)): 76 | return _rdp(M, epsilon, dist) 77 | return _rdp_nn(M, epsilon, dist) -------------------------------------------------------------------------------- /contourprocess/regularization.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | from .rdp_alg import rdp 4 | from .cal_dist_ang import cal_angle, cal_dist, azimuthAngle 5 | from .rotate_ang import Nrotation_angle_get_coor_coordinates, Srotation_angle_get_coor_coordinates 6 | from .line_intersection import line, intersection, par_line_dist, point_in_line 7 | 8 | 9 | 10 | def boundary_regularization(img, epsilon=6): 11 | h, w = img.shape[0:2] 12 | # 轮廓定位 13 | contours, hierarchy = cv2.findContours(img, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 14 | contours = np.squeeze(contours[0]) 15 | # 轮廓精简(DP) 16 | contours = rdp(contours, epsilon=epsilon) 17 | contours[:, 1] = h - contours[:, 1] 18 | # 轮廓规则化 19 | dists = [] 20 | azis = [] 21 | azis_index = [] 22 | # 获取每条边的长度和方位角 23 | for i in range(contours.shape[0]): 24 | cur_index = i 25 | next_index = i+1 if i < contours.shape[0]-1 else 0 26 | prev_index = i-1 27 | cur_point = contours[cur_index] 28 | nest_point = contours[next_index] 29 | prev_point = contours[prev_index] 30 | 31 | dist = cal_dist(cur_point, nest_point) 32 | azi = azimuthAngle(cur_point, nest_point) 33 | 34 | dists.append(dist) 35 | azis.append(azi) 36 | azis_index.append([cur_index, next_index]) 37 | 38 | # 以最长的边的方向作为主方向 39 | longest_edge_idex = np.argmax(dists) 40 | main_direction = azis[longest_edge_idex] 41 | # 方向纠正,绕中心点旋转到与主方向垂直或者平行 42 | correct_points = [] 43 | para_vetr_idxs = [] # 0平行 1垂直 44 | for i, (azi, (point_0_index, point_1_index)) in enumerate(zip(azis, azis_index)): 45 | 46 | if i == longest_edge_idex: 47 | correct_points.append([contours[point_0_index], contours[point_1_index]]) 48 | para_vetr_idxs.append(0) 49 | else: 50 | # 确定旋转角度 51 | rotate_ang = main_direction - azi 52 | 53 | if np.abs(rotate_ang) < 180/4: 54 | rotate_ang = rotate_ang 55 | para_vetr_idxs.append(0) 56 | elif np.abs(rotate_ang) >= 90-180/4: 57 | rotate_ang = rotate_ang + 90 58 | para_vetr_idxs.append(1) 59 | 60 | # 执行旋转任务 61 | point_0 = contours[point_0_index] 62 | point_1 = contours[point_1_index] 63 | point_middle = (point_0 + point_1) / 2 64 | 65 | if rotate_ang > 0: 66 | rotate_point_0 = Srotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang)) 67 | rotate_point_1 = Srotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang)) 68 | elif rotate_ang < 0: 69 | rotate_point_0 = Nrotation_angle_get_coor_coordinates(point_0, point_middle, np.abs(rotate_ang)) 70 | rotate_point_1 = Nrotation_angle_get_coor_coordinates(point_1, point_middle, np.abs(rotate_ang)) 71 | else: 72 | rotate_point_0 = point_0 73 | rotate_point_1 = point_1 74 | correct_points.append([rotate_point_0, rotate_point_1]) 75 | 76 | correct_points = np.array(correct_points) 77 | 78 | 79 | # 相邻边校正,垂直取交点,平行平移短边或者加线 80 | final_points = [] 81 | final_points.append(correct_points[0][0]) 82 | for i in range(correct_points.shape[0]-1): 83 | cur_index = i 84 | next_index = i + 1 if i < correct_points.shape[0] - 1 else 0 85 | 86 | cur_edge_point_0 = 
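
A small sketch of the rdp() entry point from contourprocess/rdp_alg.py (synthetic polyline; with epsilon=0 every non-collinear vertex survives, a larger epsilon keeps only the sharp corners):

    import numpy as np
    from contourprocess.rdp_alg import rdp

    pts = np.array([[0.0, 0.0], [1.0, 0.1], [2.0, -0.1], [3.0, 5.0],
                    [4.0, 6.0], [5.0, 7.0], [6.0, 8.1], [7.0, 9.0]])
    print(rdp(pts, epsilon=1.0))   # roughly the endpoints plus the corner near (3, 5)
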
correct_points[cur_index][0] 87 | cur_edge_point_1 = correct_points[cur_index][1] 88 | next_edge_point_0 = correct_points[next_index][0] 89 | next_edge_point_1 = correct_points[next_index][1] 90 | 91 | cur_para_vetr_idx = para_vetr_idxs[cur_index] 92 | next_para_vetr_idx = para_vetr_idxs[next_index] 93 | 94 | if cur_para_vetr_idx != next_para_vetr_idx: 95 | # 垂直取交点 96 | L1 = line(cur_edge_point_0, cur_edge_point_1) 97 | L2 = line(next_edge_point_0, next_edge_point_1) 98 | 99 | point_intersection = intersection(L1, L2) 100 | final_points.append(point_intersection) 101 | 102 | elif cur_para_vetr_idx == next_para_vetr_idx: 103 | # 平行分两种,一种加短线,一种平移,取决于距离阈值 104 | L1 = line(cur_edge_point_0, cur_edge_point_1) 105 | L2 = line(next_edge_point_0, next_edge_point_1) 106 | marg = par_line_dist(L1, L2) 107 | 108 | if marg < 3: 109 | # 平移 110 | point_move = point_in_line(next_edge_point_0[0], next_edge_point_0[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) 111 | final_points.append(point_move) 112 | # 更新平移之后的下一条边 113 | correct_points[next_index][0] = point_move 114 | correct_points[next_index][1] = point_in_line(next_edge_point_1[0], next_edge_point_1[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) 115 | 116 | 117 | else: 118 | # 加线 119 | add_mid_point = (cur_edge_point_1 + next_edge_point_0) / 2 120 | add_point_1 = point_in_line(add_mid_point[0], add_mid_point[1], cur_edge_point_0[0], cur_edge_point_0[1], cur_edge_point_1[0], cur_edge_point_1[1]) 121 | add_point_2 = point_in_line(add_mid_point[0], add_mid_point[1], next_edge_point_0[0], next_edge_point_0[1], next_edge_point_1[0], next_edge_point_1[1]) 122 | final_points.append(add_point_1) 123 | final_points.append(add_point_2) 124 | 125 | 126 | final_points.append(final_points[0]) 127 | final_points = np.array(final_points) 128 | 129 | final_points[:, 1] = h - final_points[:, 1] 130 | return final_points 131 | -------------------------------------------------------------------------------- /contourprocess/rotate_ang.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | # 顺时针旋转 5 | def Nrotation_angle_get_coor_coordinates(point, center, angle): 6 | src_x, src_y = point 7 | center_x, center_y = center 8 | radian = math.radians(angle) 9 | 10 | dest_x = (src_x - center_x) * math.cos(radian) + (src_y - center_y) * math.sin(radian) + center_x 11 | dest_y = (src_y - center_y) * math.cos(radian) - (src_x - center_x) * math.sin(radian) + center_y 12 | 13 | # return (int(dest_x), int(dest_y)) 14 | return (dest_x, dest_y) 15 | 16 | 17 | # 逆时针旋转 18 | def Srotation_angle_get_coor_coordinates(point, center, angle): 19 | src_x, src_y = point 20 | center_x, center_y = center 21 | radian = math.radians(angle) 22 | 23 | dest_x = (src_x - center_x) * math.cos(radian) - (src_y - center_y) * math.sin(radian) + center_x 24 | dest_y = (src_x - center_x) * math.sin(radian) + (src_y - center_y) * math.cos(radian) + center_y 25 | 26 | # return [int(dest_x), int(dest_y)] 27 | return (dest_x, dest_y) 28 | -------------------------------------------------------------------------------- /convert.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Convert weight model to pb or h5 or onnx 3 | ''' 4 | 5 | import argparse 6 | import tf2onnx 7 | import tensorflow as tf 8 | from mrcnn.mask_rcnn import MASK_RCNN 9 | 10 | 11 | def parser_opt(): 12 | parser = argparse.ArgumentParser(description="Convert 
Mask RCNN model") 13 | parser.add_argument('--weight', type=str, help='model weight', required=True) 14 | parser.add_argument('--label', type=str,help='label file', required=True) 15 | parser.add_argument('--saved_pb', action='store_true', help='save pb model to current directory') 16 | parser.add_argument('--saved_pb_dir', type=str, default='./save_model', help='save pb file if needed. Default:save_model') 17 | 18 | parser.add_argument('--saved_model', type=str, help='Tensorflow saved_model', default='') 19 | parser.add_argument('--save_onnx', type=str, help='save onnx model name', required=True, default='') 20 | parser.add_argument('--opset', type=int, default=12, help='ONNX: opset version') 21 | parser.add_argument('--flag', action='store_true', help='True:Tensoflow model, False:Tensorflow weights') 22 | 23 | return parser 24 | 25 | def main(args): 26 | save_path = args.save_onnx 27 | opset = args.opset 28 | if args.flag: 29 | ''' 30 | 加载模型并导出onnx模型 31 | ''' 32 | saved_model = args.saved_model 33 | assert len(saved_model) > 0, 'saved_model cannot be none or empty.' 34 | maskrcnn_model = tf.keras.models.load_model(saved_model) 35 | model_proto, _ = tf2onnx.convert.from_keras(maskrcnn_model, opset=opset, output_path=save_path) 36 | output_names = [n.name for n in model_proto.graph.output] 37 | print(output_names) 38 | else: 39 | print('Convert Tensorflow saved model to ONNX') 40 | weights = args.weight 41 | class_path = args.label 42 | assert len(weights) > 0, 'weights cannot be none or empty.' 43 | assert len(class_path) > 0, 'classes path doesn\'t exists.' 44 | mask_rcnn = MASK_RCNN(model=weights, classes_path=class_path, confidence=0.8) 45 | 46 | save_pb = args.saved_pb 47 | if save_pb: 48 | save_name = args.saved_pb_dir 49 | assert len(save_name) > 0, 'save_name cannot be none or empty.' 
50 | mask_rcnn.model.save(save_name, save_format='tf') 51 | 52 | model_proto, _ = tf2onnx.convert.from_keras(mask_rcnn.model, opset=opset, output_path=save_path) 53 | output_names = [n.name for n in model_proto.graph.output] 54 | print(f'Model output names: ',output_names) 55 | 56 | if __name__ == '__main__': 57 | parser = parser_opt() 58 | args = parser.parse_args() 59 | main(args=args) 60 | -------------------------------------------------------------------------------- /data/building.names: -------------------------------------------------------------------------------- 1 | building -------------------------------------------------------------------------------- /data/coco_classes.txt: -------------------------------------------------------------------------------- 1 | person 2 | bicycle 3 | car 4 | motorbike 5 | aeroplane 6 | bus 7 | train 8 | truck 9 | boat 10 | traffic light 11 | fire hydrant 12 | stop sign 13 | parking meter 14 | bench 15 | bird 16 | cat 17 | dog 18 | horse 19 | sheep 20 | cow 21 | elephant 22 | bear 23 | zebra 24 | giraffe 25 | backpack 26 | umbrella 27 | handbag 28 | tie 29 | suitcase 30 | frisbee 31 | skis 32 | snowboard 33 | sports ball 34 | kite 35 | baseball bat 36 | baseball glove 37 | skateboard 38 | surfboard 39 | tennis racket 40 | bottle 41 | wine glass 42 | cup 43 | fork 44 | knife 45 | spoon 46 | bowl 47 | banana 48 | apple 49 | sandwich 50 | orange 51 | broccoli 52 | carrot 53 | hot dog 54 | pizza 55 | donut 56 | cake 57 | chair 58 | sofa 59 | pottedplant 60 | bed 61 | diningtable 62 | toilet 63 | tvmonitor 64 | laptop 65 | mouse 66 | remote 67 | keyboard 68 | cell phone 69 | microwave 70 | oven 71 | toaster 72 | sink 73 | refrigerator 74 | book 75 | clock 76 | vase 77 | scissors 78 | teddy bear 79 | hair drier 80 | toothbrush -------------------------------------------------------------------------------- /data/shapes.names: -------------------------------------------------------------------------------- 1 | circle 2 | square 3 | triangle -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import tensorflow as tf 3 | import numpy as np 4 | import config 5 | import os 6 | from mrcnn.mask_rcnn import MASK_RCNN 7 | from PIL import Image 8 | from utils import utils, dataset, visualize 9 | from mrcnn.mrcnn_training import load_image_gt 10 | import yaml 11 | import matplotlib.pyplot as plt 12 | from tqdm import tqdm 13 | 14 | mask_rcnn = MASK_RCNN(model=config.InferenceConfig.model, classes_path = config.InferenceConfig.class_path) 15 | class_names = mask_rcnn.get_class() 16 | 17 | ''' 18 | 参考: 19 | 1. https://github.com/matterport/Mask_RCNN/blob/master/mrcnn/utils.py#L715 20 | 2. https://github.com/matterport/Mask_RCNN/issues/2513 21 | 3. 
https://zhuanlan.zhihu.com/p/61880018 22 | ''' 23 | 24 | class Evaluator(object): 25 | def __init__(self, num_class) -> None: 26 | super().__init__() 27 | self.num_class = num_class 28 | self.confusion_matrix = np.zeros((self.num_class, )*2) 29 | 30 | def Pixel_Accuracy(self): 31 | Acc = np.diag(self.confusion_matrix).sum()/self.confusion_matrix.sum() 32 | return Acc 33 | 34 | def Pixel_Recall(self, class_index): 35 | Acc = self.confusion_matrix[class_index][class_index]/self.confusion_matrix.sum(axis=0)[class_index] 36 | return Acc 37 | 38 | def Pixel_Accuracy_Class(self): 39 | Acc = np.diag(self.confusion_matrix)/self.confusion_matrix.sum(axis=1) 40 | Acc = np.nanmean(Acc) 41 | return Acc 42 | 43 | def Mean_Intersection_over_Union(self): 44 | MIoU = np.diag(self.confusion_matrix) / ( 45 | np.sum(self.confusion_matrix, axis=1) + np.sum(self.confusion_matrix, axis=0) - 46 | np.diag(self.confusion_matrix)) 47 | MIoU = np.nanmean(MIoU) 48 | return MIoU 49 | 50 | def Frequency_Weighted_Intersection_over_Union(self): 51 | freq = np.sum(self.confusion_matrix, axis=1) / np.sum(self.confusion_matrix) 52 | iu = np.diag(self.confusion_matrix) / ( 53 | np.sum(self.confusion_matrix, axis=1) + np.sum(self.confusion_matrix, axis=0) - 54 | np.diag(self.confusion_matrix)) 55 | 56 | FWIoU = (freq[freq > 0] * iu[freq > 0]).sum() 57 | return FWIoU 58 | 59 | def _generate_matrix(self, gt_image, pre_image): 60 | mask = (gt_image >= 0) & (gt_image < self.num_class) 61 | label = self.num_class * gt_image[mask].astype('int') + pre_image[mask] 62 | count = np.bincount(label, minlength=self.num_class**2) 63 | confusion_matrix = count.reshape(self.num_class, self.num_class) 64 | return confusion_matrix 65 | 66 | def add_batch(self, gt_image, pre_image): 67 | ''' 68 | 输入的图像用0,1,2,3...表示类别 69 | ''' 70 | assert gt_image.shape == pre_image.shape 71 | self.confusion_matrix += self._generate_matrix(gt_image, pre_image) 72 | 73 | def reset(self): 74 | self.confusion_matrix = np.zeros((self.num_class,) * 2) 75 | 76 | class TestDataset(dataset.Dataset): 77 | # 获取图中的实例个数 78 | def get_obj_index(self, image): 79 | n = np.max(image) 80 | return n 81 | 82 | def get_class(self): 83 | classes_path = os.path.expanduser(self.classes_path) 84 | with open(classes_path) as f: 85 | class_names = f.readlines() 86 | class_names = [c.strip() for c in class_names] 87 | class_names.insert(0,"BG") 88 | return class_names 89 | 90 | # 解析yaml 91 | def get_classes_from_yaml(self, image_id): 92 | info = self.image_info[image_id] 93 | with open(info['yaml_path']) as f: 94 | temp = yaml.load(f.read()) 95 | labels = temp['label_names'] 96 | del labels[0] 97 | return labels 98 | 99 | def draw_mask(self, num_obj, mask, image, image_id): 100 | info = self.image_info[image_id] 101 | for index in range(num_obj): 102 | for i in range(info['width']): 103 | for j in range(info['height']): 104 | at_pixel = image.getpixel((i, j)) 105 | if at_pixel == index + 1: 106 | mask[j, i, index] = 1 107 | return mask 108 | 109 | def load_dataset(self, count, img_floder, mask_floder, imglist, dataset_root_path): 110 | """ 111 | Generate the requested number of synthetic images. 112 | count: number of images to generate. 113 | height, width: the size of the generated images. 
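
A tiny sanity check for the Evaluator above (hypothetical 2-class masks, 0 = background, 1 = building):

    import numpy as np

    ev = Evaluator(num_class=2)
    gt   = np.array([[0, 0, 1], [1, 1, 0]])
    pred = np.array([[0, 1, 1], [1, 1, 0]])
    ev.add_batch(gt, pred)
    print(ev.Pixel_Accuracy())                # 5 of 6 pixels correct -> 0.8333...
    print(ev.Mean_Intersection_over_Union())  # mean of per-class IoU: (2/3 + 3/4) / 2
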
114 | """ 115 | # Add classes 116 | classes_names = config.get_class(config.InferenceConfig.class_path) 117 | for index, item in enumerate(classes_names): 118 | self.add_class('TestSet', index+1, item) 119 | 120 | for i in range(count): 121 | # 获取图片宽和高 122 | filestr = imglist[i].split(".")[0] 123 | mask_path = mask_floder + "/" + filestr + ".png" 124 | yaml_path = dataset_root_path + "/" +"yaml/" + filestr + ".yaml" 125 | print(dataset_root_path + "labelme_json/" + filestr + "_json/img.png") 126 | cv_img = cv2.imread(dataset_root_path + "/" +"imgs/" + filestr + ".jpg") 127 | 128 | self.add_image("TestSet", image_id=i, path=img_floder + "/" + imglist[i], 129 | width=cv_img.shape[1], height=cv_img.shape[0], mask_path=mask_path, yaml_path=yaml_path) 130 | 131 | def load_mask(self, image_id): 132 | """Generate instance masks for shapes of the given image ID. 133 | """ 134 | global iter_num 135 | print("image_id", image_id) 136 | info = self.image_info[image_id] 137 | count = 1 # number of object 138 | img = Image.open(info['mask_path']) 139 | num_obj = self.get_obj_index(img) 140 | mask = np.zeros([info['height'], info['width'], num_obj], dtype=np.uint8) 141 | mask = self.draw_mask(num_obj, mask, img, image_id) 142 | occlusion = np.logical_not(mask[:, :, -1]).astype(np.uint8) 143 | for i in range(count - 2, -1, -1): 144 | mask[:, :, i] = mask[:, :, i] * occlusion 145 | 146 | occlusion = np.logical_and(occlusion, np.logical_not(mask[:, :, i])) 147 | labels = [] 148 | labels = self.get_classes_from_yaml(image_id) 149 | class_ids = np.array([self.class_names.index(s) for s in labels]) 150 | return mask, class_ids.astype(np.int32) 151 | 152 | def text_save(filename, data): 153 | file = open(filename,'a') 154 | for i in range(len(data)): 155 | s = str(data[i]).replace('[','').replace(']','') 156 | s = s.replace("'",'').replace(',','') +'\n' 157 | file.write(s) 158 | file.close() 159 | print(f'save success:{filename}') 160 | 161 | if __name__ == '__main__': 162 | dataset_root_path = config.CustomerConfig.TRAIN_DATASET 163 | img_floder =os.path.join(dataset_root_path, "imgs") 164 | mask_floder = os.path.join(dataset_root_path, "mask") 165 | imglist = os.listdir(img_floder) 166 | count = len(imglist) 167 | np.random.seed(10101) 168 | np.random.shuffle(imglist) 169 | train_imglist = imglist[:int(count*0.8)] 170 | test_imglist = imglist[int(count*0.8):] 171 | test_count = len(test_imglist) 172 | 173 | # 加载测试集 174 | dataset_test = TestDataset() 175 | dataset_test.load_dataset(test_count, img_floder, mask_floder, test_imglist, dataset_root_path) 176 | dataset_test.prepare() 177 | APs = [] 178 | flag = 0 179 | for imageid in tqdm(dataset_test.image_ids[:20]): 180 | image, image_meta, gt_class_id, gt_bbox, gt_mask = \ 181 | load_image_gt(dataset_test, config.InferenceConfig, imageid) 182 | # 将所有ground truth载入并保存 183 | if flag == 0: 184 | gt_boxes, gt_class_ids, gt_masks = gt_bbox, gt_class_id, gt_mask 185 | else: 186 | gt_boxes = np.concatenate((gt_boxes, gt_bbox), axis=0) 187 | gt_class_ids = np.concatenate((gt_class_ids, gt_class_id), axis=0) 188 | gt_masks = np.concatenate((gt_masks, gt_mask), axis=2) 189 | image = Image.fromarray(image) 190 | r = mask_rcnn.get_detections(image=image) 191 | if flag == 0: 192 | pred_rois, pred_ids, pred_scores, pred_masks = r["rois"], r["class_ids"], r["scores"], r['masks'] 193 | else: 194 | pred_rois = np.concatenate((pred_rois, r["rois"]), axis=0) 195 | pred_ids = np.concatenate((pred_ids, r["class_ids"]), axis=0) 196 | pred_scores = np.concatenate((pred_scores, 
r["scores"]), axis=0) 197 | pred_masks = np.concatenate((pred_masks, r['masks']), axis=2) 198 | flag+=1 199 | # 展示数据 200 | drawed_image = visualize.display_instances(image, r['rois'], r['masks'], r['class_ids'], mask_rcnn.class_names, r['scores'], show_bbox=False, captions=False) 201 | # 处理mask 文件 202 | mask_image = np.any(r['masks'], axis=-1) 203 | mask_image = Image.fromarray(mask_image) 204 | drawed_image.show() 205 | mask_image.show() 206 | 207 | iou_thresholds = [0.5, 0.6, 0.7, 0.8, 0.9] 208 | # AP, precisions, recalls, overlaps =utils.compute_ap(gt_bbox, gt_class_id, gt_mask,r["rois"], r["class_ids"], r["scores"], r['masks'], iou_threshold=iou_threshold) 209 | # 计算AP, precision, recall 210 | for iou_threshold in iou_thresholds: 211 | AP, precisions, recalls, overlaps = utils.compute_ap(gt_boxes, gt_class_ids, gt_masks, pred_rois, pred_ids, pred_scores, pred_masks, iou_threshold=iou_threshold) 212 | print(f'AP@{iou_threshold}:{AP}') 213 | print(f"mAP@{iou_threshold}: ", np.mean(AP)) 214 | # 保存precision, recall信息用于后续绘制图像 215 | # text_save(f'Kpreci@{iou_threshold}.txt', precisions) 216 | # text_save(f'Krecall@{iou_threshold}.txt', recalls) 217 | # text_save(f'KAP@{iou_threshold}.txt', [AP]) 218 | # plt.plot(recalls, precisions, 'b', label='PR') 219 | # plt.title('precision-recall curve') 220 | # plt.xlabel('Recall') 221 | # plt.ylabel('Precision') 222 | # plt.legend() 223 | # plt.show() 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | # ''' 232 | # Pixel Accuracy 233 | # ''' 234 | # basename = os.path.splitext(imageid)[0] 235 | # ori_img = os.path.join(img_floder, imageid) 236 | # gt_img = os.path.join(mask_floder, basename+'.png') 237 | # image = Image.open(ori_img) 238 | # gt_img = Image.open(gt_img) 239 | # n_classes = len(class_names) 240 | # result_img, pred_img = mask_rcnn.detect_image(image=image) 241 | # pred_img.show() 242 | # gt_img.show() 243 | # evaluate = Evaluator(1+1) 244 | # evaluate.add_batch(np.array(gt_img), np.array(pred_img)) 245 | # acc = evaluate.Pixel_Accuracy() 246 | # print('ACC:',acc) 247 | # recall = evaluate.Pixel_Recall(0) 248 | # print('Recall:', recall) 249 | # basename = os.path.splitext(imageid)[0] 250 | # image.save(os.path.join('./result', 'ori_'+basename+'.jpg')) 251 | # pred_img.save(os.path.join('./result', 'res_'+basename+'.jpg')) 252 | # iou计算: TODO:FIXBUG 253 | # iou = IoU_calculate(pred_img, gt_img, 2) 254 | # print(iou) 255 | 256 | 257 | 258 | 259 | -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | from mrcnn.mask_rcnn import MASK_RCNN 2 | from PIL import Image 3 | 4 | import tensorflow as tf 5 | import numpy as np 6 | from utils.anchors import get_anchors 7 | from utils.utils import mold_inputs,unmold_detections 8 | from utils import visualize 9 | import os 10 | from config import InferenceConfig 11 | from glob import glob 12 | from tqdm import tqdm 13 | 14 | 15 | # def get_class(classes_path): 16 | # classes_path = os.path.expanduser(classes_path) 17 | # with open(classes_path) as f: 18 | # class_names = f.readlines() 19 | # class_names = [c.strip() for c in class_names] 20 | # class_names.insert(0,"BG") 21 | # return class_names 22 | 23 | 24 | # model_path = './model/building' 25 | # class_path = './data/building.names' 26 | # class_names = get_class(class_path) 27 | 28 | # def get_config(): 29 | # class InferenceConfig(Config): 30 | # NUM_CLASSES = len(class_names) 31 | # GPU_COUNT = 1 32 | # IMAGES_PER_GPU = 1 
33 | # DETECTION_MIN_CONFIDENCE = 0.7 34 | # NAME = "Customer" 35 | # RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256) 36 | # IMAGE_MIN_DIM = 512 37 | # IMAGE_MAX_DIM = 512 38 | # IMAGE_SHAPE = [512, 512 ,3] 39 | 40 | # config = InferenceConfig() 41 | # config.display() 42 | # return config 43 | 44 | # InferenceConfig = get_config() 45 | # model = tf.keras.models.load_model(model_path) 46 | 47 | # image = Image.open(img) 48 | # image = [np.array(image)] 49 | 50 | # molded_images, image_metas, windows = mold_inputs(InferenceConfig,image) 51 | 52 | # image_shape = molded_images[0].shape 53 | # anchors = get_anchors(InferenceConfig,image_shape) 54 | # anchors = np.broadcast_to(anchors, (1,) + anchors.shape) 55 | # detections, _, _, mrcnn_mask, _, _, _ =model.predict([molded_images, image_metas, anchors], verbose=0) 56 | # final_rois, final_class_ids, final_scores, final_masks =unmold_detections(detections[0], mrcnn_mask[0],image[0].shape, molded_images[0].shape,windows[0]) 57 | 58 | # r = { 59 | # "rois": final_rois, 60 | # "class_ids": final_class_ids, 61 | # "scores": final_scores, 62 | # "masks": final_masks, 63 | # } 64 | 65 | 66 | # drawed_image = visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'], 67 | # class_names, r['scores']) 68 | # drawed_image.save('6.jpg') 69 | # drawed_image.show() 70 | 71 | images = glob('./samples/*') 72 | save_path = './result' 73 | if not os.path.exists(save_path): 74 | os.makedirs(save_path) 75 | mask_rcnn = MASK_RCNN(model=InferenceConfig.model, classes_path=InferenceConfig.class_path, confidence=0.7) 76 | for img_name in tqdm(images): 77 | image = Image.open(img_name).convert('RGB') 78 | drawed_image,mask_image = mask_rcnn.detect_image(image = image) 79 | drawed_image.show() 80 | result_img = Image.blend(image, drawed_image, 0.5) 81 | # result_img.show() 82 | save_filename = os.path.join(save_path, os.path.basename(img_name)) 83 | result_img.save(save_filename) 84 | -------------------------------------------------------------------------------- /inference_onnxruntime.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | from utils.config import Config 4 | from utils.anchors import get_anchors 5 | from utils.utils import mold_inputs,unmold_detections 6 | from utils.config import Config 7 | import colorsys 8 | import onnxruntime as ort 9 | from PIL import Image 10 | 11 | 12 | class InferenceConfig(Config): 13 | NAME = 'Customer' 14 | RPN_ANCHOR_SCALES = (16, 32, 64, 128, 256) 15 | IMAGE_MIN_DIM = 512 16 | IMAGE_MAX_DIM = 512 17 | model = './maskrcnn_0.8.onnx' 18 | classes_path = './data/building.names' 19 | 20 | def random_colors(N, bright=True): 21 | """ 22 | 生成随机颜色 23 | """ 24 | brightness = 1.0 if bright else 0.7 25 | hsv = [(i / N, 1, brightness) for i in range(N)] 26 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) 27 | return colors 28 | 29 | def apply_mask(image, mask, color, alpha=0.5): 30 | """ 31 | 打上mask图标 32 | """ 33 | for c in range(3): 34 | image[:, :, c] = np.where(mask == 1, 35 | image[:, :, c] * 36 | (1 - alpha) + alpha * color[c] * 255, 37 | image[:, :, c]) 38 | return image 39 | 40 | class MASK_RCNN(object): 41 | _defaults = { 42 | "model_path": InferenceConfig.model, 43 | "classes_path": InferenceConfig.classes_path, 44 | "confidence": 0.5, 45 | # 使用coco数据集检测的时候,IMAGE_MIN_DIM=1024,IMAGE_MAX_DIM=1024, RPN_ANCHOR_SCALES=(32, 64, 128, 256, 512) 46 | "RPN_ANCHOR_SCALES": InferenceConfig.RPN_ANCHOR_SCALES, 47 | "IMAGE_MIN_DIM": 
InferenceConfig.IMAGE_MIN_DIM, 48 | "IMAGE_MAX_DIM": InferenceConfig.IMAGE_MAX_DIM, 49 | } 50 | 51 | @classmethod 52 | def get_defaults(cls, n): 53 | if n in cls._defaults: 54 | return cls._defaults[n] 55 | else: 56 | return "Unrecognized attribute name '" + n + "'" 57 | 58 | def __init__(self, **kwargs): 59 | self.__dict__.update(self._defaults) 60 | self.class_names = self._get_class() 61 | self.config = self._get_config() 62 | self.generate() 63 | 64 | def _get_class(self): 65 | classes_path = os.path.expanduser(self.classes_path) 66 | with open(classes_path) as f: 67 | class_names = f.readlines() 68 | class_names = [c.strip() for c in class_names] 69 | class_names.insert(0,"BG") 70 | return class_names 71 | 72 | def _get_config(self): 73 | class InferenceConfig(Config): 74 | NUM_CLASSES = len(self.class_names) 75 | GPU_COUNT = 1 76 | IMAGES_PER_GPU = 1 77 | NAME = "Customer" 78 | RPN_ANCHOR_SCALES = self.RPN_ANCHOR_SCALES 79 | IMAGE_MIN_DIM = self.IMAGE_MIN_DIM 80 | IMAGE_MAX_DIM = self.IMAGE_MAX_DIM 81 | 82 | config = InferenceConfig() 83 | 84 | return config 85 | 86 | def generate(self): 87 | model_path = os.path.expanduser(self.model_path) 88 | 89 | # 计算总的种类 90 | self.num_classes = len(self.class_names) 91 | 92 | # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 93 | self.model = ort.InferenceSession(model_path) 94 | self.outputs_names = ['mrcnn_detection', 'mrcnn_class', 'mrcnn_bbox', 'mrcnn_mask', 'ROI', 'rpn_class', 'rpn_bbox'] 95 | 96 | def detect_image(self, image): 97 | image = [np.array(image)] 98 | molded_images, image_metas, windows = mold_inputs(self.config,image) 99 | 100 | image_shape = molded_images[0].shape 101 | anchors = get_anchors(self.config,image_shape) 102 | anchors = np.broadcast_to(anchors, (1,) + anchors.shape) 103 | 104 | detections, _, _, mrcnn_mask, _, _, _ =\ 105 | self.model.run(self.outputs_names, {"input_image":molded_images.astype(np.float32), "input_image_meta":image_metas.astype(np.float32), "input_anchors":anchors.astype(np.float32)}) 106 | 107 | final_rois, final_class_ids, final_scores, final_masks =\ 108 | unmold_detections(detections[0], mrcnn_mask[0], 109 | image[0].shape, molded_images[0].shape, 110 | windows[0]) 111 | 112 | r = { 113 | "rois": final_rois, 114 | "class_ids": final_class_ids, 115 | "scores": final_scores, 116 | "masks": final_masks, 117 | } 118 | # 生成mask图像 119 | mask_image = np.zeros_like(image[0], np.uint8) 120 | masks = r['masks'] 121 | N = r['rois'].shape[0] 122 | for i in range(N): 123 | mask = masks[:, :, i] 124 | color = (1.0, 0.0, 0.0) 125 | mask_image = apply_mask(mask_image, mask, color, alpha=1) 126 | padded_mask = np.zeros( 127 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 128 | padded_mask[1:-1, 1:-1] = mask 129 | return mask_image 130 | 131 | if __name__ == '__main__': 132 | mask_rcnn = MASK_RCNN() 133 | img = './samples/20221101144640.png' 134 | image = Image.open(img).convert('RGB') 135 | r_image = mask_rcnn.detect_image(image) 136 | img = Image.fromarray(r_image) 137 | img = Image.blend(img, image, 0.7) 138 | # img.save('./test.png') 139 | img.show() 140 | -------------------------------------------------------------------------------- /largest_interior_rectangle/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/largest_interior_rectangle/__init__.py -------------------------------------------------------------------------------- /largest_interior_rectangle/lir.py: 
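
A usage sketch for lir.py below (hypothetical binary mask; needs numpy and numba; the returned array is [x, y, width, height]):

    import numpy as np
    from largest_interior_rectangle.lir import largest_interior_rectangle

    cells = np.zeros((6, 8), dtype=np.uint8)   # must be C-contiguous uint8
    cells[1:5, 2:7] = 1                        # a 4-row by 5-column foreground block
    print(largest_interior_rectangle(cells))   # -> [2 1 5 4]
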
-------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | import numba as nb 4 | 5 | ''' 6 | Find the largest interior rectangle in the mask 7 | ''' 8 | 9 | 10 | 11 | def largest_interior_rectangle(cells): 12 | h_adjacency = horizontal_adjacency(cells) 13 | v_adjacency = vertical_adjacency(cells) 14 | s_map = span_map(h_adjacency, v_adjacency) 15 | return biggest_span_in_span_map(s_map) 16 | 17 | 18 | @nb.njit('uint32[:,::1](uint8[:,::1])', parallel=True, cache=True) 19 | def horizontal_adjacency(cells): 20 | result = np.zeros((cells.shape[0], cells.shape[1]), dtype=np.uint32) 21 | for y in nb.prange(cells.shape[0]): 22 | span = 0 23 | for x in range(cells.shape[1]-1, -1, -1): 24 | if cells[y, x] > 0: 25 | span += 1 26 | else: 27 | span = 0 28 | result[y, x] = span 29 | return result 30 | 31 | 32 | @nb.njit('uint32[:,::1](uint8[:,::1])', parallel=True, cache=True) 33 | def vertical_adjacency(cells): 34 | result = np.zeros((cells.shape[0], cells.shape[1]), dtype=np.uint32) 35 | for x in nb.prange(cells.shape[1]): 36 | span = 0 37 | for y in range(cells.shape[0]-1, -1, -1): 38 | if cells[y, x] > 0: 39 | span += 1 40 | else: 41 | span = 0 42 | result[y, x] = span 43 | return result 44 | 45 | 46 | @nb.njit('uint32(uint32[:])', cache=True) 47 | def predict_vector_size(array): 48 | zero_indices = np.where(array == 0)[0] 49 | if len(zero_indices) == 0: 50 | if len(array) == 0: 51 | return 0 52 | return len(array) 53 | return zero_indices[0] 54 | 55 | 56 | @nb.jit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 57 | def h_vector(h_adjacency, x, y): 58 | vector_size = predict_vector_size(h_adjacency[y:, x]) 59 | h_vector = np.zeros(vector_size, dtype=np.uint32) 60 | h = np.Inf 61 | for p in range(vector_size): 62 | h = np.minimum(h_adjacency[y+p, x], h) 63 | h_vector[p] = h 64 | h_vector = np.unique(h_vector)[::-1] 65 | return h_vector 66 | 67 | 68 | @nb.jit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 69 | def v_vector(v_adjacency, x, y): 70 | vector_size = predict_vector_size(v_adjacency[y, x:]) 71 | v_vector = np.zeros(vector_size, dtype=np.uint32) 72 | v = np.Inf 73 | for q in range(vector_size): 74 | v = np.minimum(v_adjacency[y, x+q], v) 75 | v_vector[q] = v 76 | v_vector = np.unique(v_vector)[::-1] 77 | return v_vector 78 | 79 | 80 | @nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True) 81 | def spans(h_vector, v_vector): 82 | spans = np.stack((h_vector, v_vector[::-1]), axis=1) 83 | return spans 84 | 85 | 86 | @nb.njit('uint32[:](uint32[:,:])', cache=True) 87 | def biggest_span(spans): 88 | if len(spans) == 0: 89 | return np.array([0, 0], dtype=np.uint32) 90 | areas = spans[:, 0] * spans[:, 1] 91 | biggest_span_index = np.where(areas == np.amax(areas))[0][0] 92 | return spans[biggest_span_index] 93 | 94 | 95 | @nb.njit('uint32[:, :, :](uint32[:,::1], uint32[:,::1])', 96 | parallel=True, cache=True) 97 | def span_map(h_adjacency, v_adjacency): 98 | span_map = np.zeros((h_adjacency.shape[0], 99 | h_adjacency.shape[1], 100 | 2), dtype=np.uint32) 101 | 102 | for x in nb.prange(span_map.shape[1]): 103 | for y in range(span_map.shape[0]): 104 | h_vec = h_vector(h_adjacency, x, y) 105 | v_vec = v_vector(v_adjacency, x, y) 106 | s = spans(h_vec, v_vec) 107 | s = biggest_span(s) 108 | span_map[y, x, :] = s 109 | 110 | return span_map 111 | 112 | 113 | @nb.njit('uint32[:](uint32[:, :, :])', cache=True) 114 | def biggest_span_in_span_map(span_map): 115 | areas = span_map[:, :, 0] * span_map[:, :, 1] 116 | 
largest_rectangle_indices = np.where(areas == np.amax(areas)) 117 | x = largest_rectangle_indices[1][0] 118 | y = largest_rectangle_indices[0][0] 119 | span = span_map[y, x] 120 | return np.array([x, y, span[0], span[1]], dtype=np.uint32) 121 | -------------------------------------------------------------------------------- /largest_interior_rectangle/lir_within_outline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numba as nb 3 | import cv2 as cv 4 | 5 | 6 | def largest_interior_rectangle(cells): 7 | outline = get_outline(cells) 8 | adjacencies = adjacencies_all_directions(cells) 9 | s_map, _, saddle_candidates_map = create_maps(outline, adjacencies) 10 | lir1 = biggest_span_in_span_map(s_map) 11 | 12 | candidate_cells = cells_of_interest(saddle_candidates_map) 13 | s_map = span_map(adjacencies[0], adjacencies[2], candidate_cells) 14 | lir2 = biggest_span_in_span_map(s_map) 15 | 16 | lir = biggest_rectangle(lir1, lir2) 17 | return lir 18 | 19 | 20 | def get_outline(cells): 21 | contours, hierarchy = \ 22 | cv.findContours(cells, cv.RETR_TREE, cv.CHAIN_APPROX_NONE) 23 | # TODO support multiple contours 24 | # test that only one regular contour exists 25 | assert hierarchy.shape == (1, 1, 4) 26 | assert np.all(hierarchy == -1) 27 | contour = contours[0][:, 0, :] 28 | x_values = contour[:, 0].astype("uint32", order="C") 29 | y_values = contour[:, 1].astype("uint32", order="C") 30 | return x_values, y_values 31 | 32 | 33 | @nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True) 34 | def horizontal_adjacency(cells, direction): 35 | result = np.zeros(cells.shape, dtype=np.uint32) 36 | for y in nb.prange(cells.shape[0]): 37 | span = 0 38 | if direction: 39 | iterator = range(cells.shape[1]-1, -1, -1) 40 | else: 41 | iterator = range(cells.shape[1]) 42 | for x in iterator: 43 | if cells[y, x] > 0: 44 | span += 1 45 | else: 46 | span = 0 47 | result[y, x] = span 48 | return result 49 | 50 | 51 | @nb.njit('uint32[:,::1](uint8[:,::1], boolean)', parallel=True, cache=True) 52 | def vertical_adjacency(cells, direction): 53 | result = np.zeros(cells.shape, dtype=np.uint32) 54 | for x in nb.prange(cells.shape[1]): 55 | span = 0 56 | if direction: 57 | iterator = range(cells.shape[0]-1, -1, -1) 58 | else: 59 | iterator = range(cells.shape[0]) 60 | for y in iterator: 61 | if cells[y, x] > 0: 62 | span += 1 63 | else: 64 | span = 0 65 | result[y, x] = span 66 | return result 67 | 68 | 69 | @nb.njit(cache=True) 70 | def adjacencies_all_directions(cells): 71 | h_left2right = horizontal_adjacency(cells, 1) 72 | h_right2left = horizontal_adjacency(cells, 0) 73 | v_top2bottom = vertical_adjacency(cells, 1) 74 | v_bottom2top = vertical_adjacency(cells, 0) 75 | return h_left2right, h_right2left, v_top2bottom, v_bottom2top 76 | 77 | 78 | @nb.njit('uint32(uint32[:])', cache=True) 79 | def predict_vector_size(array): 80 | zero_indices = np.where(array == 0)[0] 81 | if len(zero_indices) == 0: 82 | if len(array) == 0: 83 | return 0 84 | return len(array) 85 | return zero_indices[0] 86 | 87 | 88 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 89 | def h_vector_top2bottom(h_adjacency, x, y): 90 | vector_size = predict_vector_size(h_adjacency[y:, x]) 91 | h_vector = np.zeros(vector_size, dtype=np.uint32) 92 | h = np.Inf 93 | for p in range(vector_size): 94 | h = np.minimum(h_adjacency[y+p, x], h) 95 | h_vector[p] = h 96 | h_vector = np.unique(h_vector)[::-1] 97 | return h_vector 98 | 99 | 100 | 
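# (h_vector_bottom2top below mirrors h_vector_top2bottom: it walks upward from
# (x, y) instead of downward, again keeping a running minimum of horizontal
# spans. The four direction variants feed create_maps, which grows candidate
# rectangles from every outline pixel toward all four quadrants.)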
@nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 101 | def h_vector_bottom2top(h_adjacency, x, y): 102 | vector_size = predict_vector_size(np.flip(h_adjacency[:y+1, x])) 103 | h_vector = np.zeros(vector_size, dtype=np.uint32) 104 | h = np.Inf 105 | for p in range(vector_size): 106 | h = np.minimum(h_adjacency[y-p, x], h) 107 | h_vector[p] = h 108 | h_vector = np.unique(h_vector)[::-1] 109 | return h_vector 110 | 111 | 112 | @nb.njit(cache=True) 113 | def h_vectors_all_directions(h_left2right, h_right2left, x, y): 114 | h_l2r_t2b = h_vector_top2bottom(h_left2right, x, y) 115 | h_r2l_t2b = h_vector_top2bottom(h_right2left, x, y) 116 | h_l2r_b2t = h_vector_bottom2top(h_left2right, x, y) 117 | h_r2l_b2t = h_vector_bottom2top(h_right2left, x, y) 118 | return h_l2r_t2b, h_r2l_t2b, h_l2r_b2t, h_r2l_b2t 119 | 120 | 121 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 122 | def v_vector_left2right(v_adjacency, x, y): 123 | vector_size = predict_vector_size(v_adjacency[y, x:]) 124 | v_vector = np.zeros(vector_size, dtype=np.uint32) 125 | v = np.Inf 126 | for q in range(vector_size): 127 | v = np.minimum(v_adjacency[y, x+q], v) 128 | v_vector[q] = v 129 | v_vector = np.unique(v_vector)[::-1] 130 | return v_vector 131 | 132 | 133 | @nb.njit('uint32[:](uint32[:,::1], uint32, uint32)', cache=True) 134 | def v_vector_right2left(v_adjacency, x, y): 135 | vector_size = predict_vector_size(np.flip(v_adjacency[y, :x+1])) 136 | v_vector = np.zeros(vector_size, dtype=np.uint32) 137 | v = np.Inf 138 | for q in range(vector_size): 139 | v = np.minimum(v_adjacency[y, x-q], v) 140 | v_vector[q] = v 141 | v_vector = np.unique(v_vector)[::-1] 142 | return v_vector 143 | 144 | 145 | @nb.njit(cache=True) 146 | def v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y): 147 | v_l2r_t2b = v_vector_left2right(v_top2bottom, x, y) 148 | v_r2l_t2b = v_vector_right2left(v_top2bottom, x, y) 149 | v_l2r_b2t = v_vector_left2right(v_bottom2top, x, y) 150 | v_r2l_b2t = v_vector_right2left(v_bottom2top, x, y) 151 | return v_l2r_t2b, v_r2l_t2b, v_l2r_b2t, v_r2l_b2t 152 | 153 | 154 | @nb.njit('uint32[:,:](uint32[:], uint32[:])', cache=True) 155 | def spans(h_vector, v_vector): 156 | spans = np.stack((h_vector, v_vector[::-1]), axis=1) 157 | return spans 158 | 159 | 160 | @nb.njit('uint32[:](uint32[:,:])', cache=True) 161 | def biggest_span(spans): 162 | if len(spans) == 0: 163 | return np.array([0, 0], dtype=np.uint32) 164 | areas = spans[:, 0] * spans[:, 1] 165 | biggest_span_index = np.where(areas == np.amax(areas))[0][0] 166 | return spans[biggest_span_index] 167 | 168 | 169 | @nb.njit(cache=True) 170 | def spans_all_directions(h_vectors, v_vectors): 171 | span_l2r_t2b = spans(h_vectors[0], v_vectors[0]) 172 | span_r2l_t2b = spans(h_vectors[1], v_vectors[1]) 173 | span_l2r_b2t = spans(h_vectors[2], v_vectors[2]) 174 | span_r2l_b2t = spans(h_vectors[3], v_vectors[3]) 175 | return span_l2r_t2b, span_r2l_t2b, span_l2r_b2t, span_r2l_b2t 176 | 177 | 178 | @nb.njit(cache=True) 179 | def get_n_directions(spans_all_directions): 180 | n_directions = 1 181 | for spans in spans_all_directions: 182 | all_x_1 = np.all(spans[:, 0] == 1) 183 | all_y_1 = np.all(spans[:, 1] == 1) 184 | if not all_x_1 and not all_y_1: 185 | n_directions += 1 186 | return n_directions 187 | 188 | 189 | @nb.njit(cache=True) 190 | def get_xy_array(x, y, spans, mode=0): 191 | """0 - flip none, 1 - flip x, 2 - flip y, 3 - flip both""" 192 | xy = spans.copy() 193 | xy[:, 0] = x 194 | xy[:, 1] = y 195 | if mode == 1: 196 | xy[:, 0] = 
xy[:, 0] - spans[:, 0] + 1 197 | if mode == 2: 198 | xy[:, 1] = xy[:, 1] - spans[:, 1] + 1 199 | if mode == 3: 200 | xy[:, 0] = xy[:, 0] - spans[:, 0] + 1 201 | xy[:, 1] = xy[:, 1] - spans[:, 1] + 1 202 | return xy 203 | 204 | 205 | @nb.njit(cache=True) 206 | def get_xy_arrays(x, y, spans_all_directions): 207 | xy_l2r_t2b = get_xy_array(x, y, spans_all_directions[0], 0) 208 | xy_r2l_t2b = get_xy_array(x, y, spans_all_directions[1], 1) 209 | xy_l2r_b2t = get_xy_array(x, y, spans_all_directions[2], 2) 210 | xy_r2l_b2t = get_xy_array(x, y, spans_all_directions[3], 3) 211 | return xy_l2r_t2b, xy_r2l_t2b, xy_l2r_b2t, xy_r2l_b2t 212 | 213 | 214 | @nb.njit(cache=True) 215 | def check_if_point_on_outline(x, y, outline): 216 | x_vals, y_vals = outline 217 | x_true = x_vals == x 218 | y_true = y_vals == y 219 | both_true = np.logical_and(x_true, y_true) 220 | return np.any(both_true) 221 | 222 | 223 | @nb.njit('Tuple((uint32[:,:,::1], uint8[:,::1], uint8[:,::1]))' 224 | '(UniTuple(uint32[:], 2), UniTuple(uint32[:,::1], 4))', 225 | parallel=True, cache=True) 226 | def create_maps(outline, adjacencies): 227 | x_values, y_values = outline 228 | h_left2right, h_right2left, v_top2bottom, v_bottom2top = adjacencies 229 | 230 | shape = h_left2right.shape 231 | span_map = np.zeros(shape + (2,), "uint32") 232 | direction_map = np.zeros(shape, "uint8") 233 | saddle_candidates_map = np.zeros(shape, "uint8") 234 | 235 | for idx in nb.prange(len(x_values)): 236 | x, y = x_values[idx], y_values[idx] 237 | h_vectors = h_vectors_all_directions(h_left2right, h_right2left, x, y) 238 | v_vectors = v_vectors_all_directions(v_top2bottom, v_bottom2top, x, y) 239 | span_arrays = spans_all_directions(h_vectors, v_vectors) 240 | n = get_n_directions(span_arrays) 241 | direction_map[y, x] = n 242 | xy_arrays = get_xy_arrays(x, y, span_arrays) 243 | for direction_idx in range(4): 244 | xy_array = xy_arrays[direction_idx] 245 | span_array = span_arrays[direction_idx] 246 | for span_idx in range(span_array.shape[0]): 247 | x, y = xy_array[span_idx][0], xy_array[span_idx][1] 248 | w, h = span_array[span_idx][0], span_array[span_idx][1] 249 | if w*h > span_map[y, x, 0] * span_map[y, x, 1]: 250 | span_map[y, x, :] = np.array([w, h], "uint32") 251 | if n == 3: 252 | if not check_if_point_on_outline(x, y, outline): 253 | saddle_candidates_map[y, x] = np.uint8(255) 254 | 255 | return span_map, direction_map, saddle_candidates_map 256 | 257 | 258 | def cells_of_interest(cells): 259 | y_vals, x_vals = cells.nonzero() 260 | x_vals = x_vals.astype("uint32", order="C") 261 | y_vals = y_vals.astype("uint32", order="C") 262 | return x_vals, y_vals 263 | 264 | 265 | @nb.njit('uint32[:, :, :]' 266 | '(uint32[:,::1], uint32[:,::1], UniTuple(uint32[:], 2))', 267 | parallel=True, cache=True) 268 | def span_map(h_adjacency_left2right, 269 | v_adjacency_top2bottom, 270 | cells_of_interest): 271 | 272 | x_values, y_values = cells_of_interest 273 | 274 | span_map = np.zeros(h_adjacency_left2right.shape + (2,), dtype=np.uint32) 275 | 276 | for idx in nb.prange(len(x_values)): 277 | x, y = x_values[idx], y_values[idx] 278 | h_vector = h_vector_top2bottom(h_adjacency_left2right, x, y) 279 | v_vector = v_vector_left2right(v_adjacency_top2bottom, x, y) 280 | s = spans(h_vector, v_vector) 281 | s = biggest_span(s) 282 | span_map[y, x, :] = s 283 | 284 | return span_map 285 | 286 | 287 | @nb.njit('uint32[:](uint32[:, :, :])', cache=True) 288 | def biggest_span_in_span_map(span_map): 289 | areas = span_map[:, :, 0] * span_map[:, :, 1] 290 | 
largest_rectangle_indices = np.where(areas == np.amax(areas)) 291 | x = largest_rectangle_indices[1][0] 292 | y = largest_rectangle_indices[0][0] 293 | span = span_map[y, x] 294 | return np.array([x, y, span[0], span[1]], dtype=np.uint32) 295 | 296 | 297 | def biggest_rectangle(*args): 298 | biggest_rect = np.array([0, 0, 0, 0], dtype=np.uint32) 299 | for rect in args: 300 | if rect[2] * rect[3] > biggest_rect[2] * biggest_rect[3]: 301 | biggest_rect = rect 302 | return biggest_rect -------------------------------------------------------------------------------- /mask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/mask.png -------------------------------------------------------------------------------- /mask2npz.py: -------------------------------------------------------------------------------- 1 | from config import CustomerConfig 2 | import os 3 | from tqdm import tqdm 4 | 5 | from utils.customerDataset import CustomerDataset 6 | 7 | dataset_root_path = CustomerConfig.TRAIN_DATASET 8 | img_floder =os.path.join(dataset_root_path, "imgs") 9 | mask_floder = os.path.join(dataset_root_path, "mask") 10 | yaml_floder = os.path.join(dataset_root_path, "yaml") 11 | imglist = os.listdir(img_floder) 12 | 13 | config = CustomerConfig() 14 | 15 | count = len(imglist) 16 | dataset = CustomerDataset() 17 | dataset.load_dataset(config.NAME, len(imglist), config.CLASSES, img_floder, mask_floder, imglist, yaml_floder, train_mode=False) 18 | dataset.prepare() 19 | 20 | 21 | # 生成imageids 22 | # TODO: 多线程多进程优化 23 | image_ids = [id for id in dataset.image_ids] 24 | for imageid in tqdm(image_ids): 25 | dataset.load_mask(imageid, train_mode=False) 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /mrcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/RyanCCC/Mask_RCNN/a32837ba992eda4aa0b7799cd3317200e79d462b/mrcnn/__init__.py -------------------------------------------------------------------------------- /mrcnn/layers.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras as keras 3 | import tensorflow.keras.backend as K 4 | import tensorflow.keras.layers as KL 5 | import tensorflow.keras.utils as KU 6 | from tensorflow.python.eager import context 7 | import tensorflow.keras.models as KM 8 | import numpy as np 9 | from utils import utils 10 | 11 | # tf.compat.v1.disable_eager_execution() 12 | 13 | #----------------------------------------------------------# 14 | # Proposal Layer 15 | # 该部分代码用于将先验框转化成建议框 16 | #----------------------------------------------------------# 17 | 18 | def apply_box_deltas_graph(boxes, deltas): 19 | # 计算先验框的中心和宽高 20 | height = boxes[:, 2] - boxes[:, 0] 21 | width = boxes[:, 3] - boxes[:, 1] 22 | center_y = boxes[:, 0] + 0.5 * height 23 | center_x = boxes[:, 1] + 0.5 * width 24 | # 计算出调整后的先验框的中心和宽高 25 | center_y += deltas[:, 0] * height 26 | center_x += deltas[:, 1] * width 27 | height *= tf.exp(deltas[:, 2]) 28 | width *= tf.exp(deltas[:, 3]) 29 | # 计算左上角和右下角的点的坐标 30 | y1 = center_y - 0.5 * height 31 | x1 = center_x - 0.5 * width 32 | y2 = y1 + height 33 | x2 = x1 + width 34 | result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") 35 | return result 36 | 37 | 38 | def clip_boxes_graph(boxes, window): 39 | """ 40 | 
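# A quick numeric check for apply_box_deltas_graph above (hypothetical values):
# a box (y1, x1, y2, x2) = (0, 0, 1, 1) with deltas (0, 0, log 2, log 2) keeps
# its center at (0.5, 0.5) and doubles both sides -> (-0.5, -0.5, 1.5, 1.5);
# clip_boxes_graph below then clamps such boxes back into the window.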
boxes: [N, (y1, x1, y2, x2)] 41 | window: [4] in the form y1, x1, y2, x2 42 | """ 43 | # Split 44 | wy1, wx1, wy2, wx2 = tf.split(window, 4) 45 | y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) 46 | # Clip 47 | y1 = tf.maximum(tf.minimum(y1, wy2), wy1) 48 | x1 = tf.maximum(tf.minimum(x1, wx2), wx1) 49 | y2 = tf.maximum(tf.minimum(y2, wy2), wy1) 50 | x2 = tf.maximum(tf.minimum(x2, wx2), wx1) 51 | clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") 52 | clipped.set_shape((clipped.shape[0], 4)) 53 | return clipped 54 | 55 | class ProposalLayer(KL.Layer): 56 | ''' 57 | 1. 根据rpn网络,获取score靠前的前6000个anchor 58 | 2. 利用rpn_bbox对anchors进行修正 59 | 3. 舍弃掉修正后边框超过图像大小的anchor 60 | 4. 利用非极大值的方法获取最后的anchor 61 | ''' 62 | 63 | def __init__(self, proposal_count, nms_threshold, config=None, **kwargs): 64 | super(ProposalLayer, self).__init__(**kwargs) 65 | self.config = config 66 | self.proposal_count = proposal_count 67 | self.nms_threshold = nms_threshold 68 | # [rpn_class, rpn_bbox, anchors] 69 | def call(self, inputs): 70 | # 代表这个先验框内部是否有物体[batch, num_rois, 1] 71 | scores = inputs[0][:, :, 1] 72 | # 代表这个先验框的调整参数[batch, num_rois, 4] 73 | deltas = inputs[1] 74 | # [0.1 0.1 0.2 0.2],改变数量级 75 | deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) 76 | # Anchors 77 | anchors = inputs[2] 78 | # 筛选出得分前6000个的框 79 | pre_nms_limit = tf.minimum(self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) 80 | # 获得这些框的索引 81 | ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, 82 | name="top_anchors").indices 83 | # 获得这些框的得分 84 | scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), 85 | self.config.IMAGES_PER_GPU) 86 | # 获得这些框的调整参数 87 | deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), 88 | self.config.IMAGES_PER_GPU) 89 | # 获得这些框对应的先验框 90 | pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x), 91 | self.config.IMAGES_PER_GPU, 92 | names=["pre_nms_anchors"]) 93 | 94 | # [batch, N, (y1, x1, y2, x2)] 95 | # 对先验框进行解码 96 | boxes = utils.batch_slice([pre_nms_anchors, deltas], 97 | lambda x, y: apply_box_deltas_graph(x, y), 98 | self.config.IMAGES_PER_GPU, 99 | names=["refined_anchors"]) 100 | 101 | # [batch, N, (y1, x1, y2, x2)] 102 | # 防止超出图片范围 103 | window = np.array([0, 0, 1, 1], dtype=np.float32) 104 | boxes = utils.batch_slice(boxes, 105 | lambda x: clip_boxes_graph(x, window), 106 | self.config.IMAGES_PER_GPU, 107 | names=["refined_anchors_clipped"]) 108 | 109 | 110 | # 非极大抑制 111 | def nms(boxes, scores): 112 | indices = tf.image.non_max_suppression( 113 | boxes, scores, self.proposal_count, 114 | self.nms_threshold, name="rpn_non_max_suppression") 115 | proposals = tf.gather(boxes, indices) 116 | # 如果数量达不到设置的建议框数量的话 117 | # 就padding 118 | padding = tf.maximum(self.proposal_count - tf.shape(proposals)[0], 0) 119 | proposals = tf.pad(proposals, [(0, padding), (0, 0)]) 120 | return proposals 121 | 122 | proposals = utils.batch_slice([boxes, scores], nms, 123 | self.config.IMAGES_PER_GPU) 124 | if not context.executing_eagerly(): 125 | # Infer the static output shape: 126 | out_shape = self.compute_output_shape(None) 127 | proposals.set_shape(out_shape) 128 | return proposals 129 | 130 | def compute_output_shape(self, input_shape): 131 | return (None, self.proposal_count, 4) 132 | 133 | 134 | 135 | 136 | #----------------------------------------------------------# 137 | # ROIAlign Layer 138 | # 利用建议框在特征层上截取内容 139 | #----------------------------------------------------------# 140 | 141 | def log2_graph(x): 142 | return tf.math.log(x) / 
tf.math.log(2.0) 143 | 144 | def parse_image_meta_graph(meta): 145 | """ 146 | 将meta里面的参数进行分割 147 | """ 148 | image_id = meta[:, 0] 149 | original_image_shape = meta[:, 1:4] 150 | image_shape = meta[:, 4:7] 151 | window = meta[:, 7:11] # (y1, x1, y2, x2) window of image in in pixels 152 | scale = meta[:, 11] 153 | active_class_ids = meta[:, 12:] 154 | return { 155 | "image_id": image_id, 156 | "original_image_shape": original_image_shape, 157 | "image_shape": image_shape, 158 | "window": window, 159 | "scale": scale, 160 | "active_class_ids": active_class_ids, 161 | } 162 | 163 | class PyramidROIAlign(KL.Layer): 164 | def __init__(self, pool_shape, **kwargs): 165 | super(PyramidROIAlign, self).__init__(**kwargs) 166 | self.pool_shape = tuple(pool_shape) 167 | 168 | def call(self, inputs): 169 | # 建议框的位置 170 | boxes = inputs[0] 171 | 172 | # image_meta包含了一些必要的图片信息 173 | image_meta = inputs[1] 174 | 175 | # 取出所有的特征层[batch, height, width, channels] 176 | feature_maps = inputs[2:] 177 | 178 | y1, x1, y2, x2 = tf.split(boxes, 4, axis=2) 179 | h = y2 - y1 180 | w = x2 - x1 181 | 182 | # 获得输入进来的图像的大小 183 | image_shape = parse_image_meta_graph(image_meta)['image_shape'][0] 184 | 185 | # 通过建议框的大小找到这个建议框属于哪个特征层 186 | image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32) 187 | roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area))) 188 | roi_level = tf.minimum(5, tf.maximum( 189 | 2, 4 + tf.cast(tf.round(roi_level), tf.int32))) 190 | # batch_size, box_num 191 | roi_level = tf.squeeze(roi_level, 2) 192 | 193 | # Loop through levels and apply ROI pooling to each. P2 to P5. 194 | pooled = [] 195 | box_to_level = [] 196 | # 分别在P2-P5中进行截取 197 | for i, level in enumerate(range(2, 6)): 198 | # 找到每个特征层对应box 199 | ix = tf.compat.v1.where(tf.equal(roi_level, level)) 200 | level_boxes = tf.gather_nd(boxes, ix) 201 | box_to_level.append(ix) 202 | 203 | # 获得这些box所属的图片 204 | box_indices = tf.cast(ix[:, 0], tf.int32) 205 | 206 | # 停止梯度下降 207 | level_boxes = tf.stop_gradient(level_boxes) 208 | box_indices = tf.stop_gradient(box_indices) 209 | 210 | # Result: [batch * num_boxes, pool_height, pool_width, channels] 211 | pooled.append(tf.image.crop_and_resize( 212 | feature_maps[i], level_boxes, box_indices, self.pool_shape, 213 | method="bilinear")) 214 | 215 | pooled = tf.concat(pooled, axis=0) 216 | 217 | # 将顺序和所属的图片进行堆叠 218 | box_to_level = tf.concat(box_to_level, axis=0) 219 | box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1) 220 | box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range], 221 | axis=1) 222 | 223 | # box_to_level[:, 0]表示第几张图 224 | # box_to_level[:, 1]表示第几张图里的第几个框 225 | sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] 226 | # 进行排序,将同一张图里的某一些聚集在一起 227 | ix = tf.nn.top_k(sorting_tensor, k=tf.shape( 228 | box_to_level)[0]).indices[::-1] 229 | 230 | # 按顺序获得图片的索引 231 | ix = tf.gather(box_to_level[:, 2], ix) 232 | pooled = tf.gather(pooled, ix) 233 | 234 | # 重新reshape为原来的格式 235 | # 也就是 236 | # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] 237 | shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0) 238 | pooled = tf.reshape(pooled, shape) 239 | return pooled 240 | 241 | def compute_output_shape(self, input_shape): 242 | return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1], ) 243 | 244 | 245 | #----------------------------------------------------------# 246 | # Detection Layer 247 | # 248 | #----------------------------------------------------------# 249 | 250 | def 
refine_detections_graph(rois, probs, deltas, window, config): 251 | """细化分类建议并过滤重叠部分并返回最终结果探测。 252 | Inputs: 253 | rois: [N, (y1, x1, y2, x2)] in normalized coordinates 254 | probs: [N, num_classes]. Class probabilities. 255 | deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific 256 | bounding box deltas. 257 | window: (y1, x1, y2, x2) in normalized coordinates. The part of the image 258 | that contains the image excluding the padding. 259 | Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where 260 | coordinates are normalized. 261 | """ 262 | # 找到得分最高的类 263 | class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) 264 | # 序号+类 265 | indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) 266 | # 取出成绩 267 | class_scores = tf.gather_nd(probs, indices) 268 | # 还有框的调整参数 269 | deltas_specific = tf.gather_nd(deltas, indices) 270 | # 进行解码 271 | # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates 272 | refined_rois = apply_box_deltas_graph( 273 | rois, deltas_specific * config.BBOX_STD_DEV) 274 | # 防止超出0-1 275 | refined_rois = clip_boxes_graph(refined_rois, window) 276 | 277 | # 去除背景 278 | keep = tf.compat.v1.where(class_ids > 0)[:, 0] 279 | # 去除背景和得分小的区域 280 | if config.DETECTION_MIN_CONFIDENCE: 281 | conf_keep = tf.compat.v1.where(class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] 282 | keep = tf.sets.intersection(tf.expand_dims(keep, 0), 283 | tf.expand_dims(conf_keep, 0)) 284 | keep = tf.sparse.to_dense(keep)[0] 285 | 286 | # 获得除去背景并且得分较高的框还有种类与得分 287 | # 1. Prepare variables 288 | pre_nms_class_ids = tf.gather(class_ids, keep) 289 | pre_nms_scores = tf.gather(class_scores, keep) 290 | pre_nms_rois = tf.gather(refined_rois, keep) 291 | unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] 292 | 293 | def nms_keep_map(class_id): 294 | 295 | ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] 296 | 297 | class_keep = tf.image.non_max_suppression( 298 | tf.gather(pre_nms_rois, ixs), 299 | tf.gather(pre_nms_scores, ixs), 300 | max_output_size=config.DETECTION_MAX_INSTANCES, 301 | iou_threshold=config.DETECTION_NMS_THRESHOLD) 302 | 303 | class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) 304 | 305 | gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] 306 | class_keep = tf.pad(class_keep, [(0, gap)], 307 | mode='CONSTANT', constant_values=-1) 308 | 309 | class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) 310 | return class_keep 311 | 312 | # 2. 进行非极大抑制 313 | nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, 314 | dtype=tf.int64) 315 | # 3. 找到符合要求的需要被保留的建议框 316 | nms_keep = tf.reshape(nms_keep, [-1]) 317 | nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) 318 | # 4. 
Compute intersection between keep and nms_keep 319 | keep = tf.sets.intersection(tf.expand_dims(keep, 0), 320 | tf.expand_dims(nms_keep, 0)) 321 | keep = tf.sparse.to_dense(keep)[0] 322 | 323 | # 寻找得分最高的num_keep个框 324 | roi_count = config.DETECTION_MAX_INSTANCES 325 | class_scores_keep = tf.gather(class_scores, keep) 326 | num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) 327 | top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] 328 | keep = tf.gather(keep, top_ids) 329 | 330 | # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] 331 | detections = tf.concat([ 332 | tf.gather(refined_rois, keep), 333 | tf.dtypes.cast(tf.gather(class_ids, keep), tf.float32)[..., tf.newaxis], 334 | tf.gather(class_scores, keep)[..., tf.newaxis] 335 | ], axis=1) 336 | 337 | # 如果达不到数量的话就padding 338 | gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] 339 | detections = tf.pad(tensor=detections, paddings=[(0, gap), (0, 0)], mode="CONSTANT") 340 | return detections 341 | 342 | def norm_boxes_graph(boxes, shape): 343 | h, w = tf.split(tf.cast(shape, tf.float32), 2) 344 | scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) 345 | shift = tf.constant([0., 0., 1., 1.]) 346 | return tf.divide(boxes - shift, scale) 347 | 348 | class DetectionLayer(KL.Layer): 349 | 350 | def __init__(self, config=None, **kwargs): 351 | super(DetectionLayer, self).__init__(**kwargs) 352 | self.config = config 353 | 354 | def call(self, inputs): 355 | rois = inputs[0] 356 | mrcnn_class = inputs[1] 357 | mrcnn_bbox = inputs[2] 358 | image_meta = inputs[3] 359 | 360 | # 找到window的小数形式 361 | m = parse_image_meta_graph(image_meta) 362 | image_shape = m['image_shape'][0] 363 | window = norm_boxes_graph(m['window'], image_shape[:2]) 364 | 365 | # Run detection refinement graph on each item in the batch. 在此设定相关的阈值:refine_detections_graph 366 | detections_batch = utils.batch_slice( 367 | [rois, mrcnn_class, mrcnn_bbox, window], 368 | lambda x, y, w, z: refine_detections_graph(x, y, w, z, self.config), 369 | self.config.IMAGES_PER_GPU) 370 | 371 | # Reshape output 372 | # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in 373 | # normalized coordinates 374 | return tf.reshape( 375 | detections_batch, 376 | [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) 377 | 378 | def compute_output_shape(self, input_shape): 379 | return (None, self.config.DETECTION_MAX_INSTANCES, 6) 380 | 381 | 382 | #----------------------------------------------------------# 383 | # Detection Target Layer 384 | # 该部分代码会输入建议框 385 | # 判断建议框和真实框的重合情况 386 | # 筛选出内部包含物体的建议框 387 | # 利用建议框和真实框编码 388 | # 调整mask的格式使得其和预测格式相同 389 | #----------------------------------------------------------# 390 | 391 | def overlaps_graph(boxes1, boxes2): 392 | """ 393 | 用于计算boxes1和boxes2的重合程度 394 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 
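Overlap is measured as IoU (intersection area / union area) for every
pair of boxes. For example, boxes (0, 0, 1, 1) and (0, 0.5, 1, 1.5)
have intersection 0.5 and union 1.5, so their IoU is 1/3.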
395 | 返回 [len(boxes1), len(boxes2)] 396 | """ 397 | b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), 398 | [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) 399 | b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) 400 | b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) 401 | b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) 402 | y1 = tf.maximum(b1_y1, b2_y1) 403 | x1 = tf.maximum(b1_x1, b2_x1) 404 | y2 = tf.minimum(b1_y2, b2_y2) 405 | x2 = tf.minimum(b1_x2, b2_x2) 406 | intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) 407 | b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) 408 | b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) 409 | union = b1_area + b2_area - intersection 410 | iou = intersection / union 411 | overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) 412 | return overlaps 413 | 414 | 415 | def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): 416 | asserts = [ 417 | tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], 418 | name="roi_assertion"), 419 | ] 420 | with tf.control_dependencies(asserts): 421 | proposals = tf.identity(proposals) 422 | 423 | # 移除之前获得的padding的部分 424 | proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") 425 | gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") 426 | gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, 427 | name="trim_gt_class_ids") 428 | gt_masks = tf.gather(gt_masks, tf.compat.v1.where(non_zeros)[:, 0], axis=2, 429 | name="trim_gt_masks") 430 | 431 | # Handle COCO crowds 432 | # A crowd box in COCO is a bounding box around several instances. Exclude 433 | # them from training. A crowd box is given a negative class ID. 434 | crowd_ix = tf.compat.v1.where(gt_class_ids < 0)[:, 0] 435 | non_crowd_ix = tf.compat.v1.where(gt_class_ids > 0)[:, 0] 436 | crowd_boxes = tf.gather(gt_boxes, crowd_ix) 437 | gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) 438 | gt_boxes = tf.gather(gt_boxes, non_crowd_ix) 439 | gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) 440 | 441 | # 计算建议框和所有真实框的重合程度 [proposals, gt_boxes] 442 | overlaps = overlaps_graph(proposals, gt_boxes) 443 | 444 | # 计算和 crowd boxes 的重合程度 [proposals, crowd_boxes] 445 | crowd_overlaps = overlaps_graph(proposals, crowd_boxes) 446 | crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) 447 | no_crowd_bool = (crowd_iou_max < 0.001) 448 | 449 | # Determine positive and negative ROIs 450 | roi_iou_max = tf.reduce_max(overlaps, axis=1) 451 | # 1. 正样本建议框和真实框的重合程度大于0.5 452 | positive_roi_bool = (roi_iou_max >= 0.5) 453 | positive_indices = tf.where(positive_roi_bool)[:, 0] 454 | # 2. 负样本建议框和真实框的重合程度小于0.5,Skip crowds. 455 | negative_indices = tf.where(tf.logical_and(roi_iou_max < 0.5, no_crowd_bool))[:, 0] 456 | 457 | # Subsample ROIs. 
Aim for 33% positive 458 | # 进行正负样本的平衡 459 | # 取出最大33%的正样本 460 | positive_count = int(config.TRAIN_ROIS_PER_IMAGE * 461 | config.ROI_POSITIVE_RATIO) 462 | positive_indices = tf.random.shuffle(positive_indices)[:positive_count] 463 | positive_count = tf.shape(positive_indices)[0] 464 | # 保持正负样本比例 465 | r = 1.0 / config.ROI_POSITIVE_RATIO 466 | negative_count = tf.cast(r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count 467 | negative_indices = tf.random.shuffle(negative_indices)[:negative_count] 468 | # 获得正样本和负样本 469 | positive_rois = tf.gather(proposals, positive_indices) 470 | negative_rois = tf.gather(proposals, negative_indices) 471 | 472 | # 获取建议框和真实框重合程度 473 | positive_overlaps = tf.gather(overlaps, positive_indices) 474 | 475 | # 判断是否有真实框 476 | roi_gt_box_assignment = tf.cond( 477 | tf.greater(tf.shape(positive_overlaps)[1], 0), 478 | true_fn = lambda: tf.argmax(positive_overlaps, axis=1), 479 | false_fn = lambda: tf.cast(tf.constant([]),tf.int64) 480 | ) 481 | # 找到每一个建议框对应的真实框和种类 482 | roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) 483 | roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) 484 | 485 | # 解码获得网络应该有得预测结果 486 | deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) 487 | deltas /= config.BBOX_STD_DEV 488 | 489 | # 切换mask的形式[N, height, width, 1] 490 | transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) 491 | 492 | # 取出对应的层 493 | roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) 494 | 495 | # Compute mask targets 496 | boxes = positive_rois 497 | if config.USE_MINI_MASK: 498 | # Transform ROI coordinates from normalized image space 499 | # to normalized mini-mask space. 500 | y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) 501 | gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) 502 | gt_h = gt_y2 - gt_y1 503 | gt_w = gt_x2 - gt_x1 504 | y1 = (y1 - gt_y1) / gt_h 505 | x1 = (x1 - gt_x1) / gt_w 506 | y2 = (y2 - gt_y1) / gt_h 507 | x2 = (x2 - gt_x1) / gt_w 508 | boxes = tf.concat([y1, x1, y2, x2], 1) 509 | box_ids = tf.range(0, tf.shape(roi_masks)[0]) 510 | masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, 511 | box_ids, 512 | config.MASK_SHAPE) 513 | # Remove the extra dimension from masks. 
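# --- Editorial note (not part of the original file) -------------------
# tf.image.crop_and_resize above returns masks of shape
# [num_positive_rois, MASK_SHAPE[0], MASK_SHAPE[1], 1]; the squeeze
# below drops the trailing channel axis.
# Worked example of the mini-mask transform above: a positive ROI
# (0.2, 0.2, 0.6, 0.6) lying inside its GT box (0.0, 0.0, 0.8, 0.8)
# maps to (0.25, 0.25, 0.75, 0.75) in normalized mini-mask space.
# -----------------------------------------------------------------------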
514 | masks = tf.squeeze(masks, axis=3) 515 | 516 | # 防止resize后的结果不是1或者0 517 | masks = tf.round(masks) 518 | 519 | # 一般传入config.TRAIN_ROIS_PER_IMAGE个建议框进行训练, 520 | # 如果数量不够则padding 521 | rois = tf.concat([positive_rois, negative_rois], axis=0) 522 | N = tf.shape(negative_rois)[0] 523 | P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) 524 | rois = tf.pad(rois, [(0, P), (0, 0)]) 525 | roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) 526 | roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) 527 | deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) 528 | masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) 529 | 530 | return rois, roi_gt_class_ids, deltas, masks 531 | 532 | def trim_zeros_graph(boxes, name='trim_zeros'): 533 | """ 534 | 如果前一步没有满POST_NMS_ROIS_TRAINING个建议框,会有padding 535 | 要去掉padding 536 | """ 537 | non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool) 538 | boxes = tf.boolean_mask(boxes, non_zeros, name=name) 539 | return boxes, non_zeros 540 | 541 | class DetectionTargetLayer(KL.Layer): 542 | """找到建议框的ground_truth 543 | Inputs: 544 | proposals: [batch, N, (y1, x1, y2, x2)]建议框 545 | gt_class_ids: [batch, MAX_GT_INSTANCES]每个真实框对应的类 546 | gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]真实框的位置 547 | gt_masks: [batch, height, width, MAX_GT_INSTANCES]真实框的语义分割情况 548 | Returns: 549 | rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]内部真实存在目标的建议框 550 | target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]每个建议框对应的类 551 | target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]每个建议框应该有的调整参数 552 | target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]每个建议框语义分割情况, resize成28*28 553 | """ 554 | 555 | def __init__(self, config, **kwargs): 556 | super(DetectionTargetLayer, self).__init__(**kwargs) 557 | self.config = config 558 | 559 | def call(self, inputs): 560 | proposals = inputs[0] 561 | gt_class_ids = inputs[1] 562 | gt_boxes = inputs[2] 563 | gt_masks = inputs[3] 564 | 565 | # 对真实框进行编码 566 | names = ["rois", "target_class_ids", "target_bbox", "target_mask"] 567 | outputs = utils.batch_slice( 568 | [proposals, gt_class_ids, gt_boxes, gt_masks], 569 | lambda w, x, y, z: detection_targets_graph( 570 | w, x, y, z, self.config), 571 | self.config.IMAGES_PER_GPU, names=names) 572 | return outputs 573 | 574 | def compute_output_shape(self, input_shape): 575 | return [ 576 | (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois 577 | (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids 578 | (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas 579 | (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], 580 | self.config.MASK_SHAPE[1]) # masks 581 | ] 582 | 583 | def compute_mask(self, inputs, mask=None): 584 | return [None, None, None, None] -------------------------------------------------------------------------------- /mrcnn/mask_rcnn.py: -------------------------------------------------------------------------------- 1 | import os 2 | from PIL import Image 3 | import numpy as np 4 | from .mrcnn import get_model 5 | from utils.config import Config 6 | from utils.anchors import get_anchors 7 | from utils.utils import mold_inputs,unmold_detections 8 | from utils import visualize 9 | import tensorflow as tf 10 | from config import InferenceConfig 11 | 12 | # tf.compat.v1.disable_eager_execution() 13 | 14 | class MASK_RCNN(object): 15 | 16 | def __init__(self, **kwargs): 17 | self.model_path = kwargs['model'] 18 | self.classes_path= kwargs['classes_path'] 19 | self.confidence = kwargs['confidence'] 20 | 21 | # 
使用coco数据集检测的时候,IMAGE_MIN_DIM=1024,IMAGE_MAX_DIM=1024, RPN_ANCHOR_SCALES=(32, 64, 128, 256, 512) 22 | self.RPN_ANCHOR_SCALES = InferenceConfig.RPN_ANCHOR_SCALES 23 | self.IMAGE_MIN_DIM = InferenceConfig.IMAGE_MIN_DIM 24 | self.IMAGE_MAX_DIM = InferenceConfig.IMAGE_MAX_DIM 25 | 26 | self.class_names = self.get_class() 27 | self.config = self._get_config() 28 | self.generate() 29 | 30 | def get_class(self): 31 | classes_path = os.path.expanduser(self.classes_path) 32 | with open(classes_path) as f: 33 | class_names = f.readlines() 34 | class_names = [c.strip() for c in class_names] 35 | class_names.insert(0,"BG") 36 | return class_names 37 | 38 | def _get_config(self): 39 | class InferenceConfig(Config): 40 | NUM_CLASSES = len(self.class_names) 41 | GPU_COUNT = 1 42 | IMAGES_PER_GPU = 1 43 | DETECTION_MIN_CONFIDENCE = self.confidence 44 | NAME = "Customer" 45 | RPN_ANCHOR_SCALES = self.RPN_ANCHOR_SCALES 46 | IMAGE_MIN_DIM = self.IMAGE_MIN_DIM 47 | IMAGE_MAX_DIM = self.IMAGE_MAX_DIM 48 | 49 | config = InferenceConfig() 50 | config.display() 51 | return config 52 | 53 | 54 | def generate(self): 55 | model_path = os.path.expanduser(self.model_path) 56 | assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' 57 | 58 | # 计算总的种类 59 | self.num_classes = len(self.class_names) 60 | 61 | # 载入模型 62 | self.model = get_model(self.config, training=False) 63 | self.model.load_weights(self.model_path,by_name=True) 64 | # self.model.save('./maskrcnn_coco', save_format='tf') 65 | 66 | 67 | def detect_image(self, image, showBox = False, show_caption=False): 68 | image = [np.array(image)] 69 | molded_images, image_metas, windows = mold_inputs(self.config,image) 70 | 71 | image_shape = molded_images[0].shape 72 | anchors = get_anchors(self.config,image_shape) 73 | anchors = np.broadcast_to(anchors, (1,) + anchors.shape) 74 | 75 | detections, _, _, mrcnn_mask, _, _, _ =\ 76 | self.model.predict([molded_images, image_metas, anchors], verbose=0) 77 | 78 | final_rois, final_class_ids, final_scores, final_masks =\ 79 | unmold_detections(detections[0], mrcnn_mask[0], 80 | image[0].shape, molded_images[0].shape, 81 | windows[0]) 82 | 83 | r = { 84 | "rois": final_rois, 85 | "class_ids": final_class_ids, 86 | "scores": final_scores, 87 | "masks": final_masks, 88 | } 89 | 90 | # 想要保存处理后的图片请查询plt保存图片的方法。 91 | drawed_image = visualize.display_instances(image[0], r['rois'], r['masks'], r['class_ids'], 92 | self.class_names, r['scores'], show_bbox = showBox, captions=show_caption) 93 | # 处理mask 文件 94 | mask_image = np.any(r['masks'], axis=-1) 95 | mask_image = Image.fromarray(mask_image) 96 | return drawed_image, mask_image 97 | 98 | def get_detections(self, image): 99 | image = [np.array(image)] 100 | molded_images, image_metas, windows = mold_inputs(self.config,image) 101 | 102 | image_shape = molded_images[0].shape 103 | anchors = get_anchors(self.config,image_shape) 104 | anchors = np.broadcast_to(anchors, (1,) + anchors.shape) 105 | 106 | detections, _, _, mrcnn_mask, _, _, _ =\ 107 | self.model.predict([molded_images, image_metas, anchors], verbose=0) 108 | 109 | final_rois, final_class_ids, final_scores, final_masks =\ 110 | unmold_detections(detections[0], mrcnn_mask[0], 111 | image[0].shape, molded_images[0].shape, 112 | windows[0]) 113 | 114 | r = { 115 | "rois": final_rois, 116 | "class_ids": final_class_ids, 117 | "scores": final_scores, 118 | "masks": final_masks, 119 | } 120 | return r 121 | 122 | def close_session(self): 123 | self.sess.close() 
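
# A minimal usage sketch for the class above (an editorial addition, not part
# of the original file). The weights path and test image are placeholders;
# substitute your own. Note that close_session() refers to a self.sess
# attribute this TF2-style class never creates, so calling it raises
# AttributeError; it is a leftover of the TF1 Session API.

from PIL import Image
from mrcnn.mask_rcnn import MASK_RCNN

# Placeholder paths (assumed for illustration, not shipped with the repo).
model = MASK_RCNN(model='./logs/mask_rcnn.h5',
                  classes_path='./data/building.names',
                  confidence=0.7)
image = Image.open('./test.jpg')
# Returns the rendered visualization and a merged binary mask image.
drawed_image, mask_image = model.detect_image(image, showBox=True)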
-------------------------------------------------------------------------------- /mrcnn/mrcnn.py: -------------------------------------------------------------------------------- 1 | from .layers import ProposalLayer,PyramidROIAlign,DetectionLayer,DetectionTargetLayer 2 | from .mrcnn_training import * 3 | from utils.anchors import get_anchors 4 | from utils.utils import norm_boxes_graph,parse_image_meta_graph 5 | import numpy as np 6 | import tensorflow as tf 7 | import tensorflow.keras as keras 8 | import tensorflow.keras.backend as K 9 | import tensorflow.keras.layers as KL 10 | import tensorflow.keras.utils as KU 11 | from tensorflow.python.eager import context 12 | import tensorflow.keras.models as KM 13 | from mrcnn.restnet import get_resnet 14 | 15 | 16 | # tf.compat.v1.disable_eager_execution() 17 | 18 | 19 | def rpn_graph(feature_map, anchors_per_location, anchor_stride): 20 | 21 | shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu',strides=anchor_stride, 22 | name='rpn_conv_shared')(feature_map) 23 | 24 | x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', 25 | activation='linear', name='rpn_class_raw')(shared) 26 | # batch_size,num_anchors,2 27 | # 代表这个先验框对应的类 28 | rpn_class_logits = KL.Reshape([-1,2])(x) 29 | 30 | rpn_probs = KL.Activation( 31 | "softmax", name="rpn_class_xxx")(rpn_class_logits) 32 | 33 | x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid", 34 | activation='linear', name='rpn_bbox_pred')(shared) 35 | # batch_size,num_anchors,4 36 | # 这个先验框的调整参数 37 | rpn_bbox = KL.Reshape([-1,4])(x) 38 | 39 | return [rpn_class_logits, rpn_probs, rpn_bbox] 40 | 41 | 42 | def build_rpn_model(anchor_stride, anchors_per_location, depth): 43 | """Builds a Keras model of the Region Proposal Network. 44 | It wraps the RPN graph so it can be used multiple times with shared 45 | weights. 46 | 47 | anchors_per_location: number of anchors per pixel in the feature map 48 | anchor_stride: Controls the density of anchors. Typically 1 (anchors for 49 | every pixel in the feature map), or 2 (every other pixel). 50 | depth: Depth of the backbone feature map. 51 | 52 | Returns a Keras Model object. The model outputs, when called, are: 53 | rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) 54 | rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. 55 | rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be 56 | applied to anchors. 
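
    Example (illustrative sketch, not from the original file; assumes a
    256-channel FPN feature map and 3 anchors per location):

        rpn = build_rpn_model(anchor_stride=1, anchors_per_location=3, depth=256)
        logits, probs, deltas = rpn(tf.zeros([1, 32, 32, 256]))
        # logits and probs: (1, 32*32*3, 2); deltas: (1, 32*32*3, 4)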
57 | """ 58 | input_feature_map = KL.Input(shape=[None, None, depth], 59 | name="input_rpn_feature_map") 60 | outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride) 61 | return KM.Model([input_feature_map], outputs, name="rpn_model") 62 | 63 | 64 | 65 | def fpn_classifier_graph(rois, feature_maps, image_meta, 66 | pool_size, num_classes, train_bn=True, 67 | fc_layers_size=1024): 68 | # ROI Pooling,利用建议框在特征层上进行截取 69 | # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] 70 | x = PyramidROIAlign([pool_size, pool_size], 71 | name="roi_align_classifier")([rois, image_meta] + feature_maps) 72 | 73 | # Shape: [batch, num_rois, 1, 1, fc_layers_size],相当于两次全连接 74 | x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), 75 | name="mrcnn_class_conv1")(x) 76 | x = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_class_bn1')(x, training=train_bn) 77 | x = KL.Activation('relu')(x) 78 | 79 | # Shape: [batch, num_rois, 1, 1, fc_layers_size] 80 | x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), 81 | name="mrcnn_class_conv2")(x) 82 | x = KL.TimeDistributed(KL.BatchNormalization(), name='mrcnn_class_bn2')(x, training=train_bn) 83 | x = KL.Activation('relu')(x) 84 | 85 | # Shape: [batch, num_rois, fc_layers_size] 86 | shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), 87 | name="pool_squeeze")(x) 88 | 89 | # Classifier head 90 | # 这个的预测结果代表这个先验框内部的物体的种类 91 | mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes), 92 | name='mrcnn_class_logits')(shared) 93 | mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), 94 | name="mrcnn_class")(mrcnn_class_logits) 95 | 96 | 97 | # BBox head 98 | # 这个的预测结果会对先验框进行调整 99 | # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] 100 | x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'), 101 | name='mrcnn_bbox_fc')(shared) 102 | # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] 103 | s = K.int_shape(x) 104 | if s[1] is None: 105 | mrcnn_bbox = KL.Reshape((-1, num_classes, 4), name="mrcnn_bbox")(x) 106 | else: 107 | mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) 108 | 109 | return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox 110 | 111 | 112 | 113 | def build_fpn_mask_graph(rois, feature_maps, image_meta, 114 | pool_size, num_classes, train_bn=True): 115 | # ROI Pooling,利用建议框在特征层上进行截取 116 | # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] 117 | x = PyramidROIAlign([pool_size, pool_size], 118 | name="roi_align_mask")([rois, image_meta] + feature_maps) 119 | 120 | # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] 121 | x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), 122 | name="mrcnn_mask_conv1")(x) 123 | x = KL.TimeDistributed(KL.BatchNormalization(), 124 | name='mrcnn_mask_bn1')(x, training=train_bn) 125 | x = KL.Activation('relu')(x) 126 | 127 | # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] 128 | x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), 129 | name="mrcnn_mask_conv2")(x) 130 | x = KL.TimeDistributed(KL.BatchNormalization(), 131 | name='mrcnn_mask_bn2')(x, training=train_bn) 132 | x = KL.Activation('relu')(x) 133 | 134 | # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] 135 | x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), 136 | name="mrcnn_mask_conv3")(x) 137 | x = KL.TimeDistributed(KL.BatchNormalization(), 138 | name='mrcnn_mask_bn3')(x, training=train_bn) 139 | x = 
KL.Activation('relu')(x) 140 | 141 | # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] 142 | x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), 143 | name="mrcnn_mask_conv4")(x) 144 | x = KL.TimeDistributed(KL.BatchNormalization(), 145 | name='mrcnn_mask_bn4')(x, training=train_bn) 146 | x = KL.Activation('relu')(x) 147 | 148 | # Shape: [batch, num_rois, 2xMASK_POOL_SIZE, 2xMASK_POOL_SIZE, channels] 149 | x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), 150 | name="mrcnn_mask_deconv")(x) 151 | # 反卷积后再次进行一个1x1卷积调整通道,使其最终数量为numclasses,代表分的类 152 | x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), 153 | name="mrcnn_mask")(x) 154 | return x 155 | 156 | 157 | def get_model(config, training): 158 | # Image size must be dividable by 2 multiple times 159 | h, w = config.IMAGE_SHAPE[:2] 160 | if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): 161 | raise Exception("Image size must be dividable by 2 at least 6 times " 162 | "to avoid fractions when downscaling and upscaling." 163 | "For example, use 256, 320, 384, 448, 512, ... etc. ") 164 | 165 | # Inputs 166 | input_image = KL.Input( 167 | shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image") 168 | input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE], 169 | name="input_image_meta") 170 | 171 | if training: 172 | input_rpn_match = KL.Input( 173 | shape=[None, 1], name="input_rpn_match", dtype=tf.int32) 174 | input_rpn_bbox = KL.Input( 175 | shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) 176 | 177 | # Detection GT (class IDs, bounding boxes, and masks) 178 | # 1. GT Class IDs (zero padded) 179 | input_gt_class_ids = KL.Input( 180 | shape=[None], name="input_gt_class_ids", dtype=tf.int32) 181 | # 2. 
GT Boxes in pixels (zero padded) 182 | # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates 183 | input_gt_boxes = KL.Input( 184 | shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) 185 | # Normalize coordinates 186 | gt_boxes = KL.Lambda(lambda x: norm_boxes_graph( 187 | x, K.shape(input_image)[1:3]))(input_gt_boxes) 188 | 189 | # mask语义分析信息 190 | # [batch, height, width, MAX_GT_INSTANCES] 191 | if config.USE_MINI_MASK: 192 | input_gt_masks = KL.Input(shape=[config.MINI_MASK_SHAPE[0],config.MINI_MASK_SHAPE[1], None],name="input_gt_masks", dtype=bool) 193 | else: 194 | input_gt_masks = KL.Input(shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None],name="input_gt_masks", dtype=bool) 195 | # 设置anchor 196 | anchors = get_anchors(config,config.IMAGE_SHAPE) 197 | # 拓展anchors的shape,第一个维度拓展为batch_size 198 | anchors = np.broadcast_to(anchors, (config.BATCH_SIZE,) + anchors.shape) 199 | # 将anchors转化成tensor的形式 200 | class ConstLayer(tf.keras.layers.Layer): 201 | def __init__(self, x, name=None): 202 | super(ConstLayer, self).__init__(name=name) 203 | self.x = tf.Variable(x) 204 | 205 | def call(self, input): 206 | return self.x 207 | 208 | anchors = ConstLayer(anchors, name="anchors")(input_image) 209 | 210 | else: 211 | input_anchors = KL.Input(shape=[None, 4], name="input_anchors") 212 | anchors = input_anchors 213 | 214 | # 获得Resnet里的压缩程度不同的一些层 215 | _, C2, C3, C4, C5 = get_resnet(input_image, stage5=True, train_bn=config.TRAIN_BN) 216 | 217 | # 组合成特征金字塔的结构 218 | # P5长宽共压缩了5次 219 | # Height/32,Width/32,256 220 | P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c5p5')(C5) 221 | # P4长宽共压缩了4次 222 | # Height/16,Width/16,256 223 | P4 = KL.Add(name="fpn_p4add")([ 224 | KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), 225 | KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)]) 226 | # P4长宽共压缩了3次 227 | # Height/8,Width/8,256 228 | P3 = KL.Add(name="fpn_p3add")([ 229 | KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), 230 | KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)]) 231 | # P4长宽共压缩了2次 232 | # Height/4,Width/4,256 233 | P2 = KL.Add(name="fpn_p2add")([ 234 | KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), 235 | KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)]) 236 | 237 | # 各自进行一次256通道的卷积,此时P2、P3、P4、P5通道数相同 238 | # Height/4,Width/4,256 239 | P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p2")(P2) 240 | # Height/8,Width/8,256 241 | P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p3")(P3) 242 | # Height/16,Width/16,256 243 | P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p4")(P4) 244 | # Height/32,Width/32,256 245 | P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), padding="SAME", name="fpn_p5")(P5) 246 | # 在建议框网络里面还有一个P6用于获取建议框 247 | # Height/64,Width/64,256 248 | P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) 249 | 250 | # P2, P3, P4, P5, P6可以用于获取建议框 251 | rpn_feature_maps = [P2, P3, P4, P5, P6] 252 | # P2, P3, P4, P5用于获取mask信息 253 | mrcnn_feature_maps = [P2, P3, P4, P5] 254 | 255 | 256 | 257 | # anchors = KL.Lambda(lambda x: tf.Variable(anchors), name="anchors")(input_image) 258 | # 建立RPN模型 259 | rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE) 260 | 261 | if training: 262 | # Loop through pyramid layers 263 | layer_outputs = [] # list of lists 264 | for p in rpn_feature_maps: 265 | 
layer_outputs.append(rpn([p])) 266 | 267 | # 获得RPN网络的预测结果,进行格式调整,把五个特征层的结果进行堆叠 268 | output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] 269 | outputs = list(zip(*layer_outputs)) 270 | outputs = [KL.Concatenate(axis=1, name=n)(list(o)) 271 | for o, n in zip(outputs, output_names)] 272 | 273 | rpn_class_logits, rpn_class, rpn_bbox = outputs 274 | else: 275 | rpn_class_logits, rpn_class, rpn_bbox = [],[],[] 276 | 277 | # 获得RPN网络的预测结果,进行格式调整,把五个特征层的结果进行堆叠 278 | for p in rpn_feature_maps: 279 | logits,classes,bbox = rpn([p]) 280 | rpn_class_logits.append(logits) 281 | rpn_class.append(classes) 282 | rpn_bbox.append(bbox) 283 | 284 | rpn_class_logits = KL.Concatenate(axis=1,name="rpn_class_logits")(rpn_class_logits) 285 | rpn_class =KL.Concatenate(axis=1,name="rpn_class")(rpn_class) 286 | rpn_bbox = KL.Concatenate(axis=1,name="rpn_bbox")(rpn_bbox) 287 | 288 | # 此时获得的rpn_class_logits、rpn_class、rpn_bbox的维度是 289 | # rpn_class_logits : Batch_size, num_anchors, 2 290 | # rpn_class : Batch_size, num_anchors, 2 291 | # rpn_bbox : Batch_size, num_anchors, 4 292 | proposal_count = config.POST_NMS_ROIS_TRAINING 293 | 294 | # Batch_size, proposal_count, 4 295 | rpn_rois = ProposalLayer( 296 | proposal_count=proposal_count, 297 | nms_threshold=config.RPN_NMS_THRESHOLD, 298 | name="ROI", 299 | config=config)([rpn_class, rpn_bbox, anchors]) 300 | 301 | if not training: 302 | mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ 303 | fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta, 304 | config.POOL_SIZE, config.NUM_CLASSES, 305 | train_bn=config.TRAIN_BN, 306 | fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) 307 | 308 | detections = DetectionLayer(config, name="mrcnn_detection")( 309 | [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) 310 | 311 | 312 | detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections) 313 | # 获得mask的结果 314 | mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, 315 | input_image_meta, 316 | config.MASK_POOL_SIZE, 317 | config.NUM_CLASSES, 318 | train_bn=config.TRAIN_BN) 319 | 320 | # 作为输出 321 | model = KM.Model([input_image, input_image_meta, input_anchors], 322 | [detections, mrcnn_class, mrcnn_bbox, 323 | mrcnn_mask, rpn_rois, rpn_class, rpn_bbox], 324 | name='mask_rcnn') 325 | return model 326 | 327 | active_class_ids = KL.Lambda( 328 | lambda x: parse_image_meta_graph(x)["active_class_ids"] 329 | )(input_image_meta) 330 | 331 | if not config.USE_RPN_ROIS: 332 | # 使用外部输入的建议框 333 | input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], 334 | name="input_roi", dtype=np.int32) 335 | # Normalize coordinates 336 | target_rois = KL.Lambda(lambda x: norm_boxes_graph( 337 | x, K.shape(input_image)[1:3]))(input_rois) 338 | else: 339 | # 利用预测到的建议框进行下一步的操作 340 | target_rois = rpn_rois 341 | 342 | """找到建议框的ground_truth 343 | Inputs: 344 | proposals: [batch, N, (y1, x1, y2, x2)]建议框 345 | gt_class_ids: [batch, MAX_GT_INSTANCES]每个真实框对应的类 346 | gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)]真实框的位置 347 | gt_masks: [batch, height, width, MAX_GT_INSTANCES]真实框的语义分割情况 348 | Returns: 349 | rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)]内部真实存在目标的建议框 350 | target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]每个建议框对应的类 351 | target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)]每个建议框应该有的调整参数 352 | target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width]每个建议框语义分割情况 353 | """ 354 | rois, target_class_ids, target_bbox, target_mask =\ 355 | DetectionTargetLayer(config, name="proposal_targets")([ 356 | target_rois, 
input_gt_class_ids, gt_boxes, input_gt_masks]) 357 | 358 | # 找到合适的建议框的classifier预测结果 359 | mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ 360 | fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta, 361 | config.POOL_SIZE, config.NUM_CLASSES, 362 | train_bn=config.TRAIN_BN, 363 | fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) 364 | # 找到合适的建议框的mask预测结果 365 | mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, 366 | input_image_meta, 367 | config.MASK_POOL_SIZE, 368 | config.NUM_CLASSES, 369 | train_bn=config.TRAIN_BN) 370 | 371 | output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) 372 | 373 | # Losses 374 | rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( 375 | [input_rpn_match, rpn_class_logits]) 376 | rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( 377 | [input_rpn_bbox, input_rpn_match, rpn_bbox]) 378 | class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( 379 | [target_class_ids, mrcnn_class_logits, active_class_ids]) 380 | bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( 381 | [target_bbox, target_class_ids, mrcnn_bbox]) 382 | mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")( 383 | [target_mask, target_class_ids, mrcnn_mask]) 384 | 385 | # Model 386 | inputs = [input_image, input_image_meta, 387 | input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks] 388 | 389 | if not config.USE_RPN_ROIS: 390 | inputs.append(input_rois) 391 | outputs = [rpn_class_logits, rpn_class, rpn_bbox, 392 | mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, 393 | rpn_rois, output_rois, 394 | rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss] 395 | model = KM.Model(inputs, outputs, name='mask_rcnn') 396 | return model -------------------------------------------------------------------------------- /mrcnn/mrcnn_training.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | import tensorflow.keras.backend as K 3 | import tensorflow.keras.layers as KL 4 | import tensorflow.keras.utils as KU 5 | from tensorflow.python.eager import context 6 | import random 7 | import numpy as np 8 | import logging 9 | from utils import utils 10 | from utils.anchors import compute_backbone_shapes,generate_pyramid_anchors 11 | 12 | # tf.compat.v1.disable_eager_execution() 13 | 14 | def batch_pack_graph(x, counts, num_rows): 15 | outputs = [] 16 | for i in range(num_rows): 17 | outputs.append(x[i, :counts[i]]) 18 | return tf.concat(outputs, axis=0) 19 | 20 | def smooth_l1_loss(y_true, y_pred): 21 | """ 22 | smmoth_l1 损失函数 23 | """ 24 | diff = K.abs(y_true - y_pred) 25 | less_than_one = K.cast(K.less(diff, 1.0), "float32") 26 | loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) 27 | return loss 28 | 29 | def rpn_class_loss_graph(rpn_match, rpn_class_logits): 30 | """ 31 | 建议框分类损失函数 32 | """ 33 | rpn_match = tf.squeeze(rpn_match, -1) 34 | anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) 35 | indices = tf.where(K.not_equal(rpn_match, 0)) 36 | rpn_class_logits = tf.gather_nd(rpn_class_logits, indices) 37 | anchor_class = tf.gather_nd(anchor_class, indices) 38 | loss = K.sparse_categorical_crossentropy(target=anchor_class, 39 | output=rpn_class_logits, 40 | from_logits=True) 41 | loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) 42 | loss = 
K.switch(tf.math.is_nan(loss), tf.constant([0.0]), loss) 43 | return loss 44 | 45 | def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox): 46 | """ 47 | 建议框回归损失 48 | """ 49 | rpn_match = K.squeeze(rpn_match, -1) 50 | indices = tf.where(K.equal(rpn_match, 1)) 51 | rpn_bbox = tf.gather_nd(rpn_bbox, indices) 52 | batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1) 53 | target_bbox = batch_pack_graph(target_bbox, batch_counts, 54 | config.IMAGES_PER_GPU) 55 | # 计算smooth_l1损失函数 56 | loss = smooth_l1_loss(target_bbox, rpn_bbox) 57 | 58 | loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0)) 59 | loss = K.switch(tf.math.is_nan(loss), tf.constant([0.0]), loss) 60 | return loss 61 | 62 | def mrcnn_class_loss_graph(target_class_ids, pred_class_logits, 63 | active_class_ids): 64 | """ 65 | classifier的分类损失函数 66 | """ 67 | target_class_ids = tf.cast(target_class_ids, 'int64') 68 | pred_class_ids = tf.argmax(pred_class_logits, axis=2) 69 | pred_active = tf.gather(active_class_ids[0], pred_class_ids) 70 | loss = tf.nn.sparse_softmax_cross_entropy_with_logits( 71 | labels=target_class_ids, logits=pred_class_logits) 72 | 73 | loss = loss * pred_active 74 | loss = tf.reduce_sum(loss) / tf.maximum(tf.reduce_sum(pred_active), 1) 75 | return loss 76 | 77 | def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox): 78 | """ 79 | classifier的回归损失函数 80 | """ 81 | # Reshape 82 | target_class_ids = K.reshape(target_class_ids, (-1,)) 83 | target_bbox = K.reshape(target_bbox, (-1, 4)) 84 | pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4)) 85 | 86 | # 只有属于正样本的建议框用于训练 87 | positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] 88 | positive_roi_class_ids = tf.cast(tf.gather(target_class_ids, positive_roi_ix), tf.int64) 89 | indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) 90 | 91 | # 获得对应预测结果与实际结果 92 | target_bbox = tf.gather(target_bbox, positive_roi_ix) 93 | pred_bbox = tf.gather_nd(pred_bbox, indices) 94 | 95 | # Smooth-L1 Loss 96 | loss = K.switch(tf.size(target_bbox) > 0, 97 | smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), 98 | tf.constant(0.0)) 99 | loss = K.mean(loss) 100 | return loss 101 | 102 | def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): 103 | """ 104 | 交叉熵损失 105 | """ 106 | target_class_ids = K.reshape(target_class_ids, (-1,)) 107 | # 实际结果 108 | mask_shape = tf.shape(target_masks) 109 | target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) 110 | 111 | # 预测结果 112 | pred_shape = tf.shape(pred_masks) 113 | pred_masks = K.reshape(pred_masks, (-1, pred_shape[2], pred_shape[3], pred_shape[4])) 114 | 115 | # 进行维度变换 [N, num_classes, height, width] 116 | pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) 117 | 118 | # 只有正样本有效 119 | positive_ix = tf.where(target_class_ids > 0)[:, 0] 120 | positive_class_ids = tf.cast(tf.gather(target_class_ids, positive_ix), tf.int64) 121 | indices = tf.stack([positive_ix, positive_class_ids], axis=1) 122 | 123 | # 获得实际结果与预测结果 124 | y_true = tf.gather(target_masks, positive_ix) 125 | y_pred = tf.gather_nd(pred_masks, indices) 126 | 127 | # shape: [batch, roi, num_classes] 128 | loss = K.switch(tf.size(y_true) > 0, 129 | K.binary_crossentropy(target=y_true, output=y_pred), 130 | tf.constant(0.0)) 131 | loss = K.mean(loss) 132 | return loss 133 | 134 | def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, 135 | use_mini_mask=False): 136 | # 载入图片和语义分割效果 137 | image = dataset.load_image(image_id) 138 | mask, class_ids 
= dataset.load_mask(image_id) 139 | # print("\nbefore:",image_id,np.shape(mask),np.shape(class_ids)) 140 | original_shape = image.shape 141 | image, window, scale, padding, crop = utils.resize_image( 142 | image, 143 | min_dim=config.IMAGE_MIN_DIM, 144 | min_scale=config.IMAGE_MIN_SCALE, 145 | max_dim=config.IMAGE_MAX_DIM, 146 | mode=config.IMAGE_RESIZE_MODE) 147 | mask = utils.resize_mask(mask, scale, padding, crop) 148 | # Random horizontal flip (legacy 'augment' path) 149 | if augment: 150 | logging.warning("'augment' is deprecated. Use 'augmentation' instead.") 151 | if random.randint(0, 1): 152 | image = np.fliplr(image) 153 | mask = np.fliplr(mask) 154 | 155 | if augmentation: 156 | import imgaug 157 | # Image augmentation; only these augmenters are safe to apply to masks 158 | MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes", 159 | "Fliplr", "Flipud", "CropAndPad", 160 | "Affine", "PiecewiseAffine"] 161 | 162 | def hook(images, augmenter, parents, default): 163 | """Determines which augmenters to apply to masks.""" 164 | return augmenter.__class__.__name__ in MASK_AUGMENTERS 165 | 166 | image_shape = image.shape 167 | mask_shape = mask.shape 168 | det = augmentation.to_deterministic() 169 | image = det.augment_image(image) 170 | mask = det.augment_image(mask.astype(np.uint8), 171 | hooks=imgaug.HooksImages(activator=hook)) 172 | assert image.shape == image_shape, "Augmentation shouldn't change image size" 173 | assert mask.shape == mask_shape, "Augmentation shouldn't change mask size" 174 | mask = mask.astype(bool)  # np.bool was removed from recent NumPy releases 175 | # Safety check: drop mask channels that no longer contain any foreground 176 | _idx = np.sum(mask, axis=(0, 1)) > 0 177 | 178 | # print("\nafter:",np.shape(mask),np.shape(_idx)) 179 | mask = mask[:, :, _idx] 180 | class_ids = class_ids[_idx] 181 | # Derive the bounding box of each mask 182 | bbox = utils.extract_bboxes(mask) 183 | 184 | active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) 185 | source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] 186 | active_class_ids[source_class_ids] = 1 187 | 188 | if use_mini_mask: 189 | mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) 190 | 191 | # Compose image_meta 192 | image_meta = utils.compose_image_meta(image_id, original_shape, image.shape, 193 | window, scale, active_class_ids) 194 | 195 | return image, image_meta, class_ids, bbox, mask 196 | 197 | 198 | 199 | def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): 200 | # 1 = positive sample 201 | # -1 = negative sample 202 | # 0 = neutral (ignored) 203 | rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) 204 | # RPN bbox targets, encoded from the anchors and the ground-truth boxes 205 | rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) 206 | 207 | ''' 208 | iscrowd=0 marks a single object whose outline is stored as a polygon; 209 | iscrowd=1 marks several non-separated objects whose outline is RLE-encoded 210 | ''' 211 | crowd_ix = np.where(gt_class_ids < 0)[0] 212 | if crowd_ix.shape[0] > 0: 213 | non_crowd_ix = np.where(gt_class_ids > 0)[0] 214 | crowd_boxes = gt_boxes[crowd_ix] 215 | gt_class_ids = gt_class_ids[non_crowd_ix] 216 | gt_boxes = gt_boxes[non_crowd_ix] 217 | crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) 218 | crowd_iou_max = np.amax(crowd_overlaps, axis=1) 219 | no_crowd_bool = (crowd_iou_max < 0.001) 220 | else: 221 | no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) 222 | 223 | # Compute anchor / ground-truth overlaps [num_anchors, num_gt_boxes] 224 | overlaps = utils.compute_overlaps(anchors, gt_boxes) 225 | 226 | # 1. Anchors whose best IoU is below 0.3 are negatives 227 | anchor_iou_argmax = np.argmax(overlaps, axis=1) 228 | anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] 229 | rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 230 | # 2.
每个真实框重合度最大的先验框是正样本 231 | gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:,0] 232 | rpn_match[gt_iou_argmax] = 1 233 | # 3. 重合度大于0.7则代表为正样本 234 | rpn_match[anchor_iou_max >= 0.7] = 1 235 | 236 | # 正负样本平衡 237 | # 找到正样本的索引 238 | ids = np.where(rpn_match == 1)[0] 239 | # 如果大于(config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2)则删掉一些 240 | extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) 241 | if extra > 0: 242 | ids = np.random.choice(ids, extra, replace=False) 243 | rpn_match[ids] = 0 244 | # 找到负样本的索引 245 | ids = np.where(rpn_match == -1)[0] 246 | # 使得总数为config.RPN_TRAIN_ANCHORS_PER_IMAGE 247 | extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - 248 | np.sum(rpn_match == 1)) 249 | if extra > 0: 250 | # Rest the extra ones to neutral 251 | ids = np.random.choice(ids, extra, replace=False) 252 | rpn_match[ids] = 0 253 | 254 | # 找到内部真实存在物体的先验框,进行编码 255 | ids = np.where(rpn_match == 1)[0] 256 | ix = 0 257 | for i, a in zip(ids, anchors[ids]): 258 | gt = gt_boxes[anchor_iou_argmax[i]] 259 | # 计算真实框的中心,高宽 260 | gt_h = gt[2] - gt[0] 261 | gt_w = gt[3] - gt[1] 262 | gt_center_y = gt[0] + 0.5 * gt_h 263 | gt_center_x = gt[1] + 0.5 * gt_w 264 | # 计算先验框中心,高宽 265 | a_h = a[2] - a[0] 266 | a_w = a[3] - a[1] 267 | a_center_y = a[0] + 0.5 * a_h 268 | a_center_x = a[1] + 0.5 * a_w 269 | # 编码运算 270 | rpn_bbox[ix] = [ 271 | (gt_center_y - a_center_y) / np.maximum(a_h, 1), 272 | (gt_center_x - a_center_x) / np.maximum(a_w, 1), 273 | np.log(np.maximum(gt_h / np.maximum(a_h, 1), 1e-5)), 274 | np.log(np.maximum(gt_w / np.maximum(a_w, 1), 1e-5)), 275 | ] 276 | # 改变数量级 277 | rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV 278 | ix += 1 279 | return rpn_match, rpn_bbox 280 | 281 | 282 | 283 | 284 | def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None, 285 | batch_size=1, detection_targets=False, 286 | no_augmentation_sources=None): 287 | """ 288 | 网络输入清单 289 | - images: [batch, H, W, C] 290 | - image_meta: [batch, (meta data)] 图像详细信息。 291 | - rpn_match: [batch, N] 代表建议框的匹配情况 (1=正样本, -1=负样本, 0=中性) 292 | - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] 建议框网络应该有的预测结果. 293 | - gt_class_ids: [batch, MAX_GT_INSTANCES] 种类ID 294 | - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] 295 | - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. 
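(when config.USE_MINI_MASK is enabled, masks are stored at the
reduced MINI_MASK_SHAPE resolution instead of the full image size)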
296 | 网络输出清单: 297 | 在常规训练中通常是空的。 298 | """ 299 | b = 0 # batch item index 300 | image_index = -1 301 | image_ids = np.copy(dataset.image_ids) 302 | no_augmentation_sources = no_augmentation_sources or [] 303 | 304 | # [anchor_count, (y1, x1, y2, x2)] 305 | # 计算获得先验框 306 | backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE) 307 | anchors = generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, 308 | config.RPN_ANCHOR_RATIOS, 309 | backbone_shapes, 310 | config.BACKBONE_STRIDES, 311 | config.RPN_ANCHOR_STRIDE) 312 | 313 | while True: 314 | 315 | image_index = (image_index + 1) % len(image_ids) 316 | if shuffle and image_index == 0: 317 | np.random.shuffle(image_ids) 318 | 319 | # 获得id 320 | image_id = image_ids[image_index] 321 | 322 | # 获得图片,真实框,语义分割结果等 323 | if dataset.image_info[image_id]['source'] in no_augmentation_sources: 324 | image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ 325 | load_image_gt(dataset, config, image_id, augment=augment, 326 | augmentation=None, 327 | use_mini_mask=config.USE_MINI_MASK) 328 | else: 329 | image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ 330 | load_image_gt(dataset, config, image_id, augment=augment, 331 | augmentation=augmentation, 332 | use_mini_mask=config.USE_MINI_MASK) 333 | 334 | if not np.any(gt_class_ids > 0): 335 | continue 336 | 337 | # RPN Targets 338 | rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, 339 | gt_class_ids, gt_boxes, config) 340 | 341 | # 如果某张图片里面物体的数量大于最大值的话,则进行筛选,防止过大 342 | if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: 343 | ids = np.random.choice( 344 | np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) 345 | gt_class_ids = gt_class_ids[ids] 346 | gt_boxes = gt_boxes[ids] 347 | gt_masks = gt_masks[:, :, ids] 348 | 349 | # 初始化用于训练的内容 350 | if b == 0: 351 | batch_image_meta = np.zeros( 352 | (batch_size,) + image_meta.shape, dtype=image_meta.dtype) 353 | batch_rpn_match = np.zeros( 354 | [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) 355 | batch_rpn_bbox = np.zeros( 356 | [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) 357 | batch_images = np.zeros( 358 | (batch_size,) + image.shape, dtype=np.float32) 359 | batch_gt_class_ids = np.zeros( 360 | (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) 361 | batch_gt_boxes = np.zeros( 362 | (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) 363 | batch_gt_masks = np.zeros( 364 | (batch_size, gt_masks.shape[0], gt_masks.shape[1], 365 | config.MAX_GT_INSTANCES), dtype=gt_masks.dtype) 366 | 367 | # 将当前信息加载进batch 368 | batch_image_meta[b] = image_meta 369 | batch_rpn_match[b] = rpn_match[:, np.newaxis] 370 | batch_rpn_bbox[b] = rpn_bbox 371 | batch_images[b] = utils.mold_image(image.astype(np.float32), config) 372 | batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids 373 | batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes 374 | batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks 375 | 376 | b += 1 377 | 378 | # 判断是否已经将batch_size全部载入 379 | if b >= batch_size: 380 | inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, 381 | batch_gt_class_ids, batch_gt_boxes, batch_gt_masks] 382 | outputs = [] 383 | 384 | yield inputs, outputs 385 | # 开始一个新的batch_size 386 | b = 0 -------------------------------------------------------------------------------- /mrcnn/restnet.py: -------------------------------------------------------------------------------- 1 | from tensorflow.keras.layers import ZeroPadding2D,Conv2D,MaxPooling2D,BatchNormalization,Activation,Add 2 
| def identity_block(input_tensor, kernel_size, filters, stage, block, 3 | use_bias=True, train_bn=True): 4 | nb_filter1, nb_filter2, nb_filter3 = filters 5 | conv_name_base = 'res' + str(stage) + block + '_branch' 6 | bn_name_base = 'bn' + str(stage) + block + '_branch' 7 | 8 | x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', 9 | use_bias=use_bias)(input_tensor) 10 | x = BatchNormalization(name=bn_name_base + '2a')(x, training=train_bn) 11 | x = Activation('relu')(x) 12 | 13 | x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 14 | name=conv_name_base + '2b', use_bias=use_bias)(x) 15 | x = BatchNormalization(name=bn_name_base + '2b')(x, training=train_bn) 16 | x = Activation('relu')(x) 17 | 18 | x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', 19 | use_bias=use_bias)(x) 20 | x = BatchNormalization(name=bn_name_base + '2c')(x, training=train_bn) 21 | 22 | x = Add()([x, input_tensor]) 23 | x = Activation('relu', name='res' + str(stage) + block + '_out')(x) 24 | return x 25 | 26 | def conv_block(input_tensor, kernel_size, filters, stage, block, 27 | strides=(2, 2), use_bias=True, train_bn=True): 28 | 29 | nb_filter1, nb_filter2, nb_filter3 = filters 30 | conv_name_base = 'res' + str(stage) + block + '_branch' 31 | bn_name_base = 'bn' + str(stage) + block + '_branch' 32 | 33 | x = Conv2D(nb_filter1, (1, 1), strides=strides, 34 | name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) 35 | x = BatchNormalization(name=bn_name_base + '2a')(x, training=train_bn) 36 | x = Activation('relu')(x) 37 | 38 | x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', 39 | name=conv_name_base + '2b', use_bias=use_bias)(x) 40 | x = BatchNormalization(name=bn_name_base + '2b')(x, training=train_bn) 41 | x = Activation('relu')(x) 42 | 43 | x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + 44 | '2c', use_bias=use_bias)(x) 45 | x = BatchNormalization(name=bn_name_base + '2c')(x, training=train_bn) 46 | 47 | shortcut = Conv2D(nb_filter3, (1, 1), strides=strides, 48 | name=conv_name_base + '1', use_bias=use_bias)(input_tensor) 49 | shortcut = BatchNormalization(name=bn_name_base + '1')(shortcut, training=train_bn) 50 | 51 | x = Add()([x, shortcut]) 52 | x = Activation('relu', name='res' + str(stage) + block + '_out')(x) 53 | return x 54 | 55 | def get_resnet(input_image,stage5=False, train_bn=True): 56 | # Stage 1 57 | x = ZeroPadding2D((3, 3))(input_image) 58 | x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) 59 | x = BatchNormalization(name='bn_conv1')(x, training=train_bn) 60 | x = Activation('relu')(x) 61 | # Height/4,Width/4,64 62 | C1 = x = MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) 63 | # Stage 2 64 | x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1), train_bn=train_bn) 65 | x = identity_block(x, 3, [64, 64, 256], stage=2, block='b', train_bn=train_bn) 66 | # Height/4,Width/4,256 67 | C2 = x = identity_block(x, 3, [64, 64, 256], stage=2, block='c', train_bn=train_bn) 68 | # Stage 3 69 | x = conv_block(x, 3, [128, 128, 512], stage=3, block='a', train_bn=train_bn) 70 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='b', train_bn=train_bn) 71 | x = identity_block(x, 3, [128, 128, 512], stage=3, block='c', train_bn=train_bn) 72 | # Height/8,Width/8,512 73 | C3 = x = identity_block(x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn) 74 | # Stage 4 75 | x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a', train_bn=train_bn) 76 | block_count = 22 77 | for i in range(block_count): 
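# Editorial note: block_count = 22 matches ResNet-101 (ResNet-50 would
# use 5); chr(98 + i) yields the Keras block name suffixes 'b', 'c', 'd', ...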
78 | x = identity_block(x, 3, [256, 256, 1024], stage=4, block=chr(98 + i), train_bn=train_bn) 79 | # Height/16,Width/16,1024 80 | C4 = x 81 | # Stage 5 82 | if stage5: 83 | x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a', train_bn=train_bn) 84 | x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b', train_bn=train_bn) 85 | # Height/32,Width/32,2048 86 | C5 = x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn) 87 | else: 88 | C5 = None 89 | return [C1, C2, C3, C4, C5] -------------------------------------------------------------------------------- /parallel_model.py: -------------------------------------------------------------------------------- 1 | ''' 2 | Mask RCNN 3 | Multi-GPU Support for tensorflow 4 | 5 | Ideas and small code snippets from these sources: 6 | https://github.com/fchollet/keras/issues/2436 7 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 8 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 9 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 10 | ''' 11 | 12 | import tensorflow as tf 13 | import tensorflow.keras.backend as K 14 | import tensorflow.keras.layers as KL 15 | import tensorflow.keras.models as KM 16 | 17 | 18 | class ParallelModel(KM.Model): 19 | ''' 20 | Subclasses the standard Keras Model and adds multi-GPU support. 21 | It works by creating a copy of the model on each GPU. Then it slices the inputs and 22 | sends a slice to each copy of the model, and then merges the outputs together and 23 | applies the loss on the combined outputs. 24 | ''' 25 | 26 | def __init__(self, mask_rcnn_model, gpu_count): 27 | ''' 28 | Class constructor 29 | mask_rcnn_model: The model to parallelize 30 | gpu_count: Number of GPUs. Must be > 1 31 | ''' 32 | self.inner_model = mask_rcnn_model 33 | self.gpu_count = gpu_count 34 | merge_outputs = self.make_parallel() 35 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, outputs=merge_outputs) 36 | 37 | 38 | def __getattribute__(self, attrname): 39 | ''' 40 | Redirect loading and saving methods to the inner model. That's where the weights are stored. 41 | ''' 42 | if 'load' in attrname or 'save' in attrname: 43 | return getattr(self.inner_model, attrname) 44 | return super(ParallelModel, self).__getattribute__(attrname) 45 | 46 | def summary(self, *args, **kwargs): 47 | ''' 48 | Override summary() to display summaries of both the wrapper and the inner model 49 | ''' 50 | super(ParallelModel, self).summary(*args, **kwargs) 51 | self.inner_model.summary(*args, **kwargs) 52 | 53 | def make_parallel(self): 54 | ''' 55 | Creates a new wrapper model that consists of multiple replicas of the original model placed on different GPUs. 56 | ''' 57 | # Slice inputs. Slicing is done on the CPU to avoid sending a copy 58 | # of the full inputs to all GPUs.
Saves on bandwidth and memory 59 | input_slices = {name:tf.split(x, self.gpu_count) for name, x in zip(self.inner_model.input_names,self.inner_model.inputs)} 60 | output_names = self.inner_model.output_names 61 | outputs_all = [] 62 | for i in range(len(self.inner_model.outputs)): 63 | outputs_all.append([]) 64 | 65 | # Run the model call() on each GPU to place the ops there 66 | for i in range(self.gpu_count): 67 | with tf.device('/gpu:%d' % i): 68 | with tf.name_scope('tower_%d' % i): 69 | # Run a slice of inputs through this replica 70 | zipped_inputs = zip(self.inner_model.input_names, 71 | self.inner_model.inputs) 72 | inputs = [ 73 | KL.Lambda(lambda s: input_slices[name][i], 74 | output_shape=lambda s: (None,) + s[1:])(tensor) 75 | for name, tensor in zipped_inputs] 76 | # Create the model replica and get the outputs 77 | outputs = self.inner_model(inputs) 78 | if not isinstance(outputs, list): 79 | outputs = [outputs] 80 | # Save the outputs for merging back together later 81 | for l, o in enumerate(outputs): 82 | outputs_all[l].append(o) 83 | 84 | # Merge outputs on CPU 85 | with tf.device('/cpu:0'): 86 | merged = [] 87 | for outputs, name in zip(outputs_all, output_names): 88 | # Concatenate or average outputs? 89 | # Outputs usually have a batch dimension and we concatenate 90 | # across it. If they don't, then the output is likely a loss 91 | # or a metric value that gets averaged across the batch. 92 | # Keras expects losses and metrics to be scalars. 93 | if K.int_shape(outputs[0]) == (): 94 | # Average 95 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 96 | else: 97 | # Concatenate 98 | m = KL.Concatenate(axis=0, name=name)(outputs) 99 | merged.append(m) 100 | return merged 101 | 102 | if __name__ == "__main__": 103 | # Testing code below. It creates a simple model to train on MNIST and 104 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 105 | # in TensorBoard. Run it as: 106 | # 107 | # python3 parallel_model.py 108 | 109 | import os 110 | import numpy as np 111 | import keras.optimizers 112 | from keras.datasets import mnist 113 | from keras.preprocessing.image import ImageDataGenerator 114 | 115 | GPU_COUNT = 2 116 | 117 | # Root directory of the project 118 | ROOT_DIR = os.path.abspath("../") 119 | 120 | # Directory to save logs and trained model 121 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 122 | 123 | def build_model(x_train, num_classes): 124 | # Reset default graph. Keras leaves old ops in the graph, 125 | # which are ignored for execution but clutter graph 126 | # visualization in TensorBoard. 
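# (Under TF2, graph reset is exposed as tf.compat.v1.reset_default_graph()
# and is a no-op in eager mode.)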
127 | tf.compat.v1.reset_default_graph()
128 | 
129 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
130 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
131 | name="conv1")(inputs)
132 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
133 | name="conv2")(x)
134 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
135 | x = KL.Flatten(name="flat1")(x)
136 | x = KL.Dense(128, activation='relu', name="dense1")(x)
137 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
138 | 
139 | return KM.Model(inputs, x, "digit_classifier_model")
140 | 
141 | # Load MNIST Data
142 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
143 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255
144 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255
145 | 
146 | print('x_train shape:', x_train.shape)
147 | print('x_test shape:', x_test.shape)
148 | 
149 | # Build data generator and model
150 | datagen = ImageDataGenerator()
151 | model = build_model(x_train, 10)
152 | 
153 | # Add multi-GPU support.
154 | model = ParallelModel(model, GPU_COUNT)
155 | 
156 | optimizer = tf.keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
157 | 
158 | model.compile(loss='sparse_categorical_crossentropy',
159 | optimizer=optimizer, metrics=['accuracy'])
160 | 
161 | model.summary()
162 | 
163 | # Train
164 | model.fit_generator(
165 | datagen.flow(x_train, y_train, batch_size=64),
166 | steps_per_epoch=50, epochs=10, verbose=1,
167 | validation_data=(x_test, y_test),
168 | callbacks=[tf.keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
169 | write_graph=True)]
170 | )
171 | 
172 | 
173 | 
-------------------------------------------------------------------------------- /regularization.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import cv2
3 | from contourprocess import regularization
4 | 
5 | 
6 | ori_img1 = cv2.imread('./test.jpg')
7 | # Median blur to remove noise
8 | ori_img = cv2.medianBlur(ori_img1, 5)
9 | ori_img = cv2.cvtColor(ori_img, cv2.COLOR_BGR2GRAY)
10 | ret, ori_img = cv2.threshold(ori_img, 127, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
11 | # Connected-component analysis
12 | num_labels, labels, stats, centroids = cv2.connectedComponentsWithStats(ori_img, connectivity=8)
13 | 
14 | regularization_contours = []
15 | # Iterate over the connected components (label 0 is the background)
16 | for i in range(1, num_labels):
17 | img = np.zeros_like(labels)
18 | index = np.where(labels==i)
19 | img[index] = 255
20 | img = np.array(img, dtype=np.uint8)
21 | 
22 | regularization_contour = regularization.boundary_regularization(img).astype(np.int32)
23 | regularization_contours.append(regularization_contour)
24 | 
25 | single_out = np.zeros_like(ori_img1)
26 | cv2.polylines(img=single_out, pts=[regularization_contour], isClosed=True, color=(255, 0, 0), thickness=3)
27 | cv2.imwrite('./result/single_out_{}.jpg'.format(i), single_out)
28 | 
29 | 
30 | 
31 | cv2.polylines(img=ori_img1, pts=regularization_contours, isClosed=True, color=(255, 0, 0), thickness=3)
32 | cv2.imwrite('all_out.jpg', ori_img1)
-------------------------------------------------------------------------------- /requirements.txt: --------------------------------------------------------------------------------
1 | numpy
2 | scipy
3 | Pillow
4 | cython
5 | matplotlib
6 | scikit-image
7 | tensorflow>=1.3.0
8 | keras>=2.0.8
9 | opencv-python
10 | h5py
11 | imgaug
-------------------------------------------------------------------------------- /train.py: --------------------------------------------------------------------------------
1 | 
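# Trains Mask R-CNN on a labelme-style dataset laid out under
# CustomerConfig.TRAIN_DATASET: images in imgs/, cached instance masks as
# .npz files in mask/, and per-image label yaml files in yaml/.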
import os
2 | from PIL import Image
3 | import numpy as np
4 | import random
5 | import tensorflow as tf
6 | from utils import visualize
7 | from utils.anchors import get_anchors
8 | from utils.utils import mold_inputs,unmold_detections
9 | from mrcnn.mrcnn import get_model
10 | from mrcnn.mrcnn_training import data_generator
11 | from utils.customerDataset import CustomerDataset
12 | from config import CustomerConfig
13 | 
14 | # tf.compat.v1.disable_eager_execution()
15 | 
16 | def log(text, array=None):
17 | if array is not None:
18 | text = text.ljust(25)
19 | text += ("shape: {:20} ".format(str(array.shape)))
20 | if array.size:
21 | text += ("min: {:10.5f} max: {:10.5f}".format(array.min(),array.max()))
22 | else:
23 | text += ("min: {:10} max: {:10}".format("",""))
24 | text += " {}".format(array.dtype)
25 | print(text)
26 | 
27 | 
28 | 
29 | if __name__ == "__main__":
30 | learning_rate = CustomerConfig.LEARNING_RATE
31 | init_epoch = 0
32 | epoch = CustomerConfig.EPOCH
33 | 
34 | dataset_root_path = CustomerConfig.TRAIN_DATASET
35 | img_floder = os.path.join(dataset_root_path, "imgs")
36 | mask_floder = os.path.join(dataset_root_path, "mask")
37 | yaml_floder = os.path.join(dataset_root_path, "yaml")
38 | imglist = os.listdir(img_floder)
39 | 
40 | count = len(imglist)
41 | np.random.seed(10101)
42 | np.random.shuffle(imglist)
43 | train_imglist = imglist[:int(count*0.9)]
44 | val_imglist = imglist[int(count*0.9):]
45 | 
46 | MODEL_DIR = "logs"
47 | 
48 | COCO_MODEL_PATH = CustomerConfig.PRETRAIN_MODEL
49 | config = CustomerConfig()
50 | # Derive steps per epoch from the train/val split sizes
51 | config.STEPS_PER_EPOCH = len(train_imglist)//config.IMAGES_PER_GPU
52 | config.VALIDATION_STEPS = len(val_imglist)//config.IMAGES_PER_GPU
53 | config.display()
54 | 
55 | # Prepare the training dataset
56 | dataset_train = CustomerDataset()
57 | dataset_train.load_dataset(config.NAME,len(train_imglist), config.CLASSES, img_floder, mask_floder, train_imglist, yaml_floder, train_mode=True)
58 | dataset_train.prepare()
59 | 
60 | # Prepare the validation dataset
61 | dataset_val = CustomerDataset()
62 | dataset_val.load_dataset(config.NAME,len(val_imglist), config.CLASSES, img_floder, mask_floder, val_imglist, yaml_floder, train_mode=True)
63 | dataset_val.prepare()
64 | 
65 | # Build the model in training mode and load the pretrained weights
66 | model = get_model(config, training=True)
67 | model.summary()
68 | model.load_weights(COCO_MODEL_PATH,by_name=True,skip_mismatch=True)
69 | 
70 | # Data generators
71 | train_generator = data_generator(dataset_train, config, shuffle=True,
72 | batch_size=config.BATCH_SIZE)
73 | val_generator = data_generator(dataset_val, config, shuffle=True,
74 | batch_size=config.BATCH_SIZE)
75 | 
76 | # Set up callbacks
77 | tensorboard = tf.keras.callbacks.TensorBoard(log_dir=MODEL_DIR,histogram_freq=0, write_graph=True, write_images=False)
78 | model_ckp= tf.keras.callbacks.ModelCheckpoint(os.path.join(MODEL_DIR, "building_new.h5"),verbose=0, save_weights_only=True)
79 | early_stop = tf.keras.callbacks.EarlyStopping(monitor='val_loss', min_delta=0, patience=5, verbose=1)
80 | learning_rate_reduce = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3, verbose=1)
81 | callbacks = [tensorboard, model_ckp, early_stop, learning_rate_reduce]
82 | 
83 | # callbacks = [
84 | # tf.keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
85 | # histogram_freq=0, write_graph=True, write_images=False),
86 | # tf.keras.callbacks.ModelCheckpoint(os.path.join(MODEL_DIR, "epoch{epoch:03d}_loss{loss:.3f}_val_loss{val_loss:.3f}.h5"),
87 | # verbose=0, save_weights_only=True),
88 | # ]
89 | 
90 | 
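# The block below wires the five Mask R-CNN loss layers into the model by hand:
# each loss tensor is registered via add_loss() (scaled by LOSS_WEIGHTS), L2
# weight decay is added for all non-BatchNorm weights, and compile() is called
# with loss=None for every output, so training is driven by the added losses.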
91 | if True:
92 | log("\nStarting at epoch {}. LR={}\n".format(init_epoch, learning_rate))
93 | log("Checkpoint Path: {}".format(MODEL_DIR))
94 | 
95 | # Optimizer: Adam with gradient clipping
96 | optimizer = tf.keras.optimizers.Adam(lr=learning_rate, clipnorm=config.GRADIENT_CLIP_NORM)
97 | 
98 | # Register the loss layers
99 | loss_names = [
100 | "rpn_class_loss", "rpn_bbox_loss",
101 | "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"]
102 | for name in loss_names:
103 | layer = model.get_layer(name)
104 | if layer.output in model.losses:
105 | continue
106 | loss = (
107 | tf.reduce_mean(input_tensor=layer.output, keepdims=True)
108 | * config.LOSS_WEIGHTS.get(name, 1.))
109 | model.add_loss(loss)
110 | 
111 | # Add L2 Regularization
112 | # Skip gamma and beta weights of batch normalization layers.
113 | reg_losses = [
114 | tf.keras.regularizers.l2(config.WEIGHT_DECAY)(w) / tf.cast(tf.size(input=w), tf.float32)
115 | for w in model.trainable_weights
116 | if 'gamma' not in w.name and 'beta' not in w.name]
117 | model.add_loss(tf.add_n(reg_losses))
118 | 
119 | 
120 | # Compile the model
121 | model.compile(
122 | optimizer=optimizer,
123 | loss=[None] * len(model.outputs)
124 | )
125 | 
126 | # Add the losses as named metrics so they show up during training
127 | for name in loss_names:
128 | if name in model.metrics_names:
129 | print(name)
130 | continue
131 | layer = model.get_layer(name)
132 | model.metrics_names.append(name)
133 | loss = (
134 | tf.reduce_mean(input_tensor=layer.output, keepdims=True)
135 | * config.LOSS_WEIGHTS.get(name, 1.))
136 | model.add_metric(loss, name=name, aggregation='mean')
137 | 
138 | 
139 | model.fit_generator(
140 | train_generator,
141 | initial_epoch=init_epoch,
142 | epochs=epoch,
143 | steps_per_epoch=config.STEPS_PER_EPOCH,
144 | callbacks=callbacks,
145 | validation_data=val_generator,
146 | validation_steps=config.VALIDATION_STEPS,
147 | max_queue_size=100
148 | )
149 | 
-------------------------------------------------------------------------------- /utils/anchors.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import math
3 | from utils.utils import norm_boxes
4 | #----------------------------------------------------------#
5 | # Anchors
6 | #----------------------------------------------------------#
7 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
8 | # Enumerate all combinations of anchor scales and aspect ratios
9 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
10 | scales = scales.flatten()
11 | ratios = ratios.flatten()
12 | heights = scales / np.sqrt(ratios)
13 | widths = scales * np.sqrt(ratios)
14 | 
15 | # Generate the grid of anchor centres in image coordinates
16 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
17 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
18 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
19 | 
20 | # Combine the centres with the anchor widths and heights
21 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
22 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
23 | 
24 | # Reshape into lists of (y, x) centres and (h, w) sizes
25 | box_centers = np.stack(
26 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
27 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
28 | 
29 | # Convert to corner coordinates (y1, x1, y2, x2)
30 | boxes = np.concatenate([box_centers - 0.5 * box_sizes,
31 | box_centers + 0.5 * box_sizes], axis=1)
32 | return boxes
33 | 
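# Hedged sanity check: for scale 32 on the stride-4 P2 map of a 1024x1024
# input, shape=(256, 256) and anchor_stride=1 yield 256*256*3 = 196,608
# anchors with areas close to 32**2 at ratios 0.5, 1 and 2, e.g.:
#   boxes = generate_anchors(32, [0.5, 1, 2], (256, 256), 4, 1)
#   assert boxes.shape == (196608, 4)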
34 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
35 | anchor_stride):
36 | """
37 | Generate anchors for each feature level and stack them with concatenate
38 | """
39 | # Anchors
40 | # [anchor_count, (y1, x1, y2, x2)]
41 | # P2 uses scale 32
42 | # P3 uses scale 64
43 | # P4 uses scale 128
44 | # P5 uses scale 256
45 | # P6 uses scale 512
46 | anchors = []
47 | for i in range(len(scales)):
48 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
49 | feature_strides[i], anchor_stride))
50 | 
51 | return np.concatenate(anchors, axis=0)
52 | 
53 | def compute_backbone_shapes(config, image_shape):
54 | # Compute the output shapes of the backbone feature maps
55 | if callable(config.BACKBONE):
56 | return config.COMPUTE_BACKBONE_SHAPE(image_shape)
57 | # i.e. the widths and heights of the P2, P3, P4, P5 and P6 feature levels
58 | assert config.BACKBONE in ["resnet50", "resnet101"]
59 | return np.array(
60 | [[int(math.ceil(image_shape[0] / stride)),
61 | int(math.ceil(image_shape[1] / stride))]
62 | for stride in config.BACKBONE_STRIDES])
63 | 
64 | # Cache anchors per image shape at module level so repeated calls reuse them
65 | anchor_cache = {}
66 | def get_anchors(config, image_shape):
67 | backbone_shapes = compute_backbone_shapes(config, image_shape)
68 | if not tuple(image_shape) in anchor_cache:
69 | a = generate_pyramid_anchors(
70 | config.RPN_ANCHOR_SCALES,
71 | config.RPN_ANCHOR_RATIOS,
72 | backbone_shapes,
73 | config.BACKBONE_STRIDES,
74 | config.RPN_ANCHOR_STRIDE)
75 | anchor_cache[tuple(image_shape)] = norm_boxes(a, image_shape[:2])
76 | return anchor_cache[tuple(image_shape)]
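# Hedged usage sketch: anchors are generated once per input shape, normalized,
# cached, and then broadcast to the batch at train/inference time, e.g.:
#   anchors = get_anchors(config, config.IMAGE_SHAPE)   # [N, (y1, x1, y2, x2)], normalized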
-------------------------------------------------------------------------------- /utils/config.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | class Config(object):
4 | """
5 | Base configuration class. For custom configurations, create a
6 | subclass that inherits from this one and overrides the attributes.
7 | """
8 | # Configuration name
9 | NAME = None
10 | 
11 | # Number of GPUs
12 | GPU_COUNT = 1
13 | 
14 | # Number of images per GPU
15 | IMAGES_PER_GPU = 2
16 | 
17 | # Training steps per epoch
18 | STEPS_PER_EPOCH = 1000
19 | 
20 | # Number of validation steps per epoch
21 | VALIDATION_STEPS = 50
22 | 
23 | COMPUTE_BACKBONE_SHAPE = None
24 | 
25 | # Strides of the feature pyramid levels
26 | BACKBONE_STRIDES = [4, 8, 16, 32, 64]
27 | 
28 | # Size of the fully-connected layers in the classification head
29 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024
30 | 
31 | # Size of the top-down layers used to build the feature pyramid
32 | TOP_DOWN_PYRAMID_SIZE = 256
33 | 
34 | # Number of classes (including background)
35 | NUM_CLASSES = 1
36 | 
37 | # Anchor scales in pixels
38 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
39 | # Anchor aspect ratios
40 | RPN_ANCHOR_RATIOS = [0.5, 1, 2]
41 | 
42 | # Anchor stride on the feature map
43 | RPN_ANCHOR_STRIDE = 1
44 | 
45 | # Non-maximum-suppression threshold for RPN proposals
46 | RPN_NMS_THRESHOLD = 0.7
47 | 
48 | # How many anchors per image to use for RPN training
49 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256
50 | 
51 | # Number of ROIs kept before non-maximum suppression
52 | PRE_NMS_LIMIT = 6000
53 | 
54 | # ROIs kept after non-maximum suppression (training and inference)
55 | POST_NMS_ROIS_TRAINING = 2000
56 | POST_NMS_ROIS_INFERENCE = 1000
57 | 
58 | # Whether to use mini masks
59 | USE_MINI_MASK = True
60 | MINI_MASK_SHAPE = (56, 56) # (height, width)
61 | 
62 | BACKBONE = "resnet101"
63 | # Image resize modes:
64 | # square: resize and pad with zeros to get a square image of size [max_dim, max_dim].
65 | # pad64: pads width and height with zeros so they become multiples of 64. If IMAGE_MIN_DIM or IMAGE_MIN_SCALE is not None, it scales up before padding.
66 | #        IMAGE_MAX_DIM is ignored in this mode. The multiple of 64 is needed so feature maps scale smoothly across the 6 levels of the FPN pyramid (2**6=64).
67 | # crop: picks random crops from the image. First the image is scaled based on IMAGE_MIN_DIM and IMAGE_MIN_SCALE,
68 | #       then a random crop of size IMAGE_MIN_DIM x IMAGE_MIN_DIM is picked. Can only be used in training. IMAGE_MAX_DIM is not used in this mode.
69 | IMAGE_RESIZE_MODE = "square"
70 | IMAGE_MIN_DIM = 800
71 | IMAGE_MAX_DIM = 1024
72 | # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further scaling up. For example, if set to 2,
73 | # images are scaled up to at least double their width and height even if IMAGE_MIN_DIM doesn't require it. In 'square' mode, however, it can be overruled by IMAGE_MAX_DIM.
74 | IMAGE_MIN_SCALE = 0
75 | # RGB = 3, grayscale = 1, RGB-D = 4
76 | IMAGE_CHANNEL_COUNT = 3
77 | 
78 | # Image mean (RGB)
79 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
80 | 
81 | # Number of ROIs fed to the heads per image during training
82 | TRAIN_ROIS_PER_IMAGE = 200
83 | 
84 | # Fraction of positive ROIs
85 | ROI_POSITIVE_RATIO = 0.33
86 | 
87 | # ROI pooling sizes
88 | POOL_SIZE = 7
89 | MASK_POOL_SIZE = 14
90 | 
91 | # Mask
92 | MASK_SHAPE = [28, 28]
93 | 
94 | MAX_GT_INSTANCES = 100
95 | 
96 | # Bounding-box refinement standard deviations
97 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
98 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
99 | 
100 | DETECTION_MAX_INSTANCES = 100
101 | 
102 | # Minimum detection confidence
103 | DETECTION_MIN_CONFIDENCE = 0.7
104 | 
105 | # Detection non-maximum-suppression threshold
106 | DETECTION_NMS_THRESHOLD = 0.3
107 | 
108 | WEIGHT_DECAY = 0.0001
109 | 
110 | # Loss weights
111 | LOSS_WEIGHTS = {
112 | "rpn_class_loss": 1.,
113 | "rpn_bbox_loss": 1.,
114 | "mrcnn_class_loss": 1.,
115 | "mrcnn_bbox_loss": 1.,
116 | "mrcnn_mask_loss": 1.
117 | }
118 | 
119 | 
120 | USE_RPN_ROIS = True
121 | 
122 | # Train (True) or freeze (False) the BatchNorm layers
123 | TRAIN_BN = False
124 | 
125 | GRADIENT_CLIP_NORM = 5.0
126 | 
127 | def __init__(self):
128 | # Effective batch size
129 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
130 | 
131 | if self.IMAGE_RESIZE_MODE == "crop":
132 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
133 | self.IMAGE_CHANNEL_COUNT])
134 | else:
135 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
136 | self.IMAGE_CHANNEL_COUNT])
137 | 
138 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
139 | 
140 | def display(self):
141 | print("\nConfigurations:")
142 | for a in dir(self):
143 | if not a.startswith("__") and not callable(getattr(self, a)):
144 | print("{:30} {}".format(a, getattr(self, a)))
145 | print("\n")
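# Hedged usage sketch (the real subclass lives in the top-level config.py as
# CustomerConfig; the names below are illustrative only):
#   class BuildingConfig(Config):
#       NAME = "building"
#       NUM_CLASSES = 1 + 1      # background + building
#       IMAGES_PER_GPU = 2
#   config = BuildingConfig()
#   config.display()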
-------------------------------------------------------------------------------- /utils/customerDataset.py: --------------------------------------------------------------------------------
1 | import os
2 | import numpy as np
3 | from PIL import Image
4 | import yaml
5 | from .dataset import Dataset
6 | from .utils import non_max_suppression
7 | 
8 | class CustomerDataset(Dataset):
9 | # Number of instances (objects) in the image
10 | def get_obj_index(self, image):
11 | n = np.max(image)
12 | return n
13 | # Parse the yaml file written by labelme to get the instance label of each mask layer
14 | def from_yaml_get_class(self,image_id):
15 | info=self.image_info[image_id]
16 | with open(info['yaml_path']) as f:
17 | temp=yaml.load(f.read(), Loader=yaml.FullLoader)
18 | labels=temp['label_names']
19 | del labels[0]
20 | return labels
21 | 
22 | def draw_mask(self, num_obj, mask, image, image_id):
23 | info = self.image_info[image_id]
24 | save_path = os.path.dirname(info['mask_path'])
25 | base_name = os.path.basename(info['path'])
26 | base_name = os.path.splitext(base_name)[0]
27 | npz_save_name = os.path.join(save_path, base_name + '.npz')
28 | # Skip if the npz file has already been generated
29 | if os.path.exists(npz_save_name):
30 | return None
31 | for index in range(num_obj):
32 | for i in range(np.shape(mask)[1]):
33 | for j in range(np.shape(mask)[0]):
34 | at_pixel = image.getpixel((i, j))
35 | if at_pixel == index + 1:
36 | mask[j, i, index] = 1
37 | np.savez_compressed(npz_save_name, mask)
38 | return mask
39 | 
40 | # Adds path, mask_path and yaml_path entries to self.image_info
41 | def load_dataset(self, shape_name, count, classes, img_floder, mask_floder, imglist, yaml_floder, train_mode = True):
42 | for index, item in enumerate(classes):
43 | self.add_class(shape_name, index+1, item)
44 | for i in range(count):
45 | img = imglist[i]
46 | if img.endswith(".jpg"):
47 | img_name = img.split(".")[0]
48 | img_path = os.path.join(img_floder, img)
49 | if train_mode:
50 | mask_path = os.path.join(mask_floder, img_name + ".npz")
51 | else:
52 | # png label image, from which the npz cache is generated
53 | mask_path = os.path.join(mask_floder, img_name + ".png")
54 | yaml_path = os.path.join(yaml_floder, img_name + ".yaml")
55 | self.add_image(shape_name, image_id=i, path=img_path, mask_path=mask_path,yaml_path=yaml_path)
56 | # Override load_mask
57 | def load_mask(self, image_id, train_mode = True):
58 | info = self.image_info[image_id]
59 | if train_mode:
60 | # In training mode, load the cached npz mask
61 | mask = np.load(info['mask_path'])['arr_0']
62 | else:
63 | # Otherwise rasterize the png labels (and cache them as npz)
64 | img = Image.open(info['mask_path'])
65 | num_obj = self.get_obj_index(img)
66 | mask = np.zeros([np.shape(img)[0], np.shape(img)[1], num_obj], dtype=np.uint8)
67 | mask = self.draw_mask(num_obj, mask, img, image_id)
68 | labels=[]
69 | labels=self.from_yaml_get_class(image_id)
70 | class_ids = np.array([self.class_names.index(s) for s in labels])
71 | return mask, class_ids.astype(np.int32)
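# Hedged usage sketch: to pre-generate the .npz mask cache from the .png label
# images (the mask2npz.py helper in this repo appears to serve this purpose):
#   ds = CustomerDataset()
#   ds.load_dataset(name, count, classes, imgs_dir, mask_dir, imglist, yaml_dir, train_mode=False)
#   ds.prepare()
#   for i in range(count):
#       ds.load_mask(i, train_mode=False)   # rasterizes and caches each mask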
-------------------------------------------------------------------------------- /utils/dataset.py: --------------------------------------------------------------------------------
1 | import numpy as np
2 | import skimage
3 | import logging
4 | import skimage.color
5 | import skimage.io
6 | import skimage.transform
7 | #----------------------------------------------------------#
8 | # Dataset
9 | #----------------------------------------------------------#
10 | class Dataset(object):
11 | # Base class for training datasets
12 | def __init__(self, class_map=None):
13 | self._image_ids = []
14 | self.image_info = []
15 | # Background is always the first class
16 | self.class_info = [{"source": "", "id": 0, "name": "BG"}]
17 | self.source_class_ids = {}
18 | 
19 | def add_class(self, source, class_id, class_name):
20 | assert "." not in source, "Source name cannot contain a dot"
21 | # Add a new class
22 | for info in self.class_info:
23 | if info['source'] == source and info["id"] == class_id:
24 | return
25 | self.class_info.append({
26 | "source": source,
27 | "id": class_id,
28 | "name": class_name,
29 | })
30 | 
31 | def add_image(self, source, image_id, path, **kwargs):
32 | # Add an image to the training set
33 | image_info = {
34 | "id": image_id,
35 | "source": source,
36 | "path": path,
37 | }
38 | image_info.update(kwargs)
39 | self.image_info.append(image_info)
40 | 
41 | def image_reference(self, image_id):
42 | return ""
43 | 
44 | def prepare(self, class_map=None):
45 | # Prepare the dataset for use
46 | def clean_name(name):
47 | """Returns a shorter version of object names for cleaner display."""
48 | return ",".join(name.split(",")[:1])
49 | # Number of classes
50 | self.num_classes = len(self.class_info)
51 | self.class_ids = np.arange(self.num_classes)
52 | self.class_names = [clean_name(c["name"]) for c in self.class_info]
53 | 
54 | self.num_images = len(self.image_info)
55 | 
56 | self._image_ids = np.arange(self.num_images)
57 | 
58 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
59 | for info, id in zip(self.class_info, self.class_ids)}
60 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
61 | for info, id in zip(self.image_info, self.image_ids)}
62 | 
63 | # Build the list of sources
64 | self.sources = list(set([i['source'] for i in self.class_info]))
65 | self.source_class_ids = {}
66 | # Loop over datasets
67 | for source in self.sources:
68 | self.source_class_ids[source] = []
69 | # Find classes that belong to this dataset
70 | for i, info in enumerate(self.class_info):
71 | # Include BG class in all datasets
72 | if i == 0 or source == info['source']:
73 | self.source_class_ids[source].append(i)
74 | 
75 | def map_source_class_id(self, source_class_id):
76 | """Takes a source class ID and returns the int class ID assigned to it.
77 | For example:
78 | dataset.map_source_class_id("coco.12") -> 23
79 | """
80 | return self.class_from_source_map[source_class_id]
81 | 
82 | def get_source_class_id(self, class_id, source):
83 | """Map an internal class ID to the corresponding class ID in the source dataset."""
84 | info = self.class_info[class_id]
85 | assert info['source'] == source
86 | return info['id']
87 | 
88 | @property
89 | def image_ids(self):
90 | return self._image_ids
91 | 
92 | def source_image_link(self, image_id):
93 | return self.image_info[image_id]["path"]
94 | 
95 | def load_image(self, image_id):
96 | """
97 | Load the image and make sure it is RGB
98 | """
99 | # Load image
100 | image = skimage.io.imread(self.image_info[image_id]['path'])
101 | if image.ndim != 3:
102 | image = skimage.color.gray2rgb(image)
103 | if image.shape[-1] == 4:
104 | image = image[..., :3]
105 | return image
106 | 
107 | def load_mask(self, image_id):
108 | '''
109 | Load the instance masks
110 | '''
111 | logging.warning("You are using the default load_mask(), maybe you need to define your own one.")
112 | mask = np.empty([0, 0, 0])
113 | class_ids = np.empty([0], np.int32)
114 | return mask, class_ids
-------------------------------------------------------------------------------- /utils/utils.py: --------------------------------------------------------------------------------
1 | import sys
2 | import os
3 | import logging
4 | import math
5 | import random
6 | import skimage
7 | import skimage.transform
8 | import numpy as np
9 | import tensorflow as tf
10 | import scipy
11 | import urllib.request
12 | import shutil
13 | import warnings
14 | from distutils.version import LooseVersion
15 | 
16 | # tf.compat.v1.disable_eager_execution()
17 | # URL from which to download the latest COCO trained weights
18 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
19 | 
20 | 
21 | #----------------------------------------------------------#
22 | # Bounding Boxes
23 | #----------------------------------------------------------#
24 | 
25 | def extract_bboxes(mask):
26 | # Compute the bounding box that encloses each instance mask
27 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
28 | for i in range(mask.shape[-1]):
29 | m = mask[:, :, i]
30 | horizontal_indicies = np.where(np.any(m, axis=0))[0]
31 | vertical_indicies = np.where(np.any(m, axis=1))[0]
32 | if horizontal_indicies.shape[0]:
33 | x1, x2 = horizontal_indicies[[0, -1]]
34 | y1, y2 = vertical_indicies[[0, -1]]
35 | x2 += 1
36 | y2 += 1
37 | else:
38 | x1, x2, y1, y2 = 0, 0, 0, 0
39 | boxes[i] = np.array([y1, x1, y2, x2])
40 | return boxes.astype(np.int32)
41 | 
42 | 
43 | def compute_iou(box, boxes, box_area, boxes_area):
44 | """Calculates IoU of the given box with the array of the given boxes.
45 | box: 1D vector [y1, x1, y2, x2]
46 | boxes: [boxes_count, (y1, x1, y2, x2)]
47 | box_area: float. the area of 'box'
48 | boxes_area: array of length boxes_count.
49 | Note: the areas are passed in rather than calculated here for
50 | efficiency. Calculate once in the caller to avoid duplicate work.
51 | """
52 | # Calculate intersection areas
53 | y1 = np.maximum(box[0], boxes[:, 0])
54 | y2 = np.minimum(box[2], boxes[:, 2])
55 | x1 = np.maximum(box[1], boxes[:, 1])
56 | x2 = np.minimum(box[3], boxes[:, 3])
57 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
58 | union = box_area + boxes_area[:] - intersection[:]
59 | iou = intersection / union
60 | return iou
61 | 
62 | 
63 | def compute_overlaps(boxes1, boxes2):
64 | """Computes IoU overlaps between two sets of boxes.
65 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 66 | For better performance, pass the largest set first and the smaller second. 67 | """ 68 | # Areas of anchors and GT boxes 69 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 70 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 71 | 72 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 73 | # Each cell contains the IoU value. 74 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 75 | for i in range(overlaps.shape[1]): 76 | box2 = boxes2[i] 77 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 78 | return overlaps 79 | 80 | 81 | ''' 82 | compute_overlaps_masks() takes two arguments: 83 | mask1 and mask2. Assume mask1 has m instances, and mask2 has n instances, 84 | the function return an m*n array A. 85 | A[i, j] represents the IoU of ith instances of mask1 and jth instances of mask2. 86 | ''' 87 | def compute_overlaps_masks(masks1, masks2): 88 | """Computes IoU overlaps between two sets of masks. 89 | masks1, masks2: [Height, Width, instances] 90 | """ 91 | 92 | # If either set of masks is empty return empty result 93 | if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: 94 | return np.zeros((masks1.shape[-1], masks2.shape[-1])) 95 | # flatten masks and compute their areas 96 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 97 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 98 | area1 = np.sum(masks1, axis=0) 99 | area2 = np.sum(masks2, axis=0) 100 | 101 | # intersections and union 102 | intersections = np.dot(masks1.T, masks2) 103 | union = area1[:, None] + area2[None, :] - intersections 104 | overlaps = intersections / union 105 | 106 | return overlaps 107 | 108 | 109 | def non_max_suppression(boxes, scores, threshold): 110 | """Performs non-maximum suppression and returns indices of kept boxes. 111 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 112 | scores: 1-D array of box scores. 113 | threshold: Float. IoU threshold to use for filtering. 114 | """ 115 | assert boxes.shape[0] > 0 116 | if boxes.dtype.kind != "f": 117 | boxes = boxes.astype(np.float32) 118 | 119 | # Compute box areas 120 | y1 = boxes[:, 0] 121 | x1 = boxes[:, 1] 122 | y2 = boxes[:, 2] 123 | x2 = boxes[:, 3] 124 | area = (y2 - y1) * (x2 - x1) 125 | 126 | # Get indicies of boxes sorted by scores (highest first) 127 | ixs = scores.argsort()[::-1] 128 | 129 | pick = [] 130 | while len(ixs) > 0: 131 | # Pick top box and add its index to the list 132 | i = ixs[0] 133 | pick.append(i) 134 | # Compute IoU of the picked box with the rest 135 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 136 | # Identify boxes with IoU over the threshold. This 137 | # returns indices into ixs[1:], so add 1 to get 138 | # indices into ixs. 139 | remove_ixs = np.where(iou > threshold)[0] + 1 140 | # Remove indices of the picked and overlapped boxes. 141 | ixs = np.delete(ixs, remove_ixs) 142 | ixs = np.delete(ixs, 0) 143 | return np.array(pick, dtype=np.int32) 144 | 145 | 146 | def apply_box_deltas(boxes, deltas): 147 | """Applies the given deltas to the given boxes. 148 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 
149 | deltas: [N, (dy, dx, log(dh), log(dw))]
150 | """
151 | boxes = boxes.astype(np.float32)
152 | # Convert to y, x, h, w
153 | height = boxes[:, 2] - boxes[:, 0]
154 | width = boxes[:, 3] - boxes[:, 1]
155 | center_y = boxes[:, 0] + 0.5 * height
156 | center_x = boxes[:, 1] + 0.5 * width
157 | # Apply deltas
158 | center_y += deltas[:, 0] * height
159 | center_x += deltas[:, 1] * width
160 | height *= np.exp(deltas[:, 2])
161 | width *= np.exp(deltas[:, 3])
162 | # Convert back to y1, x1, y2, x2
163 | y1 = center_y - 0.5 * height
164 | x1 = center_x - 0.5 * width
165 | y2 = y1 + height
166 | x2 = x1 + width
167 | return np.stack([y1, x1, y2, x2], axis=1)
168 | 
169 | 
170 | def box_refinement_graph(box, gt_box):
171 | """
172 | Encode the refinement deltas between boxes and ground-truth boxes (graph version)
173 | """
174 | box = tf.cast(box, tf.float32)
175 | gt_box = tf.cast(gt_box, tf.float32)
176 | 
177 | height = box[:, 2] - box[:, 0]
178 | width = box[:, 3] - box[:, 1]
179 | center_y = box[:, 0] + 0.5 * height
180 | center_x = box[:, 1] + 0.5 * width
181 | 
182 | gt_height = gt_box[:, 2] - gt_box[:, 0]
183 | gt_width = gt_box[:, 3] - gt_box[:, 1]
184 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
185 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
186 | 
187 | dy = (gt_center_y - center_y) / height
188 | dx = (gt_center_x - center_x) / width
189 | dh = tf.math.log(gt_height / height)
190 | dw = tf.math.log(gt_width / width)
191 | 
192 | result = tf.stack([dy, dx, dh, dw], axis=1)
193 | return result
194 | 
195 | 
196 | def box_refinement(box, gt_box):
197 | """
198 | Encode the refinement deltas between boxes and ground-truth boxes
199 | """
200 | box = box.astype(np.float32)
201 | gt_box = gt_box.astype(np.float32)
202 | 
203 | height = box[:, 2] - box[:, 0]
204 | width = box[:, 3] - box[:, 1]
205 | center_y = box[:, 0] + 0.5 * height
206 | center_x = box[:, 1] + 0.5 * width
207 | 
208 | gt_height = gt_box[:, 2] - gt_box[:, 0]
209 | gt_width = gt_box[:, 3] - gt_box[:, 1]
210 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
211 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
212 | 
213 | dy = (gt_center_y - center_y) / height
214 | dx = (gt_center_x - center_x) / width
215 | dh = np.log(gt_height / height)
216 | dw = np.log(gt_width / width)
217 | 
218 | return np.stack([dy, dx, dh, dw], axis=1)
219 | 
220 | 
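# Hedged sanity sketch: box_refinement() and apply_box_deltas() are inverses
# (before the BBOX_STD_DEV scaling applied elsewhere in the pipeline):
#   anchor = np.array([[10., 10., 50., 50.]])
#   gt = np.array([[12., 8., 48., 52.]])
#   deltas = box_refinement(anchor, gt)
#   np.testing.assert_allclose(apply_box_deltas(anchor, deltas), gt, atol=1e-4)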
"pad64": 266 | h, w = image.shape[:2] 267 | # Both sides must be divisible by 64 268 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 269 | # Height 270 | if h % 64 > 0: 271 | max_h = h - (h % 64) + 64 272 | top_pad = (max_h - h) // 2 273 | bottom_pad = max_h - h - top_pad 274 | else: 275 | top_pad = bottom_pad = 0 276 | # Width 277 | if w % 64 > 0: 278 | max_w = w - (w % 64) + 64 279 | left_pad = (max_w - w) // 2 280 | right_pad = max_w - w - left_pad 281 | else: 282 | left_pad = right_pad = 0 283 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 284 | image = np.pad(image, padding, mode='constant', constant_values=0) 285 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 286 | elif mode == "crop": 287 | # Pick a random crop 288 | h, w = image.shape[:2] 289 | y = random.randint(0, (h - min_dim)) 290 | x = random.randint(0, (w - min_dim)) 291 | crop = (y, x, min_dim, min_dim) 292 | image = image[y:y + min_dim, x:x + min_dim] 293 | window = (0, 0, min_dim, min_dim) 294 | else: 295 | raise Exception("Mode {} not supported".format(mode)) 296 | return image.astype(image_dtype), window, scale, padding, crop 297 | 298 | 299 | def resize_mask(mask, scale, padding, crop=None): 300 | # 将mask按照scale放大缩小后 301 | with warnings.catch_warnings(): 302 | warnings.simplefilter("ignore") 303 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 304 | if crop is not None: 305 | y, x, h, w = crop 306 | mask = mask[y:y + h, x:x + w] 307 | else: 308 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 309 | return mask 310 | 311 | 312 | def minimize_mask(bbox, mask, mini_shape): 313 | """ 314 | 减少语义分割载入时的size 315 | """ 316 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 317 | for i in range(mask.shape[-1]): 318 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 319 | m = mask[:, :, i].astype(bool) 320 | y1, x1, y2, x2 = bbox[i][:4] 321 | m = m[y1:y2, x1:x2] 322 | if m.size == 0: 323 | raise Exception("Invalid bounding box with area of zero") 324 | # Resize with bilinear interpolation 325 | m = resize(m, mini_shape) 326 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 327 | return mini_mask 328 | 329 | 330 | def expand_mask(bbox, mini_mask, image_shape): 331 | """Resizes mini masks back to image size. Reverses the change 332 | of minimize_mask(). 333 | See inspect_data.ipynb notebook for more details. 334 | """ 335 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 336 | for i in range(mask.shape[-1]): 337 | m = mini_mask[:, :, i] 338 | y1, x1, y2, x2 = bbox[i][:4] 339 | h = y2 - y1 340 | w = x2 - x1 341 | # Resize with bilinear interpolation 342 | m = resize(m, (h, w)) 343 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 344 | return mask 345 | 346 | 347 | # TODO: Build and use this function to reduce code duplication 348 | def mold_mask(mask, config): 349 | pass 350 | 351 | 352 | def unmold_mask(mask, bbox, image_shape): 353 | """Converts a mask generated by the neural network to a format similar 354 | to its original shape. 355 | mask: [height, width] of type float. A small, typically 28x28 mask. 356 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 357 | Returns a binary mask with the same size as the original image. 358 | """ 359 | threshold = 0.5 360 | y1, x1, y2, x2 = bbox 361 | mask = resize(mask, (y2 - y1, x2 - x1)) 362 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 363 | 364 | # Put the mask in the right location. 
346 | 
347 | # TODO: Build and use this function to reduce code duplication
348 | def mold_mask(mask, config):
349 | pass
350 | 
351 | 
352 | def unmold_mask(mask, bbox, image_shape):
353 | """Converts a mask generated by the neural network to a format similar
354 | to its original shape.
355 | mask: [height, width] of type float. A small, typically 28x28 mask.
356 | bbox: [y1, x1, y2, x2]. The box to fit the mask in.
357 | Returns a binary mask with the same size as the original image.
358 | """
359 | threshold = 0.5
360 | y1, x1, y2, x2 = bbox
361 | mask = resize(mask, (y2 - y1, x2 - x1))
362 | mask = np.where(mask >= threshold, 1, 0).astype(bool)
363 | 
364 | # Put the mask in the right location.
365 | full_mask = np.zeros(image_shape[:2], dtype=bool)
366 | full_mask[y1:y2, x1:x2] = mask
367 | return full_mask
368 | 
369 | #----------------------------------------------------------#
370 | # Miscellaneous
371 | #----------------------------------------------------------#
372 | 
373 | def trim_zeros(x):
374 | """It's common to have tensors larger than the available data and
375 | pad with zeros. This function removes rows that are all zeros.
376 | x: [rows, columns].
377 | """
378 | assert len(x.shape) == 2
379 | return x[~np.all(x == 0, axis=1)]
380 | 
381 | 
382 | def compute_matches(gt_boxes, gt_class_ids, gt_masks,
383 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
384 | iou_threshold=0.5, score_threshold=0.0):
385 | """Finds matches between prediction and ground truth instances.
386 | Returns:
387 | gt_match: 1-D array. For each GT box it has the index of the matched
388 | predicted box.
389 | pred_match: 1-D array. For each predicted box, it has the index of
390 | the matched ground truth box.
391 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
392 | """
393 | # Trim zero padding
394 | # TODO: cleaner to do zero unpadding upstream
395 | gt_boxes = trim_zeros(gt_boxes)
396 | gt_masks = gt_masks[..., :gt_boxes.shape[0]]
397 | pred_boxes = trim_zeros(pred_boxes)
398 | pred_scores = pred_scores[:pred_boxes.shape[0]]
399 | # Sort predictions by score from high to low
400 | indices = np.argsort(pred_scores)[::-1]
401 | pred_boxes = pred_boxes[indices]
402 | pred_class_ids = pred_class_ids[indices]
403 | pred_scores = pred_scores[indices]
404 | pred_masks = pred_masks[..., indices]
405 | 
406 | # Compute IoU overlaps [pred_masks, gt_masks]
407 | overlaps = compute_overlaps_masks(pred_masks, gt_masks)
408 | 
409 | # Loop through predictions and find matching ground truth boxes
410 | match_count = 0
411 | pred_match = -1 * np.ones([pred_boxes.shape[0]])
412 | gt_match = -1 * np.ones([gt_boxes.shape[0]])
413 | for i in range(len(pred_boxes)):
414 | # Find best matching ground truth box
415 | # 1. Sort matches by score
416 | sorted_ixs = np.argsort(overlaps[i])[::-1]
417 | # 2. Remove low scores
418 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
419 | if low_score_idx.size > 0:
420 | sorted_ixs = sorted_ixs[:low_score_idx[0]]
421 | # 3. Find the match
422 | for j in sorted_ixs:
423 | # If ground truth box is already matched, go to next one
424 | if gt_match[j] > -1:
425 | continue
426 | # If we reach IoU smaller than the threshold, end the loop
427 | iou = overlaps[i, j]
428 | if iou < iou_threshold:
429 | break
430 | # Do we have a match?
431 | if pred_class_ids[i] == gt_class_ids[j]:
432 | match_count += 1
433 | gt_match[j] = i
434 | pred_match[i] = j
435 | break
436 | 
437 | return gt_match, pred_match, overlaps
438 | 
439 | 
440 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
441 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
442 | iou_threshold=0.5):
443 | """Compute Average Precision at a set IoU threshold (default 0.5).
444 | Returns:
445 | mAP: Mean Average Precision
446 | precisions: List of precisions at different class score thresholds.
447 | recalls: List of recall values at different class score thresholds.
448 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
449 | """ 450 | # Get matches and overlaps 451 | gt_match, pred_match, overlaps = compute_matches( 452 | gt_boxes, gt_class_ids, gt_masks, 453 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 454 | iou_threshold) 455 | 456 | # Compute precision and recall at each prediction box step 457 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 458 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 459 | 460 | # Pad with start and end values to simplify the math 461 | precisions = np.concatenate([[0], precisions, [0]]) 462 | recalls = np.concatenate([[0], recalls, [1]]) 463 | 464 | # Ensure precision values decrease but don't increase. This way, the 465 | # precision value at each recall threshold is the maximum it can be 466 | # for all following recall thresholds, as specified by the VOC paper. 467 | for i in range(len(precisions) - 2, -1, -1): 468 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 469 | 470 | # Compute mean AP over recall range 471 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 472 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 473 | precisions[indices]) 474 | 475 | return mAP, precisions, recalls, overlaps 476 | 477 | 478 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 479 | pred_box, pred_class_id, pred_score, pred_mask, 480 | iou_thresholds=None, verbose=1): 481 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 482 | # Default is 0.5 to 0.95 with increments of 0.05 483 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 484 | 485 | # Compute AP over range of IoU thresholds 486 | AP = [] 487 | for iou_threshold in iou_thresholds: 488 | ap, precisions, recalls, overlaps =\ 489 | compute_ap(gt_box, gt_class_id, gt_mask, 490 | pred_box, pred_class_id, pred_score, pred_mask, 491 | iou_threshold=iou_threshold) 492 | if verbose: 493 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 494 | AP.append(ap) 495 | AP = np.array(AP).mean() 496 | if verbose: 497 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 498 | iou_thresholds[0], iou_thresholds[-1], AP)) 499 | return AP 500 | 501 | 502 | def compute_recall(pred_boxes, gt_boxes, iou): 503 | """Compute the recall at the given IoU threshold. It's an indication 504 | of how many GT boxes were found by the given prediction boxes. 505 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 506 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 507 | """ 508 | # Measure overlaps 509 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 510 | iou_max = np.max(overlaps, axis=1) 511 | iou_argmax = np.argmax(overlaps, axis=1) 512 | positive_ids = np.where(iou_max >= iou)[0] 513 | matched_gt_boxes = iou_argmax[positive_ids] 514 | 515 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 516 | return recall, positive_ids 517 | 518 | 519 | # ## Batch Slicing 520 | # Some custom layers support a batch size of 1 only, and require a lot of work 521 | # to support batches greater than 1. This function slices an input tensor 522 | # across the batch dimension and feeds batches of size 1. Effectively, 523 | # an easy way to support batches > 1 quickly with little code modification. 524 | # In the long run, it's more efficient to modify the code to support large 525 | # batches and getting rid of this function. 
Consider this a temporary solution 526 | def batch_slice(inputs, graph_fn, batch_size, names=None): 527 | """Splits inputs into slices and feeds each slice to a copy of the given 528 | computation graph and then combines the results. It allows you to run a 529 | graph on a batch of inputs even if the graph is written to support one 530 | instance only. 531 | inputs: list of tensors. All must have the same first dimension length 532 | graph_fn: A function that returns a TF tensor that's part of a graph. 533 | batch_size: number of slices to divide the data into. 534 | names: If provided, assigns names to the resulting tensors. 535 | """ 536 | if not isinstance(inputs, list): 537 | inputs = [inputs] 538 | 539 | outputs = [] 540 | for i in range(batch_size): 541 | inputs_slice = [x[i] for x in inputs] 542 | output_slice = graph_fn(*inputs_slice) 543 | if not isinstance(output_slice, (tuple, list)): 544 | output_slice = [output_slice] 545 | outputs.append(output_slice) 546 | # Change outputs from a list of slices where each is 547 | # a list of outputs to a list of outputs and each has 548 | # a list of slices 549 | outputs = list(zip(*outputs)) 550 | 551 | if names is None: 552 | names = [None] * len(outputs) 553 | 554 | result = [tf.stack(o, axis=0, name=n) 555 | for o, n in zip(outputs, names)] 556 | if len(result) == 1: 557 | result = result[0] 558 | 559 | return result 560 | 561 | 562 | def download_trained_weights(coco_model_path, verbose=1): 563 | """Download COCO trained weights from Releases. 564 | coco_model_path: local path of COCO trained weights 565 | """ 566 | if verbose > 0: 567 | print("Downloading pretrained model to " + coco_model_path + " ...") 568 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 569 | shutil.copyfileobj(resp, out) 570 | if verbose > 0: 571 | print("... done downloading pretrained model!") 572 | 573 | 574 | def norm_boxes(boxes, shape): 575 | """Converts boxes from pixel coordinates to normalized coordinates. 576 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 577 | shape: [..., (height, width)] in pixels 578 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 579 | coordinates it's inside the box. 580 | Returns: 581 | [N, (y1, x1, y2, x2)] in normalized coordinates 582 | """ 583 | h, w = shape 584 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 585 | shift = np.array([0, 0, 1, 1]) 586 | return np.divide((boxes - shift), scale).astype(np.float32) 587 | 588 | 589 | def denorm_boxes(boxes, shape): 590 | """Converts boxes from normalized coordinates to pixel coordinates. 591 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 592 | shape: [..., (height, width)] in pixels 593 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 594 | coordinates it's inside the box. 595 | Returns: 596 | [N, (y1, x1, y2, x2)] in pixel coordinates 597 | """ 598 | h, w = shape 599 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 600 | shift = np.array([0, 0, 1, 1]) 601 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 602 | 603 | 604 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, 605 | preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): 606 | """A wrapper for Scikit-Image resize(). 607 | Scikit-Image generates warnings on every call to resize() if it doesn't 608 | receive the right parameters. The right parameters depend on the version 609 | of skimage. 
This solves the problem by using different parameters per
610 | version. And it provides a central place to control resizing defaults.
611 | """
612 | if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
613 | # New in 0.14: anti_aliasing. Default it to False for backward
614 | # compatibility with skimage 0.13.
615 | return skimage.transform.resize(
616 | image, output_shape,
617 | order=order, mode=mode, cval=cval, clip=clip,
618 | preserve_range=preserve_range, anti_aliasing=anti_aliasing,
619 | anti_aliasing_sigma=anti_aliasing_sigma)
620 | else:
621 | return skimage.transform.resize(
622 | image, output_shape,
623 | order=order, mode=mode, cval=cval, clip=clip,
624 | preserve_range=preserve_range)
625 | 
626 | 
627 | def mold_image(images, config):
628 | """Expects an RGB image (or array of images) and subtracts
629 | the mean pixel and converts it to float. Expects image
630 | colors in RGB order.
631 | """
632 | return images.astype(np.float32) - config.MEAN_PIXEL
633 | 
634 | def compose_image_meta(image_id, original_image_shape, image_shape,
635 | window, scale, active_class_ids):
636 | """Takes attributes of an image and puts them in one 1D array.
637 | image_id: An int ID of the image. Useful for debugging.
638 | original_image_shape: [H, W, C] before resizing or padding.
639 | image_shape: [H, W, C] after resizing and padding
640 | window: (y1, x1, y2, x2) in pixels. The area of the image where the real
641 | image is (excluding the padding)
642 | scale: The scaling factor applied to the original image (float32)
643 | active_class_ids: List of class_ids available in the dataset from which
644 | the image came. Useful if training on images from multiple datasets
645 | where not all classes are present in all datasets.
646 | """
647 | meta = np.array(
648 | [image_id] + # size=1
649 | list(original_image_shape) + # size=3
650 | list(image_shape) + # size=3
651 | list(window) + # size=4 (y1, x1, y2, x2) in image coordinates
652 | [scale] + # size=1
653 | list(active_class_ids) # size=num_classes
654 | )
655 | return meta
656 | 
657 | def mold_inputs(config, images):
658 | molded_images = []
659 | image_metas = []
660 | windows = []
661 | for image in images:
662 | # Resize image
663 | # TODO: move resizing to mold_image()
664 | molded_image, window, scale, padding, crop = resize_image(
665 | image,
666 | min_dim=config.IMAGE_MIN_DIM,
667 | min_scale=config.IMAGE_MIN_SCALE,
668 | max_dim=config.IMAGE_MAX_DIM,
669 | mode=config.IMAGE_RESIZE_MODE)
670 | # print(np.shape(molded_image))
671 | molded_image = mold_image(molded_image, config)
672 | # Build image_meta
673 | image_meta = compose_image_meta(
674 | 0, image.shape, molded_image.shape, window, scale,
675 | np.zeros([config.NUM_CLASSES], dtype=np.int32))
676 | # Append
677 | molded_images.append(molded_image)
678 | windows.append(window)
679 | image_metas.append(image_meta)
680 | # Pack into arrays
681 | molded_images = np.stack(molded_images)
682 | image_metas = np.stack(image_metas)
683 | windows = np.stack(windows)
684 | return molded_images, image_metas, windows
685 | 
686 | 
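# Hedged usage sketch of the molding pipeline around the network:
#   molded_images, image_metas, windows = mold_inputs(config, [image])
#   # ...run the model on molded_images / image_metas...
#   boxes, class_ids, scores, masks = unmold_detections(
#       detections[0], mrcnn_mask[0], image.shape,
#       molded_images[0].shape, windows[0])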
687 | def unmold_detections(detections, mrcnn_mask, original_image_shape,
688 | image_shape, window):
689 | '''
690 | Reformats the detections of one image from the format of the neural network output to a format suitable for use in the rest of the application
691 | params:
692 | detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates
693 | mrcnn_mask: [N, height, width, num_classes]
694 | original_image_shape: [H, W, C] Original image shape before resizing
695 | image_shape: [H, W, C] Shape of the image after resizing and padding
696 | window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real image is excluding the padding
697 | 
698 | Returns:
699 | boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels
700 | class_ids: [N] Integer class IDs for each bounding box
701 | scores: [N] Float probability scores of the class_id
702 | masks: [height, width, num_instances] Instance masks
703 | '''
704 | # How many detections do we have? The detections array is padded with zeros. Find the first class_id == 0
705 | zero_ix = np.where(detections[:, 4] == 0)[0]
706 | # N is the number of objects detected in the image
707 | N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0]
708 | 
709 | boxes = detections[:N, :4]
710 | class_ids = detections[:N, 4].astype(np.int32)
711 | scores = detections[:N, 5]
712 | masks = mrcnn_mask[np.arange(N), :, :, class_ids]
713 | # Translate normalized coordinates in the resized image to pixel coordinates in the original image before resizing
714 | window = norm_boxes(window, image_shape[:2])
715 | 
716 | wy1, wx1, wy2, wx2 = window
717 | shift = np.array([wy1, wx1, wy1, wx1])
718 | wh = wy2 - wy1 # window height
719 | ww = wx2 - wx1 # window width
720 | 
721 | scale = np.array([wh, ww, wh, ww])
722 | boxes = np.divide(boxes - shift, scale)
723 | boxes = denorm_boxes(boxes, original_image_shape[:2])
724 | 
725 | exclude_ix = np.where(
726 | (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0]
727 | if exclude_ix.shape[0] > 0:
728 | boxes = np.delete(boxes, exclude_ix, axis=0)
729 | class_ids = np.delete(class_ids, exclude_ix, axis=0)
730 | scores = np.delete(scores, exclude_ix, axis=0)
731 | masks = np.delete(masks, exclude_ix, axis=0)
732 | N = class_ids.shape[0]
733 | # Resize masks to original image size and set boundary threshold.
734 | full_masks = []
735 | for i in range(N):
736 | 
737 | full_mask = unmold_mask(masks[i], boxes[i], original_image_shape)
738 | full_masks.append(full_mask)
739 | 
740 | full_masks = np.stack(full_masks, axis=-1)\
741 | if full_masks else np.empty(original_image_shape[:2] + (0,))
742 | 
743 | return boxes, class_ids, scores, full_masks
744 | 
745 | 
746 | 
747 | def norm_boxes_graph(boxes, shape):
748 | """Converts boxes from pixel coordinates to normalized coordinates.
749 | boxes: [..., (y1, x1, y2, x2)] in pixel coordinates
750 | shape: [..., (height, width)] in pixels
751 | 
752 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
753 | coordinates it's inside the box.
754 | 
755 | Returns:
756 | [..., (y1, x1, y2, x2)] in normalized coordinates
757 | """
758 | h, w = tf.split(tf.cast(shape, tf.float32), 2)
759 | scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
760 | shift = tf.constant([0., 0., 1., 1.])
761 | return tf.divide(boxes - shift, scale)
762 | 
763 | 
764 | def parse_image_meta_graph(meta):
765 | """
766 | Parses a tensor that contains image attributes into its components.
767 | Returns a dict of the parsed tensors.
768 | """
769 | 
770 | image_id = meta[:, 0] # image id
771 | original_image_shape = meta[:, 1:4] # original image shape
772 | image_shape = meta[:, 4:7] # image shape after resizing
773 | window = meta[:, 7:11] # (y1, x1, y2, x2) window of the valid image area
774 | scale = meta[:, 11] # scale factor applied to the image
775 | active_class_ids = meta[:, 12:]
776 | return {
777 | "image_id": image_id,
778 | "original_image_shape": original_image_shape,
779 | "image_shape": image_shape,
780 | "window": window,
781 | "scale": scale,
782 | "active_class_ids": active_class_ids,
783 | }
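# Hedged note: compose_image_meta() above and parse_image_meta_graph() are
# inverses over the flat meta vector; e.g. meta[:, 7:11] recovers exactly the
# window that compose_image_meta() packed in.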
-------------------------------------------------------------------------------- /utils/visualize.py: --------------------------------------------------------------------------------
1 | import os
2 | import sys
3 | import random
4 | import itertools
5 | import colorsys
6 | import numpy as np
7 | 
8 | from skimage.measure import find_contours
9 | from PIL import Image
10 | import cv2
11 | ROOT_DIR = os.path.abspath("../")
12 | 
13 | sys.path.append(ROOT_DIR)
14 | 
15 | #---------------------------------------------------------#
16 | # Visualization
17 | #---------------------------------------------------------#
18 | def random_colors(N, bright=True):
19 | """
20 | Generate random colors
21 | """
22 | brightness = 1.0 if bright else 0.7
23 | hsv = [(i / N, 1, brightness) for i in range(N)]
24 | colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv))
25 | return colors
26 | 
27 | 
28 | def apply_mask(image, mask, color, alpha=0.5):
29 | """
30 | Apply the given mask to the image
31 | """
32 | for c in range(3):
33 | image[:, :, c] = np.where(mask == 1,
34 | image[:, :, c] *
35 | (1 - alpha) + alpha * color[c] * 255,
36 | image[:, :, c])
37 | return image
38 | 
39 | 
40 | def display_instances(image, boxes, masks, class_ids, class_names,scores=None,show_mask=True, show_bbox=True,colors=None, captions=True):
41 | # Number of instances
42 | N = boxes.shape[0]
43 | if not N:
44 | print("\n*** No instances to display *** \n")
45 | else:
46 | assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0]
47 | colors = colors or random_colors(N)
48 | 
49 | # When masked_image is the original image, drawing happens on top of it.
50 | # To draw on a blank canvas instead, set masked_image to an all-zero array of the same size.
51 | # masked_image = np.array(image,np.uint8)
52 | masked_image = np.zeros_like(image,np.uint8)
53 | for i in range(N):
54 | color = colors[i]
55 | 
56 | # display bounding box
57 | if not np.any(boxes[i]):
58 | continue
59 | y1, x1, y2, x2 = boxes[i]
60 | if show_bbox:
61 | cv2.rectangle(masked_image, (x1, y1), (x2, y2), (color[0] * 255,color[1] * 255,color[2] * 255), 2)
62 | 
63 | # display labels and captions
64 | if captions:
65 | class_id = class_ids[i]
66 | score = scores[i] if scores is not None else None
67 | label = class_names[class_id]
68 | caption = "{} {:.3f}".format(label, score) if score else label
69 | font = cv2.FONT_HERSHEY_SIMPLEX
70 | cv2.putText(masked_image, caption, (x1, y1 + 8), font, 1, (255, 255, 255), 2)
71 | 
72 | # display masks
73 | mask = masks[:, :, i]
74 | if show_mask:
75 | masked_image = apply_mask(masked_image, mask, color)
76 | 
77 | # Draw the outline of each instance mask
78 | padded_mask = np.zeros(
79 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
80 | padded_mask[1:-1, 1:-1] = mask
81 | contours = find_contours(padded_mask, 0.5)
82 | for verts in contours:
83 | verts = np.fliplr(verts) - 1
84 | cv2.polylines(masked_image, [np.array([verts], np.int32)], 1, (color[0] * 255,color[1] * 255,color[2] * 255), 2)
85 | 
86 | img = Image.fromarray(np.uint8(masked_image))
87 | return img
--------------------------------------------------------------------------------