├── README.md ├── core └── getLoc.py ├── frozen_inference_graph.pb ├── label_map.pbtxt ├── run.py ├── show ├── run.PNG ├── show.PNG └── yd.jpg └── utils ├── label_map_util.py └── visualization_utils.py /README.md: -------------------------------------------------------------------------------- 1 | # Slider-killer 2 | 滑块验证码杀手,识别率99%,输入路径返回坐标 3 | 4 | 环境pip install 5 | ```python 6 | tensorflow-gpu 1.11.0 7 | numpy 1.18.4 8 | opencv-python 4.2.0.34 9 | ``` 10 | 11 | 代码实例 12 | ```python 13 | from core.getLoc import getPageLoc 14 | 15 | if __name__=="__main__": 16 | result=getPageLoc("./show/yd.jpg") 17 | print(result) 18 | 19 | ``` 20 | 21 | 易盾测试 22 | ![](https://github.com/LoseNine/Slider-killer/blob/master/show/show.PNG) 23 | -------------------------------------------------------------------------------- /core/getLoc.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | import cv2 as cv 5 | import numpy as np 6 | import tensorflow as tf 7 | 8 | from utils import label_map_util 9 | from utils import visualization_utils as vis_util 10 | 11 | # Path to frozen detection graph. This is the actual model that is used for the object detection. 12 | PATH_TO_FROZEN_GRAPH = 'frozen_inference_graph.pb' 13 | 14 | # List of the strings that is used to add correct label for each box. 
15 | PATH_TO_LABELS = 'label_map.pbtxt' 16 | 17 | NUM_CLASSES = 1 18 | detection_graph = tf.Graph() 19 | with detection_graph.as_default(): 20 | od_graph_def = tf.GraphDef() 21 | with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid: 22 | serialized_graph = fid.read() 23 | od_graph_def.ParseFromString(serialized_graph) 24 | tf.import_graph_def(od_graph_def, name='') 25 | 26 | label_map = label_map_util.load_labelmap(PATH_TO_LABELS) 27 | categorys = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) 28 | category_index = label_map_util.create_category_index(categorys) 29 | 30 | 31 | def getPageLoc(path): 32 | with detection_graph.as_default(): 33 | with tf.Session(graph=detection_graph) as sess: 34 | for i in range(1,2): 35 | image = cv.imread(path.format(i)) 36 | image_np_expanded = np.expand_dims(image, axis=0) 37 | image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') 38 | boxes = detection_graph.get_tensor_by_name('detection_boxes:0') 39 | scores = detection_graph.get_tensor_by_name('detection_scores:0') 40 | classes = detection_graph.get_tensor_by_name('detection_classes:0') 41 | num_detections = detection_graph.get_tensor_by_name('num_detections:0') 42 | 43 | (boxes, scores, classes, num_detections) = sess.run([boxes, scores, classes, num_detections], 44 | feed_dict={image_tensor: image_np_expanded}) 45 | 46 | if len(boxes[scores>0.9]) == 0: 47 | continue 48 | posX = int(boxes[scores>0.9][0][1] * 320) 49 | posY = int(boxes[scores>0.9][0][0] * 160) 50 | posXmax = int(boxes[scores>0.9][0][3] * 320) 51 | posYmax = int(boxes[scores>0.9][0][2] * 160) 52 | 53 | cv.rectangle( 54 | image, 55 | (posX,posY), #左上角 56 | (posXmax,posYmax), #右下角 57 | (0,255,0), 58 | 2 59 | ) 60 | print("左上角:",posX,posY) 61 | print("右下角:",posXmax,posYmax) 62 | cv.putText( 63 | image, #图片 64 | str(posX), #添加的文字 65 | (posX,posY - 5), #左上角坐标 66 | cv.FONT_HERSHEY_SIMPLEX, #字体 67 | 1, #字体大小 68 | (0,0,255), #颜色 69 | 2 #字体粗细 
70 | ) 71 | 72 | 73 | cv.imshow("SSD - drag Detector Demo{}".format(i), image) 74 | cv.waitKey(0) 75 | cv.destroyAllWindows() 76 | return (posX,posY,posXmax,posYmax) 77 | -------------------------------------------------------------------------------- /frozen_inference_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LoseNine/Slider-killer/6f98120e5ede42c37ff0d2d3debb05bedcff6b1b/frozen_inference_graph.pb -------------------------------------------------------------------------------- /label_map.pbtxt: -------------------------------------------------------------------------------- 1 | item { 2 | id: 1 3 | name: '1' 4 | } 5 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | from core.getLoc import getPageLoc 2 | 3 | 4 | 5 | if __name__=="__main__": 6 | result=getPageLoc("./show/yd.jpg") 7 | print(result) -------------------------------------------------------------------------------- /show/run.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LoseNine/Slider-killer/6f98120e5ede42c37ff0d2d3debb05bedcff6b1b/show/run.PNG -------------------------------------------------------------------------------- /show/show.PNG: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LoseNine/Slider-killer/6f98120e5ede42c37ff0d2d3debb05bedcff6b1b/show/show.PNG -------------------------------------------------------------------------------- /show/yd.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LoseNine/Slider-killer/6f98120e5ede42c37ff0d2d3debb05bedcff6b1b/show/yd.jpg -------------------------------------------------------------------------------- /utils/label_map_util.py: 
-------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """Label map utility functions.""" 17 | 18 | import logging 19 | 20 | import tensorflow as tf 21 | from google.protobuf import text_format 22 | from object_detection.protos import string_int_label_map_pb2 23 | 24 | 25 | def _validate_label_map(label_map): 26 | """Checks if a label map is valid. 27 | 28 | Args: 29 | label_map: StringIntLabelMap to validate. 30 | 31 | Raises: 32 | ValueError: if label map is invalid. 33 | """ 34 | for item in label_map.item: 35 | if item.id < 0: 36 | raise ValueError('Label map ids should be >= 0.') 37 | if (item.id == 0 and item.name != 'background' and 38 | item.display_name != 'background'): 39 | raise ValueError('Label map id 0 is reserved for the background label') 40 | 41 | 42 | def create_category_index(categories): 43 | """Creates dictionary of COCO compatible categories keyed by category id. 44 | 45 | Args: 46 | categories: a list of dicts, each of which has the following keys: 47 | 'id': (required) an integer id uniquely identifying this category. 48 | 'name': (required) string representing category name 49 | e.g., 'cat', 'dog', 'pizza'. 
50 | 51 | Returns: 52 | category_index: a dict containing the same entries as categories, but keyed 53 | by the 'id' field of each category. 54 | """ 55 | category_index = {} 56 | for cat in categories: 57 | category_index[cat['id']] = cat 58 | return category_index 59 | 60 | 61 | def get_max_label_map_index(label_map): 62 | """Get maximum index in label map. 63 | 64 | Args: 65 | label_map: a StringIntLabelMapProto 66 | 67 | Returns: 68 | an integer 69 | """ 70 | return max([item.id for item in label_map.item]) 71 | 72 | 73 | def convert_label_map_to_categories(label_map, 74 | max_num_classes, 75 | use_display_name=True): 76 | """Loads label map proto and returns categories list compatible with eval. 77 | 78 | This function loads a label map and returns a list of dicts, each of which 79 | has the following keys: 80 | 'id': (required) an integer id uniquely identifying this category. 81 | 'name': (required) string representing category name 82 | e.g., 'cat', 'dog', 'pizza'. 83 | We only allow class into the list if its id-label_id_offset is 84 | between 0 (inclusive) and max_num_classes (exclusive). 85 | If there are several items mapping to the same id in the label map, 86 | we will only keep the first one in the categories list. 87 | 88 | Args: 89 | label_map: a StringIntLabelMapProto or None. If None, a default categories 90 | list is created with max_num_classes categories. 91 | max_num_classes: maximum number of (consecutive) label indices to include. 92 | use_display_name: (boolean) choose whether to load 'display_name' field 93 | as category name. If False or if the display_name field does not exist, 94 | uses 'name' field as category names instead. 95 | Returns: 96 | categories: a list of dictionaries representing all possible categories. 
97 | """ 98 | categories = [] 99 | list_of_ids_already_added = [] 100 | if not label_map: 101 | label_id_offset = 1 102 | for class_id in range(max_num_classes): 103 | categories.append({ 104 | 'id': class_id + label_id_offset, 105 | 'name': 'category_{}'.format(class_id + label_id_offset) 106 | }) 107 | return categories 108 | for item in label_map.item: 109 | if not 0 < item.id <= max_num_classes: 110 | logging.info('Ignore item %d since it falls outside of requested ' 111 | 'label range.', item.id) 112 | continue 113 | if use_display_name and item.HasField('display_name'): 114 | name = item.display_name 115 | else: 116 | name = item.name 117 | if item.id not in list_of_ids_already_added: 118 | list_of_ids_already_added.append(item.id) 119 | categories.append({'id': item.id, 'name': name}) 120 | return categories 121 | 122 | 123 | def load_labelmap(path): 124 | """Loads label map proto. 125 | 126 | Args: 127 | path: path to StringIntLabelMap proto text file. 128 | Returns: 129 | a StringIntLabelMapProto 130 | """ 131 | with tf.gfile.GFile(path, 'r') as fid: 132 | label_map_string = fid.read() 133 | label_map = string_int_label_map_pb2.StringIntLabelMap() 134 | try: 135 | text_format.Merge(label_map_string, label_map) 136 | except text_format.ParseError: 137 | label_map.ParseFromString(label_map_string) 138 | _validate_label_map(label_map) 139 | return label_map 140 | 141 | 142 | def get_label_map_dict(label_map_path, 143 | use_display_name=False, 144 | fill_in_gaps_and_background=False): 145 | """Reads a label map and returns a dictionary of label names to id. 146 | 147 | Args: 148 | label_map_path: path to StringIntLabelMap proto text file. 149 | use_display_name: whether to use the label map items' display names as keys. 150 | fill_in_gaps_and_background: whether to fill in gaps and background with 151 | respect to the id field in the proto. The id: 0 is reserved for the 152 | 'background' class and will be added if it is missing. 
All other missing 153 | ids in range(1, max(id)) will be added with a dummy class name 154 | ("class_") if they are missing. 155 | 156 | Returns: 157 | A dictionary mapping label names to id. 158 | 159 | Raises: 160 | ValueError: if fill_in_gaps_and_background and label_map has non-integer or 161 | negative values. 162 | """ 163 | label_map = load_labelmap(label_map_path) 164 | label_map_dict = {} 165 | for item in label_map.item: 166 | if use_display_name: 167 | label_map_dict[item.display_name] = item.id 168 | else: 169 | label_map_dict[item.name] = item.id 170 | 171 | if fill_in_gaps_and_background: 172 | values = set(label_map_dict.values()) 173 | 174 | if 0 not in values: 175 | label_map_dict['background'] = 0 176 | if not all(isinstance(value, int) for value in values): 177 | raise ValueError('The values in label map must be integers in order to' 178 | 'fill_in_gaps_and_background.') 179 | if not all(value >= 0 for value in values): 180 | raise ValueError('The values in the label map must be positive.') 181 | 182 | if len(values) != max(values) + 1: 183 | # there are gaps in the labels, fill in gaps. 184 | for value in range(1, max(values)): 185 | if value not in values: 186 | label_map_dict['class_' + str(value)] = value 187 | 188 | return label_map_dict 189 | 190 | 191 | def create_category_index_from_labelmap(label_map_path): 192 | """Reads a label map and returns a category index. 193 | 194 | Args: 195 | label_map_path: Path to `StringIntLabelMap` proto text file. 196 | 197 | Returns: 198 | A category index, which is a dictionary that maps integer ids to dicts 199 | containing categories, e.g. 
200 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 201 | """ 202 | label_map = load_labelmap(label_map_path) 203 | max_num_classes = max(item.id for item in label_map.item) 204 | categories = convert_label_map_to_categories(label_map, max_num_classes) 205 | return create_category_index(categories) 206 | 207 | 208 | def create_class_agnostic_category_index(): 209 | """Creates a category index with a single `object` class.""" 210 | return {1: {'id': 1, 'name': 'object'}} 211 | -------------------------------------------------------------------------------- /utils/visualization_utils.py: -------------------------------------------------------------------------------- 1 | # Copyright 2017 The TensorFlow Authors. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at 6 | # 7 | # http://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | # See the License for the specific language governing permissions and 13 | # limitations under the License. 14 | # ============================================================================== 15 | 16 | """A set of functions that are used for visualization. 17 | 18 | These functions often receive an image, perform some visualization on the image. 19 | The functions do not return a value, instead they modify the image itself. 20 | 21 | """ 22 | import collections 23 | import functools 24 | # Set headless-friendly backend. 
25 | import matplotlib; matplotlib.use('Agg') # pylint: disable=multiple-statements 26 | import matplotlib.pyplot as plt # pylint: disable=g-import-not-at-top 27 | import numpy as np 28 | import PIL.Image as Image 29 | import PIL.ImageColor as ImageColor 30 | import PIL.ImageDraw as ImageDraw 31 | import PIL.ImageFont as ImageFont 32 | import six 33 | import tensorflow as tf 34 | 35 | from object_detection.core import standard_fields as fields 36 | 37 | 38 | _TITLE_LEFT_MARGIN = 10 39 | _TITLE_TOP_MARGIN = 10 40 | STANDARD_COLORS = [ 41 | 'AliceBlue', 'Chartreuse', 'Aqua', 'Aquamarine', 'Azure', 'Beige', 'Bisque', 42 | 'BlanchedAlmond', 'BlueViolet', 'BurlyWood', 'CadetBlue', 'AntiqueWhite', 43 | 'Chocolate', 'Coral', 'CornflowerBlue', 'Cornsilk', 'Crimson', 'Cyan', 44 | 'DarkCyan', 'DarkGoldenRod', 'DarkGrey', 'DarkKhaki', 'DarkOrange', 45 | 'DarkOrchid', 'DarkSalmon', 'DarkSeaGreen', 'DarkTurquoise', 'DarkViolet', 46 | 'DeepPink', 'DeepSkyBlue', 'DodgerBlue', 'FireBrick', 'FloralWhite', 47 | 'ForestGreen', 'Fuchsia', 'Gainsboro', 'GhostWhite', 'Gold', 'GoldenRod', 48 | 'Salmon', 'Tan', 'HoneyDew', 'HotPink', 'IndianRed', 'Ivory', 'Khaki', 49 | 'Lavender', 'LavenderBlush', 'LawnGreen', 'LemonChiffon', 'LightBlue', 50 | 'LightCoral', 'LightCyan', 'LightGoldenRodYellow', 'LightGray', 'LightGrey', 51 | 'LightGreen', 'LightPink', 'LightSalmon', 'LightSeaGreen', 'LightSkyBlue', 52 | 'LightSlateGray', 'LightSlateGrey', 'LightSteelBlue', 'LightYellow', 'Lime', 53 | 'LimeGreen', 'Linen', 'Magenta', 'MediumAquaMarine', 'MediumOrchid', 54 | 'MediumPurple', 'MediumSeaGreen', 'MediumSlateBlue', 'MediumSpringGreen', 55 | 'MediumTurquoise', 'MediumVioletRed', 'MintCream', 'MistyRose', 'Moccasin', 56 | 'NavajoWhite', 'OldLace', 'Olive', 'OliveDrab', 'Orange', 'OrangeRed', 57 | 'Orchid', 'PaleGoldenRod', 'PaleGreen', 'PaleTurquoise', 'PaleVioletRed', 58 | 'PapayaWhip', 'PeachPuff', 'Peru', 'Pink', 'Plum', 'PowderBlue', 'Purple', 59 | 'Red', 'RosyBrown', 'RoyalBlue', 
'SaddleBrown', 'Green', 'SandyBrown', 60 | 'SeaGreen', 'SeaShell', 'Sienna', 'Silver', 'SkyBlue', 'SlateBlue', 61 | 'SlateGray', 'SlateGrey', 'Snow', 'SpringGreen', 'SteelBlue', 'GreenYellow', 62 | 'Teal', 'Thistle', 'Tomato', 'Turquoise', 'Violet', 'Wheat', 'White', 63 | 'WhiteSmoke', 'Yellow', 'YellowGreen' 64 | ] 65 | 66 | 67 | def save_image_array_as_png(image, output_path): 68 | """Saves an image (represented as a numpy array) to PNG. 69 | 70 | Args: 71 | image: a numpy array with shape [height, width, 3]. 72 | output_path: path to which image should be written. 73 | """ 74 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 75 | with tf.gfile.Open(output_path, 'w') as fid: 76 | image_pil.save(fid, 'PNG') 77 | 78 | 79 | def encode_image_array_as_png_str(image): 80 | """Encodes a numpy array into a PNG string. 81 | 82 | Args: 83 | image: a numpy array with shape [height, width, 3]. 84 | 85 | Returns: 86 | PNG encoded image string. 87 | """ 88 | image_pil = Image.fromarray(np.uint8(image)) 89 | output = six.BytesIO() 90 | image_pil.save(output, format='PNG') 91 | png_string = output.getvalue() 92 | output.close() 93 | return png_string 94 | 95 | 96 | def draw_bounding_box_on_image_array(image, 97 | ymin, 98 | xmin, 99 | ymax, 100 | xmax, 101 | color='red', 102 | thickness=4, 103 | display_str_list=(), 104 | use_normalized_coordinates=True): 105 | """Adds a bounding box to an image (numpy array). 106 | 107 | Bounding box coordinates can be specified in either absolute (pixel) or 108 | normalized coordinates by setting the use_normalized_coordinates argument. 109 | 110 | Args: 111 | image: a numpy array with shape [height, width, 3]. 112 | ymin: ymin of bounding box. 113 | xmin: xmin of bounding box. 114 | ymax: ymax of bounding box. 115 | xmax: xmax of bounding box. 116 | color: color to draw bounding box. Default is red. 117 | thickness: line thickness. Default value is 4. 
118 | display_str_list: list of strings to display in box 119 | (each to be shown on its own line). 120 | use_normalized_coordinates: If True (default), treat coordinates 121 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 122 | coordinates as absolute. 123 | """ 124 | image_pil = Image.fromarray(np.uint8(image)).convert('RGB') 125 | draw_bounding_box_on_image(image_pil, ymin, xmin, ymax, xmax, color, 126 | thickness, display_str_list, 127 | use_normalized_coordinates) 128 | np.copyto(image, np.array(image_pil)) 129 | 130 | 131 | def draw_bounding_box_on_image(image, 132 | ymin, 133 | xmin, 134 | ymax, 135 | xmax, 136 | color='red', 137 | thickness=4, 138 | display_str_list=(), 139 | use_normalized_coordinates=True): 140 | """Adds a bounding box to an image. 141 | 142 | Bounding box coordinates can be specified in either absolute (pixel) or 143 | normalized coordinates by setting the use_normalized_coordinates argument. 144 | 145 | Each string in display_str_list is displayed on a separate line above the 146 | bounding box in black text on a rectangle filled with the input 'color'. 147 | If the top of the bounding box extends to the edge of the image, the strings 148 | are displayed below the bounding box. 149 | 150 | Args: 151 | image: a PIL.Image object. 152 | ymin: ymin of bounding box. 153 | xmin: xmin of bounding box. 154 | ymax: ymax of bounding box. 155 | xmax: xmax of bounding box. 156 | color: color to draw bounding box. Default is red. 157 | thickness: line thickness. Default value is 4. 158 | display_str_list: list of strings to display in box 159 | (each to be shown on its own line). 160 | use_normalized_coordinates: If True (default), treat coordinates 161 | ymin, xmin, ymax, xmax as relative to the image. Otherwise treat 162 | coordinates as absolute. 
163 | """ 164 | draw = ImageDraw.Draw(image) 165 | im_width, im_height = image.size 166 | if use_normalized_coordinates: 167 | (left, right, top, bottom) = (xmin * im_width, xmax * im_width, 168 | ymin * im_height, ymax * im_height) 169 | else: 170 | (left, right, top, bottom) = (xmin, xmax, ymin, ymax) 171 | draw.line([(left, top), (left, bottom), (right, bottom), 172 | (right, top), (left, top)], width=thickness, fill=color) 173 | try: 174 | font = ImageFont.truetype('arial.ttf', 24) 175 | except IOError: 176 | font = ImageFont.load_default() 177 | 178 | # If the total height of the display strings added to the top of the bounding 179 | # box exceeds the top of the image, stack the strings below the bounding box 180 | # instead of above. 181 | display_str_heights = [font.getsize(ds)[1] for ds in display_str_list] 182 | # Each display_str has a top and bottom margin of 0.05x. 183 | total_display_str_height = (1 + 2 * 0.05) * sum(display_str_heights) 184 | 185 | if top > total_display_str_height: 186 | text_bottom = top 187 | else: 188 | text_bottom = bottom + total_display_str_height 189 | # Reverse list and print from bottom to top. 190 | for display_str in display_str_list[::-1]: 191 | text_width, text_height = font.getsize(display_str) 192 | margin = np.ceil(0.05 * text_height) 193 | draw.rectangle( 194 | [(left, text_bottom - text_height - 2 * margin), (left + text_width, 195 | text_bottom)], 196 | fill=color) 197 | draw.text( 198 | (left + margin, text_bottom - text_height - margin), 199 | display_str, 200 | fill='black', 201 | font=font) 202 | text_bottom -= text_height - 2 * margin 203 | 204 | 205 | def draw_bounding_boxes_on_image_array(image, 206 | boxes, 207 | color='red', 208 | thickness=4, 209 | display_str_list_list=()): 210 | """Draws bounding boxes on image (numpy array). 211 | 212 | Args: 213 | image: a numpy array object. 214 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 
215 | The coordinates are in normalized format between [0, 1]. 216 | color: color to draw bounding box. Default is red. 217 | thickness: line thickness. Default value is 4. 218 | display_str_list_list: list of list of strings. 219 | a list of strings for each bounding box. 220 | The reason to pass a list of strings for a 221 | bounding box is that it might contain 222 | multiple labels. 223 | 224 | Raises: 225 | ValueError: if boxes is not a [N, 4] array 226 | """ 227 | image_pil = Image.fromarray(image) 228 | draw_bounding_boxes_on_image(image_pil, boxes, color, thickness, 229 | display_str_list_list) 230 | np.copyto(image, np.array(image_pil)) 231 | 232 | 233 | def draw_bounding_boxes_on_image(image, 234 | boxes, 235 | color='red', 236 | thickness=4, 237 | display_str_list_list=()): 238 | """Draws bounding boxes on image. 239 | 240 | Args: 241 | image: a PIL.Image object. 242 | boxes: a 2 dimensional numpy array of [N, 4]: (ymin, xmin, ymax, xmax). 243 | The coordinates are in normalized format between [0, 1]. 244 | color: color to draw bounding box. Default is red. 245 | thickness: line thickness. Default value is 4. 246 | display_str_list_list: list of list of strings. 247 | a list of strings for each bounding box. 248 | The reason to pass a list of strings for a 249 | bounding box is that it might contain 250 | multiple labels. 
251 | 252 | Raises: 253 | ValueError: if boxes is not a [N, 4] array 254 | """ 255 | boxes_shape = boxes.shape 256 | if not boxes_shape: 257 | return 258 | if len(boxes_shape) != 2 or boxes_shape[1] != 4: 259 | raise ValueError('Input must be of size [N, 4]') 260 | for i in range(boxes_shape[0]): 261 | display_str_list = () 262 | if display_str_list_list: 263 | display_str_list = display_str_list_list[i] 264 | draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2], 265 | boxes[i, 3], color, thickness, display_str_list) 266 | 267 | 268 | def _visualize_boxes(image, boxes, classes, scores, category_index, **kwargs): 269 | return visualize_boxes_and_labels_on_image_array( 270 | image, boxes, classes, scores, category_index=category_index, **kwargs) 271 | 272 | 273 | def _visualize_boxes_and_masks(image, boxes, classes, scores, masks, 274 | category_index, **kwargs): 275 | return visualize_boxes_and_labels_on_image_array( 276 | image, 277 | boxes, 278 | classes, 279 | scores, 280 | category_index=category_index, 281 | instance_masks=masks, 282 | **kwargs) 283 | 284 | 285 | def _visualize_boxes_and_keypoints(image, boxes, classes, scores, keypoints, 286 | category_index, **kwargs): 287 | return visualize_boxes_and_labels_on_image_array( 288 | image, 289 | boxes, 290 | classes, 291 | scores, 292 | category_index=category_index, 293 | keypoints=keypoints, 294 | **kwargs) 295 | 296 | 297 | def _visualize_boxes_and_masks_and_keypoints( 298 | image, boxes, classes, scores, masks, keypoints, category_index, **kwargs): 299 | return visualize_boxes_and_labels_on_image_array( 300 | image, 301 | boxes, 302 | classes, 303 | scores, 304 | category_index=category_index, 305 | instance_masks=masks, 306 | keypoints=keypoints, 307 | **kwargs) 308 | 309 | 310 | def draw_bounding_boxes_on_image_tensors(images, 311 | boxes, 312 | classes, 313 | scores, 314 | category_index, 315 | instance_masks=None, 316 | keypoints=None, 317 | max_boxes_to_draw=20, 318 | 
min_score_thresh=0.2, 319 | use_normalized_coordinates=True): 320 | """Draws bounding boxes, masks, and keypoints on batch of image tensors. 321 | 322 | Args: 323 | images: A 4D uint8 image tensor of shape [N, H, W, C]. If C > 3, additional 324 | channels will be ignored. 325 | boxes: [N, max_detections, 4] float32 tensor of detection boxes. 326 | classes: [N, max_detections] int tensor of detection classes. Note that 327 | classes are 1-indexed. 328 | scores: [N, max_detections] float32 tensor of detection scores. 329 | category_index: a dict that maps integer ids to category dicts. e.g. 330 | {1: {'id': 1, 'name': 'dog'}, 2: {'id': 2, 'name': 'cat'}, ...} 331 | instance_masks: A 4D uint8 tensor of shape [N, max_detection, H, W] with 332 | instance masks. 333 | keypoints: A 4D float32 tensor of shape [N, max_detection, num_keypoints, 2] 334 | with keypoints. 335 | max_boxes_to_draw: Maximum number of boxes to draw on an image. Default 20. 336 | min_score_thresh: Minimum score threshold for visualization. Default 0.2. 337 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 338 | normalized coordinates (as opposed to absolute coordinates). 339 | Default is True. 340 | 341 | Returns: 342 | 4D image tensor of type uint8, with boxes drawn on top. 343 | """ 344 | # Additional channels are being ignored.
345 | images = images[:, :, :, 0:3] 346 | visualization_keyword_args = { 347 | 'use_normalized_coordinates': use_normalized_coordinates, 348 | 'max_boxes_to_draw': max_boxes_to_draw, 349 | 'min_score_thresh': min_score_thresh, 350 | 'agnostic_mode': False, 351 | 'line_thickness': 4 352 | } 353 | 354 | if instance_masks is not None and keypoints is None: 355 | visualize_boxes_fn = functools.partial( 356 | _visualize_boxes_and_masks, 357 | category_index=category_index, 358 | **visualization_keyword_args) 359 | elems = [images, boxes, classes, scores, instance_masks] 360 | elif instance_masks is None and keypoints is not None: 361 | visualize_boxes_fn = functools.partial( 362 | _visualize_boxes_and_keypoints, 363 | category_index=category_index, 364 | **visualization_keyword_args) 365 | elems = [images, boxes, classes, scores, keypoints] 366 | elif instance_masks is not None and keypoints is not None: 367 | visualize_boxes_fn = functools.partial( 368 | _visualize_boxes_and_masks_and_keypoints, 369 | category_index=category_index, 370 | **visualization_keyword_args) 371 | elems = [images, boxes, classes, scores, instance_masks, keypoints] 372 | else: 373 | visualize_boxes_fn = functools.partial( 374 | _visualize_boxes, 375 | category_index=category_index, 376 | **visualization_keyword_args) 377 | elems = [images, boxes, classes, scores] 378 | 379 | def draw_boxes(image_and_detections): 380 | """Draws boxes on image.""" 381 | image_with_boxes = tf.py_func(visualize_boxes_fn, image_and_detections, 382 | tf.uint8) 383 | return image_with_boxes 384 | 385 | images = tf.map_fn(draw_boxes, elems, dtype=tf.uint8, back_prop=False) 386 | return images 387 | 388 | 389 | def draw_side_by_side_evaluation_image(eval_dict, 390 | category_index, 391 | max_boxes_to_draw=20, 392 | min_score_thresh=0.2, 393 | use_normalized_coordinates=True): 394 | """Creates a side-by-side image with detections and groundtruth. 
395 | 396 | Bounding boxes (and instance masks, if available) are visualized on both 397 | subimages. 398 | 399 | Args: 400 | eval_dict: The evaluation dictionary returned by 401 | eval_util.result_dict_for_single_example(). 402 | category_index: A category index (dictionary) produced from a labelmap. 403 | max_boxes_to_draw: The maximum number of boxes to draw for detections. 404 | min_score_thresh: The minimum score threshold for showing detections. 405 | use_normalized_coordinates: Whether to assume boxes and keypoints are in 406 | normalized coordinates (as opposed to absolute coordinates). 407 | Default is True. 408 | 409 | Returns: 410 | A [1, H, 2 * W, C] uint8 tensor. The subimage on the left corresponds to 411 | detections, while the subimage on the right corresponds to groundtruth. 412 | """ 413 | detection_fields = fields.DetectionResultFields() 414 | input_data_fields = fields.InputDataFields() 415 | instance_masks = None 416 | if detection_fields.detection_masks in eval_dict: 417 | instance_masks = tf.cast( 418 | tf.expand_dims(eval_dict[detection_fields.detection_masks], axis=0), 419 | tf.uint8) 420 | keypoints = None 421 | if detection_fields.detection_keypoints in eval_dict: 422 | keypoints = tf.expand_dims( 423 | eval_dict[detection_fields.detection_keypoints], axis=0) 424 | groundtruth_instance_masks = None 425 | if input_data_fields.groundtruth_instance_masks in eval_dict: 426 | groundtruth_instance_masks = tf.cast( 427 | tf.expand_dims( 428 | eval_dict[input_data_fields.groundtruth_instance_masks], axis=0), 429 | tf.uint8) 430 | images_with_detections = draw_bounding_boxes_on_image_tensors( 431 | eval_dict[input_data_fields.original_image], 432 | tf.expand_dims(eval_dict[detection_fields.detection_boxes], axis=0), 433 | tf.expand_dims(eval_dict[detection_fields.detection_classes], axis=0), 434 | tf.expand_dims(eval_dict[detection_fields.detection_scores], axis=0), 435 | category_index, 436 | instance_masks=instance_masks, 437 |
def draw_keypoints_on_image_array(image,
                                  keypoints,
                                  color='red',
                                  radius=2,
                                  use_normalized_coordinates=True):
    """Draws keypoints on an image (numpy array), modifying it in place.

    The array is converted to an RGB PIL image, the keypoints are drawn on
    that image, and the result is copied back into `image`.

    Args:
      image: a numpy array with shape [height, width, 3].
      keypoints: a numpy array with shape [num_keypoints, 2].
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
    draw_keypoints_on_image(image_pil, keypoints, color, radius,
                            use_normalized_coordinates)
    np.copyto(image, np.array(image_pil))


def draw_keypoints_on_image(image,
                            keypoints,
                            color='red',
                            radius=2,
                            use_normalized_coordinates=True):
    """Draws keypoints on a PIL image as filled circles.

    Args:
      image: a PIL.Image object.
      keypoints: a numpy array with shape [num_keypoints, 2]; each entry is
        read as (y, x).
      color: color to draw the keypoints with. Default is red.
      radius: keypoint radius. Default value is 2.
      use_normalized_coordinates: if True (default), treat keypoint values as
        relative to the image. Otherwise treat them as absolute.
    """
    draw = ImageDraw.Draw(image)
    im_width, im_height = image.size
    for keypoint in keypoints:
        keypoint_y, keypoint_x = keypoint[0], keypoint[1]
        if use_normalized_coordinates:
            # Scale relative coordinates up to pixel positions.
            keypoint_x = im_width * keypoint_x
            keypoint_y = im_height * keypoint_y
        draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                      (keypoint_x + radius, keypoint_y + radius)],
                     outline=color, fill=color)


def draw_mask_on_image_array(image, mask, color='red', alpha=0.4):
    """Blends a solid-color mask into an image, modifying the image in place.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      mask: a uint8 numpy array of shape (img_height, img_width) whose values
        are either 0 or 1.
      color: color to draw the mask with. Default is red.
      alpha: transparency value between 0 and 1. (default: 0.4)

    Raises:
      ValueError: On incorrect data type for image or masks, on mask values
        other than 0 or 1, or when the mask's shape differs from the image's
        first two dimensions.
    """
    if image.dtype != np.uint8:
        raise ValueError('`image` not of type np.uint8')
    if mask.dtype != np.uint8:
        raise ValueError('`mask` not of type np.uint8')
    if np.any(np.logical_and(mask != 1, mask != 0)):
        raise ValueError('`mask` elements should be in [0, 1]')
    if image.shape[:2] != mask.shape:
        raise ValueError('The image has spatial dimensions %s but the mask has '
                         'dimensions %s' % (image.shape[:2], mask.shape))
    rgb = ImageColor.getrgb(color)
    pil_image = Image.fromarray(image)

    # Full-size image filled with the requested color; the mask scaled by
    # alpha decides how much of it shows through at each pixel.
    solid_color = np.expand_dims(
        np.ones_like(mask), axis=2) * np.reshape(list(rgb), [1, 1, 3])
    pil_solid_color = Image.fromarray(np.uint8(solid_color)).convert('RGBA')
    pil_mask = Image.fromarray(np.uint8(255.0 * alpha * mask)).convert('L')
    pil_image = Image.composite(pil_solid_color, pil_image, pil_mask)
    np.copyto(image, np.array(pil_image.convert('RGB')))


def visualize_boxes_and_labels_on_image_array(
        image,
        boxes,
        classes,
        scores,
        category_index,
        instance_masks=None,
        instance_boundaries=None,
        keypoints=None,
        use_normalized_coordinates=False,
        max_boxes_to_draw=20,
        min_score_thresh=.5,
        agnostic_mode=False,
        line_thickness=4,
        groundtruth_box_visualization_color='black',
        skip_scores=False,
        skip_labels=False):
    """Overlay labeled boxes on an image with formatted scores and label names.

    This function groups boxes that correspond to the same location
    and creates a display string for each detection and overlays these
    on the image. Note that this function modifies the image in place, and
    returns that same image.

    Args:
      image: uint8 numpy array with shape (img_height, img_width, 3)
      boxes: a numpy array of shape [N, 4]
      classes: a numpy array of shape [N]. Note that class indices are
        1-based, and match the keys in the label map. Float-valued class ids
        are accepted; they are cast to int when picking a color.
      scores: a numpy array of shape [N] or None. If scores=None, then
        this function assumes that the boxes to be plotted are groundtruth
        boxes and plot all boxes as black with no classes or scores.
      category_index: a dict containing category dictionaries (each holding
        category index `id` and category name `name`) keyed by category
        indices.
      instance_masks: a numpy array of shape [N, image_height, image_width]
        with values ranging between 0 and 1, can be None.
      instance_boundaries: a numpy array of shape
        [N, image_height, image_width] with values ranging between 0 and 1,
        can be None.
      keypoints: a numpy array of shape [N, num_keypoints, 2], can
        be None
      use_normalized_coordinates: whether boxes is to be interpreted as
        normalized coordinates or not.
      max_boxes_to_draw: maximum number of boxes to visualize. If None, draw
        all boxes. A value of 0 draws no boxes.
      min_score_thresh: minimum score threshold for a box to be visualized
      agnostic_mode: boolean (default: False) controlling whether to evaluate
        in class-agnostic mode or not. This mode will display scores but
        ignore classes.
      line_thickness: integer (default: 4) controlling line width of the
        boxes.
      groundtruth_box_visualization_color: box color for visualizing
        groundtruth boxes
      skip_scores: whether to skip score when drawing a single detection
      skip_labels: whether to skip label when drawing a single detection

    Returns:
      uint8 numpy array with shape (img_height, img_width, 3) with overlaid
      boxes.
    """
    # Create a display string (and color) for every box location, group any
    # boxes that correspond to the same location.
    box_to_display_str_map = collections.defaultdict(list)
    box_to_color_map = collections.defaultdict(str)
    box_to_instance_masks_map = {}
    box_to_instance_boundaries_map = {}
    box_to_keypoints_map = collections.defaultdict(list)
    # Only None means "no limit"; a plain truthiness test would also turn an
    # explicit limit of 0 into "draw everything".
    if max_boxes_to_draw is None:
        max_boxes_to_draw = boxes.shape[0]
    for i in range(min(max_boxes_to_draw, boxes.shape[0])):
        if scores is not None and not scores[i] > min_score_thresh:
            continue
        box = tuple(boxes[i].tolist())
        if instance_masks is not None:
            box_to_instance_masks_map[box] = instance_masks[i]
        if instance_boundaries is not None:
            box_to_instance_boundaries_map[box] = instance_boundaries[i]
        if keypoints is not None:
            box_to_keypoints_map[box].extend(keypoints[i])
        if scores is None:
            box_to_color_map[box] = groundtruth_box_visualization_color
            continue
        display_str = ''
        if not skip_labels and not agnostic_mode:
            if classes[i] in category_index:
                class_name = category_index[classes[i]]['name']
            else:
                class_name = 'N/A'
            display_str = str(class_name)
        if not skip_scores:
            if not display_str:
                display_str = '{}%'.format(int(100 * scores[i]))
            else:
                display_str = '{}: {}%'.format(display_str,
                                               int(100 * scores[i]))
        box_to_display_str_map[box].append(display_str)
        if agnostic_mode:
            box_to_color_map[box] = 'DarkOrange'
        else:
            # Cast to int so float class ids can still index the color list.
            box_to_color_map[box] = STANDARD_COLORS[
                int(classes[i]) % len(STANDARD_COLORS)]

    # Draw all boxes onto image.
    for box, color in box_to_color_map.items():
        ymin, xmin, ymax, xmax = box
        if instance_masks is not None:
            draw_mask_on_image_array(
                image,
                box_to_instance_masks_map[box],
                color=color
            )
        if instance_boundaries is not None:
            draw_mask_on_image_array(
                image,
                box_to_instance_boundaries_map[box],
                color='red',
                alpha=1.0
            )
        draw_bounding_box_on_image_array(
            image,
            ymin,
            xmin,
            ymax,
            xmax,
            color=color,
            thickness=line_thickness,
            display_str_list=box_to_display_str_map[box],
            use_normalized_coordinates=use_normalized_coordinates)
        if keypoints is not None:
            draw_keypoints_on_image_array(
                image,
                box_to_keypoints_map[box],
                color=color,
                radius=line_thickness / 2,
                use_normalized_coordinates=use_normalized_coordinates)

    return image


def _render_figure_as_image(fig):
    """Rasterizes a matplotlib figure into a [1, height, width, 3] uint8 array.

    The figure is always closed afterwards; pyplot otherwise keeps every
    figure it created alive, so repeated calls would accumulate figures.
    """
    try:
        fig.canvas.draw()
        width, height = fig.get_size_inches() * fig.get_dpi()
        if hasattr(fig.canvas, 'tostring_rgb'):
            pixels = np.frombuffer(fig.canvas.tostring_rgb(), dtype='uint8')
        else:
            # Canvases without tostring_rgb() (newer matplotlib) expose an
            # RGBA buffer instead; drop the alpha channel.
            pixels = np.asarray(fig.canvas.buffer_rgba())[..., :3]
        return pixels.reshape(1, int(height), int(width), 3)
    finally:
        plt.close(fig)


def add_cdf_image_summary(values, name):
    """Adds a tf.summary.image for a CDF plot of the values.

    Normalizes `values` such that they sum to 1, plots the cumulative
    distribution function and creates a tf image summary.

    Args:
      values: a 1-D float32 tensor containing the values.
      name: name for the image summary.
    """
    def cdf_plot(values):
        """Numpy function to plot CDF."""
        normalized_values = values / np.sum(values)
        sorted_values = np.sort(normalized_values)
        cumulative_values = np.cumsum(sorted_values)
        fraction_of_examples = (
            np.arange(cumulative_values.size, dtype=np.float32)
            / cumulative_values.size)
        fig = plt.figure(frameon=False)
        ax = fig.add_subplot(1, 1, 1)
        ax.plot(fraction_of_examples, cumulative_values)
        ax.set_ylabel('cumulative normalized values')
        ax.set_xlabel('fraction of examples')
        return _render_figure_as_image(fig)
    cdf_plot = tf.py_func(cdf_plot, [values], tf.uint8)
    tf.summary.image(name, cdf_plot)


def add_hist_image_summary(values, bins, name):
    """Adds a tf.summary.image for a histogram plot of the values.

    Plots the histogram of values and creates a tf image summary.

    Args:
      values: a 1-D float32 tensor containing the values.
      bins: bin edges which will be directly passed to np.histogram.
      name: name for the image summary.
    """

    def hist_plot(values, bins):
        """Numpy function to plot hist."""
        fig = plt.figure(frameon=False)
        ax = fig.add_subplot(1, 1, 1)
        y, x = np.histogram(values, bins=bins)
        # np.histogram returns one more edge than counts; plot each count
        # against the left edge of its bin.
        ax.plot(x[:-1], y)
        ax.set_ylabel('count')
        ax.set_xlabel('value')
        return _render_figure_as_image(fig)
    hist_plot = tf.py_func(hist_plot, [values, bins], tf.uint8)
    tf.summary.image(name, hist_plot)