├── mrcnn ├── __init__.py ├── __pycache__ │ ├── config.cpython-37.pyc │ ├── config.cpython-39.pyc │ ├── model.cpython-37.pyc │ ├── model.cpython-39.pyc │ ├── utils.cpython-37.pyc │ ├── utils.cpython-39.pyc │ ├── __init__.cpython-37.pyc │ ├── __init__.cpython-39.pyc │ ├── visualize.cpython-37.pyc │ └── visualize.cpython-39.pyc ├── parallel_model.py ├── config.py ├── visualize.py └── utils.py ├── __pycache__ ├── annotate.cpython-37.pyc ├── resume_eda.cpython-37.pyc └── Train_MASKRCNN_Script.cpython-37.pyc ├── requirements.txt ├── resume_eda.py ├── README.md ├── Train_MASKRCNN_Script.py ├── Resume_Parser.py └── annotate.py /mrcnn/__init__.py: -------------------------------------------------------------------------------- 1 | __version__ = "1.0.0" 2 | -------------------------------------------------------------------------------- /__pycache__/annotate.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/__pycache__/annotate.cpython-37.pyc -------------------------------------------------------------------------------- /__pycache__/resume_eda.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/__pycache__/resume_eda.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/config.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/config.cpython-39.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/model.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/model.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/model.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/model.cpython-39.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/utils.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/utils.cpython-39.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/__init__.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/__init__.cpython-39.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/visualize.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/visualize.cpython-37.pyc -------------------------------------------------------------------------------- /mrcnn/__pycache__/visualize.cpython-39.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/mrcnn/__pycache__/visualize.cpython-39.pyc -------------------------------------------------------------------------------- /__pycache__/Train_MASKRCNN_Script.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brazilian-code/Resume_Parsing/HEAD/__pycache__/Train_MASKRCNN_Script.cpython-37.pyc -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | matplotlib 2 | seaborn 3 | pandas 4 | numpy 5 | pdf2image 6 | easyocr 7 | Pillow 8 | ipython 9 | streamlit 10 | xml-python 11 | opencv-python 12 | tensorflow==1.13.1 13 | keras==2.2.5 14 | Python==3.7.10 15 | -------------------------------------------------------------------------------- /resume_eda.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # coding: utf-8 3 | import matplotlib.pyplot as plt 4 | import seaborn as sns 5 | def labels(ax): 6 | for p in ax.patches: 7 | width = p.get_width() 8 | height = p.get_height() 9 | x = p.get_x() 10 | y = p.get_y() 11 | ax.annotate(f"{int(height)}", (x + width/2, y + height*1.01), ha="center") 12 | plt.ylabel("Count of Resumes") 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Resume_Parsing 2 | Machine Learning Project 3 | 4 | **Team**: 5 | * David Balaban - Team Leader 6 | * Daniel Lichter - Techsmith 7 | * Asma Sadia - Specification Leader 8 | * Maitri Patel - Quality Assurance Specialist 9 | 10 | ### Business Problem 11 | Keystone Strategy's recruiting team often receives large "resume books" containing  hundreds of resumes from universities and their student organizations, which they then have to manually parse to catalog attributes about applicants such as education, work experience, skills, etc, before doing a more detailed review for fit with the organization. Keystone would like to automate this process using machine learning. A machine learning-based resume parsing will save recruiting team from hours of daily work by eliminating manual processing and analysis for every resume they receive. 
12 | 13 | ### Data 14 | Our custom dataset consists of resumes from three graduate business school resume books: the Tuck School of Business at Dartmouth, the Haas School of Business at Berkeley, and the Stanford Graduate School of Business. In total, there are 841 resumes of MBA candidates. 15 | 16 | resume_count_uni 17 | 18 | ### Model 19 | Our modeling approach uses MaskRCNN and EasyOCR to parse the resumes and extract their information. 20 | 21 | MaskRCNN is a model generally used for object detection. We fine-tuned it on the resumes available to us to classify the different portions of a resume, predicting a bounding box for each section. The starting weights were pretrained on the COCO dataset (about 330K images across 80 classes). For the text extraction part we used EasyOCR, an Optical Character Recognition model that is already trained on multiple languages (including English), has high accuracy, and is easy to use. A condensed code sketch of this pipeline is shown after the Conclusion below. 22 | 23 | We used mean Average Precision (mAP) as the metric, which is standard for evaluating object detection models. It measures the average precision (the AUC of a precision-recall curve) across all object classes and ranges between 0 and 1. We computed mAP at several IoU thresholds on the 50 testing resumes. At a 75% IoU threshold we got almost 95% mAP, which is very good, although it may be too good to be true since some overfitting could be involved. At an 85% IoU threshold the mAP was 73%, which is still strong; we believe an 85% IoU threshold is enough for the model to correctly locate the sections, and overlapping predictions have been rare. Finally, at a 95% IoU threshold we see an abysmal score of 0.167%, which is understandable given the small amount of training data this model has seen (only about 850 resumes). 24 | 25 | 26 | 27 | ### Conclusion 28 | Selecting the right candidates from a pool of applicants can be one of the toughest jobs for talent acquisition leaders. Moreover, going through each resume manually every hiring season is tiresome and time-consuming. A machine learning resume parser can be a lifesaver for the entire company, providing unbiased results while reducing possible manual errors. 
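For reference, here is a condensed sketch of the detect-and-read loop that `Resume_Parser.py` performs for every resume page. It assumes you already have a trained weights file (the name `resume_sections.h5` below is just a placeholder) and a page that has already been converted to an image:

```
import cv2
import easyocr
import mrcnn.config
import mrcnn.model

CLASS_NAMES = ['BG', 'Personal Info', 'Education', 'Skills',
               'Projects', 'Work Experience', 'Extra']

class InferenceConfig(mrcnn.config.Config):
    NAME = "resumes_cfg"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = len(CLASS_NAMES)

# Load the section detector and the OCR reader once
model = mrcnn.model.MaskRCNN(mode="inference", config=InferenceConfig(), model_dir=".")
model.load_weights(filepath="resume_sections.h5", by_name=True)  # placeholder weights path
reader = easyocr.Reader(["en"])

# One resume page, already rendered to an image (e.g. via pdf2image)
image = cv2.cvtColor(cv2.imread("resume_page.jpg"), cv2.COLOR_BGR2RGB)

# Detect section bounding boxes, then OCR each crop
pred = model.detect([image], verbose=0)[0]   # boxes are [y1, x1, y2, x2]
for box, class_id in zip(pred['rois'], pred['class_ids']):
    y1, x1, y2, x2 = box
    section_text = " ".join(b[1] for b in reader.readtext(image[y1:y2, x1:x2]))
    print(CLASS_NAMES[class_id], ":", section_text)
```

Training itself is handled by `train_model` in `Train_MASKRCNN_Script.py`, which the app calls from the "Train Model" tab.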
29 | 30 | 31 | ## How to use our app: 32 | 33 | To run our app, clone this repository and first install all necessary dependencies from requirements.txt: 34 | ``` 35 | pip install -r requirements.txt 36 | ``` 37 | Poppler also needs to be installed to handle the PDFs, and there are two ways of doing this: 38 | ``` 39 | pip install python-poppler 40 | ``` 41 | or 42 | ``` 43 | conda install -c conda-forge poppler 44 | ``` 45 | Once all the necessary packages have been installed, run the application with the Streamlit framework using the following command: 46 | ``` 47 | python -m streamlit run Resume_Parser.py 48 | ``` 49 | 50 | Here is a screenshot of our application: 51 | 52 | app_screenshot 53 | 54 | -------------------------------------------------------------------------------- /Train_MASKRCNN_Script.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Mon Nov 15 02:28:40 2021 4 | 5 | @author: davba 6 | """ 7 | #Notebook written by David A. A. Balaban 8 | from pdf2image import convert_from_path 9 | import easyocr 10 | import numpy as np 11 | import PIL # Python Imaging Library 12 | from PIL import ImageDraw # drawing bounding boxes 13 | import tensorflow as tf 14 | from IPython.display import display,Image 15 | from matplotlib.pyplot import imshow 16 | import xml.dom.minidom 17 | import pandas as pd 18 | import mrcnn 19 | import mrcnn.utils 20 | import mrcnn.config 21 | import mrcnn.model 22 | import urllib.request 23 | import os 24 | import xml.etree.ElementTree  # used by extract_boxes below 25 | 26 | 27 | #Using Keras==2.2.5 28 | 29 | # Sections = Personal Info, Education, Skills, Projects, Work Experience, Extra 30 | 31 | class ResumeDataset(mrcnn.utils.Dataset): 32 | 33 | def load_dataset(self, dataset_dir, is_train=True): 34 | # Adding all possible sections 35 | self.add_class("dataset", 1, "Personal Info") 36 | self.add_class("dataset", 2, "Education") 37 | self.add_class("dataset", 3, "Skills") 38 | self.add_class("dataset", 4, "Projects") 39 | self.add_class("dataset", 5, "Work Experience") 40 | self.add_class("dataset", 6, "Extra") 41 | 42 | 43 | 44 | images_dir = dataset_dir + '\\Resumes\\' 45 | annotations_dir = dataset_dir + '\\Resume_Annotations\\' 46 | 47 | dir_list = os.listdir(images_dir) 48 | count = 0 49 | image_id = "" 50 | 51 | for filename in dir_list: 52 | # Image ID is file name without .jpg 53 | image_id = filename[:-4] 54 | count += 1 55 | # First 75% of the files go to training, the rest to validation 56 | if is_train and count > len(dir_list)*0.75: 57 | continue 58 | if not is_train and count <= len(dir_list)*0.75: 59 | continue 60 | 61 | 62 | img_path = images_dir + filename 63 | ann_path = annotations_dir + image_id + '.xml' 64 | 65 | self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path) 66 | 67 | def load_mask(self, image_id): 68 | info = self.image_info[image_id] 69 | path = info['annotation'] 70 | boxes, w, h = self.extract_boxes(path) 71 | masks = np.zeros([h, w, len(boxes)], dtype='uint8') 72 | 73 | class_ids = list() 74 | for i in range(len(boxes)): 75 | obj = boxes[i] 76 | box = obj[1] 77 | row_s, row_e = box[1], box[3] 78 | col_s, col_e = box[0], box[2] 79 | masks[row_s:row_e, col_s:col_e, i] = 1 80 | class_ids.append(self.class_names.index(obj[0])) 81 | return masks, np.asarray(class_ids, dtype='int32') 82 | 83 | # A helper method to extract the bounding boxes from the annotation file 84 | def extract_boxes(self, filename): 85 | tree = xml.etree.ElementTree.parse(filename) 
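        # Each annotation is a Pascal VOC-style XML file: every <object> element
        # holds a <name> (the section label) and a <bndbox> with xmin/ymin/xmax/ymax
        # pixel coordinates, which the loop below reads out.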
86 | 87 | root = tree.getroot() 88 | 89 | boxes = list() 90 | for obj in root.findall('./object'): 91 | name = obj.find('name').text 92 | xmin = int(obj.find('bndbox/xmin').text) 93 | ymin = int(obj.find('bndbox/ymin').text) 94 | xmax = int(obj.find('bndbox/xmax').text) 95 | ymax = int(obj.find('bndbox/ymax').text) 96 | coors = [xmin, ymin, xmax, ymax] 97 | box_array = [name,coors] 98 | print(box_array) 99 | boxes.append(box_array) 100 | 101 | 102 | width = int(root.find('.//size/width').text) 103 | height = int(root.find('.//size/height').text) 104 | return boxes, width, height 105 | 106 | class ResumeConfig(mrcnn.config.Config): 107 | NAME = "resumes_cfg" 108 | 109 | GPU_COUNT = 1 110 | IMAGES_PER_GPU = 1 111 | 112 | NUM_CLASSES = 7 113 | 114 | LEARNING_RATE = 0.001 115 | 116 | STEPS_PER_EPOCH = 131 117 | 118 | 119 | 120 | def train_model(dataset_path, model_path, num_epochs, final_model_path): 121 | 122 | # Training 123 | train_dataset = ResumeDataset() 124 | train_dataset.load_dataset(dataset_dir=dataset_path, is_train=True) 125 | train_dataset.prepare() 126 | # Validation 127 | validation_dataset = ResumeDataset() 128 | validation_dataset.load_dataset(dataset_dir=dataset_path, is_train=False) 129 | validation_dataset.prepare() 130 | 131 | #For Training; 132 | config = ResumeConfig() 133 | 134 | model = mrcnn.model.MaskRCNN(mode='training', 135 | model_dir='.log', 136 | config=config) 137 | model.keras_model.summary() 138 | 139 | model.load_weights(filepath=model_path, by_name=True) 140 | 141 | print("Weights loaded!") 142 | 143 | print("Training Started!") 144 | model.train(train_dataset=train_dataset, 145 | val_dataset=validation_dataset, 146 | learning_rate=config.LEARNING_RATE, 147 | epochs=num_epochs, 148 | layers='heads') 149 | 150 | 151 | model.keras_model.save_weights(final_model_path) 152 | print("Model Saved!") 153 | -------------------------------------------------------------------------------- /mrcnn/parallel_model.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Multi-GPU Support for Keras. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | 9 | Ideas and a small code snippets from these sources: 10 | https://github.com/fchollet/keras/issues/2436 11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ 13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py 14 | """ 15 | 16 | import tensorflow as tf 17 | import keras.backend as K 18 | import keras.layers as KL 19 | import keras.models as KM 20 | 21 | 22 | class ParallelModel(KM.Model): 23 | """Subclasses the standard Keras Model and adds multi-GPU support. 24 | It works by creating a copy of the model on each GPU. Then it slices 25 | the inputs and sends a slice to each copy of the model, and then 26 | merges the outputs together and applies the loss on the combined 27 | outputs. 28 | """ 29 | 30 | def __init__(self, keras_model, gpu_count): 31 | """Class constructor. 32 | keras_model: The Keras model to parallelize 33 | gpu_count: Number of GPUs. 
Must be > 1 34 | """ 35 | self.inner_model = keras_model 36 | self.gpu_count = gpu_count 37 | merged_outputs = self.make_parallel() 38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, 39 | outputs=merged_outputs) 40 | 41 | def __getattribute__(self, attrname): 42 | """Redirect loading and saving methods to the inner model. That's where 43 | the weights are stored.""" 44 | if 'load' in attrname or 'save' in attrname: 45 | return getattr(self.inner_model, attrname) 46 | return super(ParallelModel, self).__getattribute__(attrname) 47 | 48 | def summary(self, *args, **kwargs): 49 | """Override summary() to display summaries of both, the wrapper 50 | and inner models.""" 51 | super(ParallelModel, self).summary(*args, **kwargs) 52 | self.inner_model.summary(*args, **kwargs) 53 | 54 | def make_parallel(self): 55 | """Creates a new wrapper model that consists of multiple replicas of 56 | the original model placed on different GPUs. 57 | """ 58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy 59 | # of the full inputs to all GPUs. Saves on bandwidth and memory. 60 | input_slices = {name: tf.split(x, self.gpu_count) 61 | for name, x in zip(self.inner_model.input_names, 62 | self.inner_model.inputs)} 63 | 64 | output_names = self.inner_model.output_names 65 | outputs_all = [] 66 | for i in range(len(self.inner_model.outputs)): 67 | outputs_all.append([]) 68 | 69 | # Run the model call() on each GPU to place the ops there 70 | for i in range(self.gpu_count): 71 | with tf.device('/gpu:%d' % i): 72 | with tf.name_scope('tower_%d' % i): 73 | # Run a slice of inputs through this replica 74 | zipped_inputs = zip(self.inner_model.input_names, 75 | self.inner_model.inputs) 76 | inputs = [ 77 | KL.Lambda(lambda s: input_slices[name][i], 78 | output_shape=lambda s: (None,) + s[1:])(tensor) 79 | for name, tensor in zipped_inputs] 80 | # Create the model replica and get the outputs 81 | outputs = self.inner_model(inputs) 82 | if not isinstance(outputs, list): 83 | outputs = [outputs] 84 | # Save the outputs for merging back together later 85 | for l, o in enumerate(outputs): 86 | outputs_all[l].append(o) 87 | 88 | # Merge outputs on CPU 89 | with tf.device('/cpu:0'): 90 | merged = [] 91 | for outputs, name in zip(outputs_all, output_names): 92 | # Concatenate or average outputs? 93 | # Outputs usually have a batch dimension and we concatenate 94 | # across it. If they don't, then the output is likely a loss 95 | # or a metric value that gets averaged across the batch. 96 | # Keras expects losses and metrics to be scalars. 97 | if K.int_shape(outputs[0]) == (): 98 | # Average 99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs) 100 | else: 101 | # Concatenate 102 | m = KL.Concatenate(axis=0, name=name)(outputs) 103 | merged.append(m) 104 | return merged 105 | 106 | 107 | if __name__ == "__main__": 108 | # Testing code below. It creates a simple model to train on MNIST and 109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed 110 | # in TensorBoard. 
Run it as: 111 | # 112 | # python3 parallel_model.py 113 | 114 | import os 115 | import numpy as np 116 | import keras.optimizers 117 | from keras.datasets import mnist 118 | from keras.preprocessing.image import ImageDataGenerator 119 | 120 | GPU_COUNT = 2 121 | 122 | # Root directory of the project 123 | ROOT_DIR = os.path.abspath("../") 124 | 125 | # Directory to save logs and trained model 126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs") 127 | 128 | def build_model(x_train, num_classes): 129 | # Reset default graph. Keras leaves old ops in the graph, 130 | # which are ignored for execution but clutter graph 131 | # visualization in TensorBoard. 132 | tf.reset_default_graph() 133 | 134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image") 135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", 136 | name="conv1")(inputs) 137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", 138 | name="conv2")(x) 139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) 140 | x = KL.Flatten(name="flat1")(x) 141 | x = KL.Dense(128, activation='relu', name="dense1")(x) 142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) 143 | 144 | return KM.Model(inputs, x, "digit_classifier_model") 145 | 146 | # Load MNIST Data 147 | (x_train, y_train), (x_test, y_test) = mnist.load_data() 148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255 149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255 150 | 151 | print('x_train shape:', x_train.shape) 152 | print('x_test shape:', x_test.shape) 153 | 154 | # Build data generator and model 155 | datagen = ImageDataGenerator() 156 | model = build_model(x_train, 10) 157 | 158 | # Add multi-GPU support. 159 | model = ParallelModel(model, GPU_COUNT) 160 | 161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) 162 | 163 | model.compile(loss='sparse_categorical_crossentropy', 164 | optimizer=optimizer, metrics=['accuracy']) 165 | 166 | model.summary() 167 | 168 | # Train 169 | model.fit_generator( 170 | datagen.flow(x_train, y_train, batch_size=64), 171 | steps_per_epoch=50, epochs=10, verbose=1, 172 | validation_data=(x_test, y_test), 173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, 174 | write_graph=True)] 175 | ) 176 | -------------------------------------------------------------------------------- /Resume_Parser.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed Dec 22 02:48:47 2021 4 | 5 | @author: davba 6 | 7 | path to this file rn D:\ResumeIT\Resume_Parser.py 8 | python -m streamlit run D:\ResumeIT\Resume_Parser.py 9 | 10 | """ 11 | import streamlit as st #framework 12 | import pdf2image #converts pdf 13 | from pdf2image import convert_from_path 14 | import easyocr 15 | import numpy as np 16 | import PIL # Python Imaging Library 17 | from PIL import ImageDraw # drawing bounding boxes 18 | import tensorflow as tf 19 | from IPython.display import display,Image 20 | from matplotlib.pyplot import imshow 21 | import xml.dom.minidom 22 | import pandas as pd 23 | import mrcnn 24 | import mrcnn.utils 25 | import mrcnn.config 26 | import mrcnn.model 27 | import mrcnn.visualize 28 | import urllib.request 29 | import os 30 | import xml.etree 31 | import cv2 32 | import Train_MASKRCNN_Script as training 33 | 34 | 35 | @st.cache 36 | def convert_df(df): 37 | # IMPORTANT: Cache the conversion to prevent computation on every rerun 38 | return df.to_csv().encode('utf-8') 39 | 40 | 41 | 
def run_model(model_path, img_array): 42 | CLASS_NAMES = ['BG', 'Personal Info', 'Education', 'Skills', 'Projects', 'Work Experience', 'Extra'] 43 | 44 | class SimpleConfig(mrcnn.config.Config): 45 | NAME = "coco_inference" 46 | 47 | GPU_COUNT = 1 48 | IMAGES_PER_GPU = 1 49 | 50 | NUM_CLASSES = len(CLASS_NAMES) 51 | 52 | model = mrcnn.model.MaskRCNN(mode="inference", 53 | config=SimpleConfig(), 54 | model_dir=os.getcwd()) 55 | 56 | model.load_weights(filepath=model_path, 57 | by_name=True) 58 | 59 | class_dict = {'bg' : 0, 60 | 'Personal Info' : 1, 61 | 'Education' : 2, 62 | 'Skills' : 3, 63 | 'Projects' : 4, 64 | 'Work Experience' : 5, 65 | 'Extra' : 6 66 | } 67 | 68 | inv_class_dict = {0:'bg', 69 | 1:'Personal Info', 70 | 2:'Education', 71 | 3:'Skills', 72 | 4:'Projects', 73 | 5:'Work Experience', 74 | 6: 'Extra' 75 | } 76 | 77 | sections = ['Personal Info', 'Education', 'Skills', 'Projects', 'Work Experience', 'Extra'] 78 | extracted_info_df = pd.DataFrame(columns=sections) 79 | 80 | #Assuming an array of images in cv2 format: img_array 81 | for image in img_array: 82 | 83 | pred = model.detect([image], verbose=0) 84 | pred = pred[0] 85 | #The bnd box outputed is [y1,x1,y2,x2] 86 | 87 | full_info = list() 88 | class_list = list() 89 | for i in range(len(pred['rois'])): 90 | img_pil = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 91 | temp_img = PIL.Image.fromarray(img_pil) 92 | current_bnd_box = pred['rois'][i] 93 | current_box_class = pred['class_ids'][i] 94 | #pil_img.crop(box=[x1,y1,x2,y2]) 95 | crop_box = [current_bnd_box[1], current_bnd_box[0], current_bnd_box[3], current_bnd_box[2]] 96 | crop_img = temp_img.crop(crop_box) 97 | 98 | #text for this section 99 | section_text = "" 100 | bounds = reader.readtext(np.array(crop_img),min_size=0,slope_ths=0.2,ycenter_ths=0.7,height_ths=0.6,width_ths=0.8) 101 | for b in bounds: 102 | section_text += " " + b[1] 103 | full_info.append([section_text]) 104 | class_list.append(inv_class_dict[current_box_class]) 105 | 106 | add_to_df = dict(zip(class_list,full_info)) 107 | 108 | for key in sections: 109 | if key in add_to_df: 110 | continue 111 | else: 112 | add_to_df[key] = [""] 113 | 114 | temp_df = pd.DataFrame.from_dict(add_to_df) 115 | 116 | extracted_info_df = extracted_info_df.append(temp_df, ignore_index = True) 117 | 118 | return extracted_info_df 119 | 120 | 121 | st.title("Resume Parsing") 122 | os.environ['KMP_DUPLICATE_LIB_OK']='True' 123 | reader= easyocr.Reader(["en"]) # select language 124 | 125 | image_path ="C:/ResumeParser/tempDirectory/resume_image" 126 | # Get Resume Book and Split Resume 127 | 128 | st.write('', unsafe_allow_html=True) 129 | st.write('', unsafe_allow_html=True) 130 | 131 | choose=st.radio("Current Job",("Train Model","Parse Resumes")) 132 | 133 | if choose == "Parse Resumes": 134 | uploaded_file =st.file_uploader("Upload Your Resume Book", type=['pdf'], accept_multiple_files=False, key=None, help=None, on_change=None, args=None, kwargs=None) 135 | if uploaded_file is not None: 136 | if uploaded_file.type == "application/pdf": 137 | images = pdf2image.convert_from_bytes(uploaded_file.read(),size=(1700,2200)) 138 | st.subheader("Please select page(s)") 139 | start = st.number_input('Start with page',min_value =1,max_value=len(images),step=1,key="start_page") 140 | end = st.number_input('End with page',min_value=1,max_value=len(images),step=1,key="end_page") 141 | 142 | split_button = st.button("Split resume book", key='split_button') 143 | if split_button: 144 | for i in range(start-1,end): 145 | img_index = i 146 | 
resume = images[img_index] 147 | 148 | image_path = "C:/ResumeParser/tempDirectory/resume_image" 149 | 150 | image_name = uploaded_file.name.split(".")[0] + str(img_index+1) 151 | image_ext = image_name + ".jpg" 152 | 153 | resume.save(f"{image_path}/{image_ext}") #save jpeg 154 | st.success("Finished splitting the resume. Ready to run!") 155 | 156 | #Resume is split and saved as images so now we open that to get the resumes for prediction 157 | 158 | files = os.listdir(image_path) 159 | img_array = list() 160 | for name in files: 161 | image = cv2.imread(image_path + "/" + name) 162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 163 | img_array.append(image) 164 | 165 | 166 | model_path = st.text_input("Model Path", value="", max_chars=None, key="Model_path_input") 167 | 168 | run_model_button = st.button("Run the Model", key='run_model_button') 169 | 170 | 171 | # button "Click here to run the model" 172 | df = pd.DataFrame() 173 | if run_model_button: 174 | df = run_model(model_path, img_array) 175 | 176 | st.dataframe(data = df) 177 | 178 | csv = convert_df(df) 179 | 180 | st.download_button( 181 | label="Download data as CSV", 182 | data=csv, 183 | mime='text/csv') 184 | elif choose == "Train Model": 185 | 186 | #def train_model(dataset_path, model_path, num_epochs, final_model_path): 187 | 188 | dataset_path = st.text_input("Dataset Path (Folder containing Resumes and Resume Annotations folders)", value="", max_chars=None, key="dataset_path_input") 189 | 190 | initial_model_path = st.text_input("Base Model Path", value="", max_chars=None, key="Base_Model_path_input") 191 | 192 | num_epochs = st.number_input("Number of Epochs", key="Num_Epochs_input") 193 | 194 | final_model_path = st.text_input("Final Model Path", value="", max_chars=None, key="Final_Model_path_input") 195 | 196 | train_button = st.button("Train Model", key="Train_Model_Button") 197 | 198 | if train_button: 199 | with st.spinner('Training the Model'): 200 | training.train_model(dataset_path,initial_model_path, int(num_epochs), final_model_path) 201 | st.success('Finished Training!') 202 | 203 | -------------------------------------------------------------------------------- /mrcnn/config.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Base Configurations class. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import numpy as np 11 | 12 | 13 | # Base Configuration Class 14 | # Don't use this class directly. Instead, sub-class it and override 15 | # the configurations you need to change. 16 | 17 | class Config(object): 18 | """Base configuration class. For custom configurations, create a 19 | sub-class that inherits from this one and override properties 20 | that need to be changed. 21 | """ 22 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. 23 | # Useful if your code needs to do things differently depending on which 24 | # experiment is running. 25 | NAME = None # Override in sub-classes 26 | 27 | # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. 28 | GPU_COUNT = 1 29 | 30 | # Number of images to train with on each GPU. A 12GB GPU can typically 31 | # handle 2 images of 1024x1024px. 32 | # Adjust based on your GPU memory and image sizes. Use the highest 33 | # number that your GPU can handle for best performance. 
34 | IMAGES_PER_GPU = 2 35 | 36 | # Number of training steps per epoch 37 | # This doesn't need to match the size of the training set. Tensorboard 38 | # updates are saved at the end of each epoch, so setting this to a 39 | # smaller number means getting more frequent TensorBoard updates. 40 | # Validation stats are also calculated at each epoch end and they 41 | # might take a while, so don't set this too small to avoid spending 42 | # a lot of time on validation stats. 43 | STEPS_PER_EPOCH = 1000 44 | 45 | # Number of validation steps to run at the end of every training epoch. 46 | # A bigger number improves accuracy of validation stats, but slows 47 | # down the training. 48 | VALIDATION_STEPS = 50 49 | 50 | # Backbone network architecture 51 | # Supported values are: resnet50, resnet101. 52 | # You can also provide a callable that should have the signature 53 | # of model.resnet_graph. If you do so, you need to supply a callable 54 | # to COMPUTE_BACKBONE_SHAPE as well 55 | BACKBONE = "resnet101" 56 | 57 | # Only useful if you supply a callable to BACKBONE. Should compute 58 | # the shape of each layer of the FPN Pyramid. 59 | # See model.compute_backbone_shapes 60 | COMPUTE_BACKBONE_SHAPE = None 61 | 62 | # The strides of each layer of the FPN Pyramid. These values 63 | # are based on a Resnet101 backbone. 64 | BACKBONE_STRIDES = [4, 8, 16, 32, 64] 65 | 66 | # Size of the fully-connected layers in the classification graph 67 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024 68 | 69 | # Size of the top-down layers used to build the feature pyramid 70 | TOP_DOWN_PYRAMID_SIZE = 256 71 | 72 | # Number of classification classes (including background) 73 | NUM_CLASSES = 1 # Override in sub-classes 74 | 75 | # Length of square anchor side in pixels 76 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512) 77 | 78 | # Ratios of anchors at each cell (width/height) 79 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor 80 | RPN_ANCHOR_RATIOS = [0.5, 1, 2] 81 | 82 | # Anchor stride 83 | # If 1 then anchors are created for each cell in the backbone feature map. 84 | # If 2, then anchors are created for every other cell, and so on. 85 | RPN_ANCHOR_STRIDE = 1 86 | 87 | # Non-max suppression threshold to filter RPN proposals. 88 | # You can increase this during training to generate more propsals. 89 | RPN_NMS_THRESHOLD = 0.7 90 | 91 | # How many anchors per image to use for RPN training 92 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256 93 | 94 | # ROIs kept after tf.nn.top_k and before non-maximum suppression 95 | PRE_NMS_LIMIT = 6000 96 | 97 | # ROIs kept after non-maximum suppression (training and inference) 98 | POST_NMS_ROIS_TRAINING = 2000 99 | POST_NMS_ROIS_INFERENCE = 1000 100 | 101 | # If enabled, resizes instance masks to a smaller size to reduce 102 | # memory load. Recommended when using high-resolution images. 103 | USE_MINI_MASK = True 104 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask 105 | 106 | # Input image resizing 107 | # Generally, use the "square" resizing mode for training and predicting 108 | # and it should work well in most cases. In this mode, images are scaled 109 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the 110 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is 111 | # padded with zeros to make it a square so multiple images can be put 112 | # in one batch. 113 | # Available resizing modes: 114 | # none: No resizing or padding. Return the image unchanged. 
115 | # square: Resize and pad with zeros to get a square image 116 | # of size [max_dim, max_dim]. 117 | # pad64: Pads width and height with zeros to make them multiples of 64. 118 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales 119 | # up before padding. IMAGE_MAX_DIM is ignored in this mode. 120 | # The multiple of 64 is needed to ensure smooth scaling of feature 121 | # maps up and down the 6 levels of the FPN pyramid (2**6=64). 122 | # crop: Picks random crops from the image. First, scales the image based 123 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of 124 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only. 125 | # IMAGE_MAX_DIM is not used in this mode. 126 | IMAGE_RESIZE_MODE = "square" 127 | IMAGE_MIN_DIM = 800 128 | IMAGE_MAX_DIM = 1024 129 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further 130 | # up scaling. For example, if set to 2 then images are scaled up to double 131 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it. 132 | # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM. 133 | IMAGE_MIN_SCALE = 0 134 | # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4 135 | # Changing this requires other changes in the code. See the WIKI for more 136 | # details: https://github.com/matterport/Mask_RCNN/wiki 137 | IMAGE_CHANNEL_COUNT = 3 138 | 139 | # Image mean (RGB) 140 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) 141 | 142 | # Number of ROIs per image to feed to classifier/mask heads 143 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate 144 | # enough positive proposals to fill this and keep a positive:negative 145 | # ratio of 1:3. You can increase the number of proposals by adjusting 146 | # the RPN NMS threshold. 147 | TRAIN_ROIS_PER_IMAGE = 200 148 | 149 | # Percent of positive ROIs used to train classifier/mask heads 150 | ROI_POSITIVE_RATIO = 0.33 151 | 152 | # Pooled ROIs 153 | POOL_SIZE = 7 154 | MASK_POOL_SIZE = 14 155 | 156 | # Shape of output mask 157 | # To change this you also need to change the neural network mask branch 158 | MASK_SHAPE = [28, 28] 159 | 160 | # Maximum number of ground truth instances to use in one image 161 | MAX_GT_INSTANCES = 100 162 | 163 | # Bounding box refinement standard deviation for RPN and final detections. 164 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 165 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) 166 | 167 | # Max number of final detections 168 | DETECTION_MAX_INSTANCES = 100 169 | 170 | # Minimum probability value to accept a detected instance 171 | # ROIs below this threshold are skipped 172 | DETECTION_MIN_CONFIDENCE = 0.7 173 | 174 | # Non-maximum suppression threshold for detection 175 | DETECTION_NMS_THRESHOLD = 0.3 176 | 177 | # Learning rate and momentum 178 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes 179 | # weights to explode. Likely due to differences in optimizer 180 | # implementation. 181 | LEARNING_RATE = 0.001 182 | LEARNING_MOMENTUM = 0.9 183 | 184 | # Weight decay regularization 185 | WEIGHT_DECAY = 0.0001 186 | 187 | # Loss weights for more precise optimization. 188 | # Can be used for R-CNN training setup. 189 | LOSS_WEIGHTS = { 190 | "rpn_class_loss": 1., 191 | "rpn_bbox_loss": 1., 192 | "mrcnn_class_loss": 1., 193 | "mrcnn_bbox_loss": 1., 194 | "mrcnn_mask_loss": 1. 195 | } 196 | 197 | # Use RPN ROIs or externally generated ROIs for training 198 | # Keep this True for most situations. 
Set to False if you want to train 199 | # the head branches on ROI generated by code rather than the ROIs from 200 | # the RPN. For example, to debug the classifier head without having to 201 | # train the RPN. 202 | USE_RPN_ROIS = True 203 | 204 | # Train or freeze batch normalization layers 205 | # None: Train BN layers. This is the normal mode 206 | # False: Freeze BN layers. Good when using a small batch size 207 | # True: (don't use). Set layer in training mode even when predicting 208 | TRAIN_BN = False # Defaulting to False since batch size is often small 209 | 210 | # Gradient norm clipping 211 | GRADIENT_CLIP_NORM = 5.0 212 | 213 | def __init__(self): 214 | """Set values of computed attributes.""" 215 | # Effective batch size 216 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT 217 | 218 | # Input image size 219 | if self.IMAGE_RESIZE_MODE == "crop": 220 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, 221 | self.IMAGE_CHANNEL_COUNT]) 222 | else: 223 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, 224 | self.IMAGE_CHANNEL_COUNT]) 225 | 226 | # Image meta data length 227 | # See compose_image_meta() for details 228 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES 229 | 230 | def display(self): 231 | """Display Configuration values.""" 232 | print("\nConfigurations:") 233 | for a in dir(self): 234 | if not a.startswith("__") and not callable(getattr(self, a)): 235 | print("{:30} {}".format(a, getattr(self, a))) 236 | print("\n") 237 | -------------------------------------------------------------------------------- /annotate.py: -------------------------------------------------------------------------------- 1 | from pdf2image import convert_from_path 2 | import easyocr 3 | import numpy as np 4 | import PIL # Python Imaging Library 5 | from PIL import ImageDraw, Image # drawing bounding boxes 6 | import spacy # advanced NLP for key attributes 7 | 8 | def createBoxes(bounds): 9 | categories =['publications','community service','experience','technicallmanagement skills', 10 | 'project experience','activities','awards','summary','volunteer','education', 11 | 'interests','skills','projects','work experience','professional experience','references', 12 | 'volunteer experience','technical skills','research experience','computer skills', 13 | 'leadership & volunteer experience','skillset','extracurriculars', 'certifications', 'certification', 14 | 'academic projects','education & credentials','leadership and extra curriculars', 15 | 'skills & certifications','skill set_','extra curricular activities', 16 | 'leadership , awards & achievements','academic experience','leadership and achievements', 17 | 'education and training','work history','professional summary','skills and abilities', 18 | 'cqurse workand proiects','relevant skills','skill highlights','educations','experiences', 19 | 'personality and language','related courses and skills','key skills','people & capability development', 20 | 'skills and certifications', 'personal','caree interests','additional','interests 0','employment history', 21 | 'details','relevant coursework','other','social impact','experience_', 'leadership and community engagement', 22 | 'professional & leadership experience','additional skills/interests', 'leadership & volunteer experience', 23 | 'other_','social impact','professional distinctions','additional information','activities and leadership', 24 | 'additional experience','interests and skills','athletics','recent experience','awards and 
personal','selected patents and publications', 25 | 'awards, honors, and interests','activities & leadership','leadership & community activities','additional info', 26 | 'leadership','technical experience','community leadership & interests','community activities & additional information', 27 | 'miscellaneous','leadership & community involvement','prior work experience','publication','personal','extracurricular experience', 28 | 'additional skills and projects','publications & research','leadership, community & other','additional data', 29 | 'community work','awards and honours','others','volunteering and public service','skills, interests & publications','personal interests', 30 | 'languages','community & interests','community involement/personal','activities and skills','awards & community involvement', 31 | 'entrepreneurial experience','entrepreneurship','media & technology experience','business ownership','service and interests', 32 | 'extracurricular','certifications','skills & personal','other information','activities','professional sports experience', 33 | 'other activities and personal interests','professional','writing & publications','skills/activities','community & other', 34 | 'board experience','impact investing work','product, user, and strategy work','extra-curricular & community activities', 35 | 'additional interests','additional data','additional experience','social entrepreneurship','interests and extracurriculars', 36 | 'skills & personal','professional certifications and awards','community involvement','selected publications','volunteer experience and additional skills', 37 | 'internship experience','employment','community engagement','awards, speaking engagements & press','leadership & other activities','other leadership experience', 38 | 'hobbies','initiatives','additional projects','professional experience and leadership','professional experience & leadership','volunteer activities/activities outside job', 39 | 'other inerests/hobbies','professional experiences','athletic experience','community','skills/additional information','education_','additional_','community leadership', 40 | 'academic experience','academic experience_','prqeessional experience','leadershp & volunteer experience','additional:','volunteer & leadership experience', 41 | 'dditional','additiona','ed uc a tio n','e xp e rie n c e','ad dttio nal','awards, honors; and interests', 42 | 'professional experience: united states marine corps','skills and interests','leadership & additional information', 43 | 'leadership , community & other','addtional leadershp','experience__','prqeessional_experience','honors & awards', 44 | 'financial skills','extracurricular leadership','additional skills & interests','prqfessional experience','personal activities and interests', 45 | 'leadership experience and service: collegiate activities','leadership experience and service: post-collegiate activities','professional expereince','honors and awards', 46 | 'additional skills','skills & interests','leadershp & communty involvement','workexperience','addtional','honors; skills, & interests','leadership_awards_& skills', 47 | 'skills &','awards &','personal and interests','leadership and activities','education:','work experience:','skills_hobbies & interests', 48 | 'leadership experience','additional activties and interests','community_leadership_','interests & skills', 49 | 'leadership & involvement','awards & interests','work','activities:', 'awards:','skills, achievements & interests','leadership & 
activities','additional leadership , skills, and interests', 50 | 'education _','communty leadershp','skills, activities & interests','skills, languages and interests','experience (u.s_navy, submarines)','additional_experience','activities & interests', 51 | 'skills and personal','leadership activities','professional experience:','leadership experience:','key skills:','e d u c a tio n','ex p e rienc e', 52 | 'p e r s 0 na l','additional leadership','additional information and interests','professional experience_','leadership & service', 53 | 'skills, activities and interests','selected publica tions','teaching','iternshps','public service','communty involvement', 54 | 'professlonal experience','activities and interests','leadership & extracurricular','additional experience_','extracurricular activities & skills', 55 | 'leadership & interests','leadership & extracurricular activittes','leadership and social impact','additional projects_', 56 | 'education and honors','learn to_win (lzw executive and management experience_','naval intelligence officer_experience','navy surface warfare officer experience', 57 | 'education & honors','leadership experience_','summary: strategic, results-oriented leader with experience building cross-functional systems and processes. looking to', 58 | 'military','extracurricular activities','other experience','qther','edlcation','leadershpandcommunty service','education; honors and scholarships', 59 | 'other interestsihobbies','volunteer activitiesiactivities outside job','leadership and additional information','skills/ additional information','extracurricular activities', 60 | 'community and personal interests','community leadership & additional'] 61 | box = [] 62 | for x in bounds: 63 | if x[1].lower() in categories: 64 | box.append(x) 65 | box.append(x) 66 | return box 67 | 68 | def giveProperNames(new_bounds): 69 | properNameBounds = [] 70 | educationNames = ['education','education & credentials','academic experience','education and training','educations','education_','academic experience', 71 | 'academic experience_','ed uc a tio n','education:','education _','e d u c a tio n','education and honors','education & honors','edlcation','education; honors and scholarships'] 72 | 73 | workNames = ['professional experience','work experience','experience','work history','experiences','experience_','recent work experience','prior work experience', 74 | 'entrepreneurial experience','employment','professional experience and leadership','professional experience & leadership','professional experiences','recent experience', 75 | 'media & technology experience','business ownership','additional experience','professional & leadership experience','prqeessional experience', 76 | 'e xp e rie n c e','professional experience: united states marine corps','experience__','prqeessional_experience','prqfessional experience','professional expereince', 77 | 'workexperience','work experience:','entrepreneurship','work','experience (u.s_navy, submarines)','professional experience:','ex p e rienc e','professional experience_', 78 | 'iternshps','professlonal experience','learn to_win (lzw executive and management experience_','naval intelligence officer_experience','navy surface warfare officer experience'] 79 | 80 | skillNames = ['skills','technicallmanagement skills','computer skills','skillset','skill set_','relevant skills', 81 | 'skills and abilities','skill highlights','skills/additional information','skills/activities','skills & personal', 82 | 'skills/additional information', 
'activities and skills', 'skills', 'skills, interests & publications', 83 | 'interests and skills', 'additional skills/interests','skills and interests','financial skills','additional skills & interests', 84 | 'additional skills','skills & interests','leadership_awards_& skills', 'skills &','skills_hobbies & interests','interests & skills', 85 | 'skills, achievements & interests','skills, activities & interests','skills, languages and interests','skills and personal', 86 | 'key skills:','skills, activities and interests','extracurricular activities & skills','skills/ additional information'] 87 | for x in new_bounds: 88 | if(len(properNameBounds)==0): 89 | properNameBounds.append(x) 90 | elif(x[1] in educationNames): 91 | properNameBounds.append((x[0],'Education')) 92 | elif(x[1] in workNames): 93 | properNameBounds.append((x[0],'Work Experience')) 94 | elif(x[1] in skillNames): 95 | properNameBounds.append((x[0],'Skills')) 96 | else: 97 | properNameBounds.append((x[0],'Extra')) 98 | return properNameBounds 99 | 100 | def draw_boxes(image,bounds,color="yellow",width=2): 101 | draw = ImageDraw.Draw(image) 102 | for bound in bounds: 103 | p0,p1,p2,p3 = bound[0] 104 | draw.line([*p0,*p1,*p2,*p3,*p0],fill=color,width=width) 105 | return image 106 | 107 | def createNormalBounds(box): 108 | new_bounds = [] 109 | last_section = 'Personal Info' 110 | for x in box: 111 | if len(new_bounds)==0: 112 | new_bounds.append(([[0, x[0][0][1]-10], [1700,x[0][0][1]-10], [1700, 0], [0,0]], last_section)) 113 | elif len(new_bounds)==1: 114 | new_bounds.append(([[0,new_bounds[-1][0][0][1]+10],[1700,new_bounds[-1][0][0][1]+10],[1700,x[0][0][1]-10],[0,x[0][0][1]-10]], last_section)) 115 | else: 116 | new_bounds.append(([[0,new_bounds[-1][0][3][1]+10],[1700,new_bounds[-1][0][3][1]+10],[1700,x[0][0][1]-10],[0,x[0][0][1]-10]],last_section)) 117 | last_section = x[1].lower() 118 | new_bounds[-1][0][3][1]=2200 119 | new_bounds[-1][0][2][1]=2200 120 | return new_bounds 121 | 122 | def createColumnBounds(box): 123 | new_bounds = [] 124 | box.sort(key = lambda x: x[0][0][0]) 125 | box.sort(key = lambda x: x[0][0][1]) 126 | 127 | for i in range(2, len(box)): 128 | if len(new_bounds)==0: 129 | last_section = 'Personal Info' 130 | if (box[i-2][0][0][0]<500): 131 | new_bounds.append(([[0, 0], [1700,0], [1700, box[i-2][0][0][1]-10], 132 | [0,box[i-2][0][0][1]-10]], last_section)) 133 | last_section = box[i-2][1].lower() 134 | if len(new_bounds)==1: 135 | new_bounds.append(([[0,box[i-2][0][0][1]],[1700,box[i-2][0][0][1]], 136 | [1700,box[i-1][0][0][1]-10],[0,box[i-1][0][0][1]-10]], last_section)) 137 | last_section = box[i-1][1].lower() 138 | new_bounds.append(([[0,box[i-1][0][0][1]],[1700,box[i-1][0][0][1]],[1700,box[i][0][0][1]-10], 139 | [0,box[i][0][0][1]-10]],last_section)) 140 | last_section = box[i][1].lower() 141 | print(new_bounds) 142 | if(box[i][0][0][0] <550): 143 | new_bounds.append(([[box[i][0][0][0],box[i][0][0][1]],[1700,box[i][0][0][1]],[1700,box[i][0][3][1]-10], 144 | [box[i][0][0][0],box[i][0][3][1]-10]],last_section)) 145 | 146 | elif(box[i][0][0][0]>550) and (box[i-1][0][0][0]<550): 147 | new_bounds[-1][0][3][1]=2200 148 | new_bounds[-1][0][2][1]=2200 149 | last_section=box[i][1].lower() 150 | endOfColumn = i 151 | break 152 | 153 | 154 | for i in range(endOfColumn, len(box)-1): 155 | for x in new_bounds: 156 | if box[i][0][2][1] -1 else overlaps[i].max())) 220 | for i in range(len(pred_match))] 221 | # Set title if not provided 222 | title = title or "Ground Truth and Detections\n GT=green, pred=red, 
captions: score/IoU" 223 | # Display 224 | display_instances( 225 | image, 226 | boxes, masks, class_ids, 227 | class_names, scores, ax=ax, 228 | show_bbox=show_box, show_mask=show_mask, 229 | colors=colors, captions=captions, 230 | title=title) 231 | 232 | 233 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): 234 | """ 235 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. 236 | proposals: [n, 4] the same anchors but refined to fit objects better. 237 | """ 238 | masked_image = image.copy() 239 | 240 | # Pick random anchors in case there are too many. 241 | ids = np.arange(rois.shape[0], dtype=np.int32) 242 | ids = np.random.choice( 243 | ids, limit, replace=False) if ids.shape[0] > limit else ids 244 | 245 | fig, ax = plt.subplots(1, figsize=(12, 12)) 246 | if rois.shape[0] > limit: 247 | plt.title("Showing {} random ROIs out of {}".format( 248 | len(ids), rois.shape[0])) 249 | else: 250 | plt.title("{} ROIs".format(len(ids))) 251 | 252 | # Show area outside image boundaries. 253 | ax.set_ylim(image.shape[0] + 20, -20) 254 | ax.set_xlim(-50, image.shape[1] + 20) 255 | ax.axis('off') 256 | 257 | for i, id in enumerate(ids): 258 | color = np.random.rand(3) 259 | class_id = class_ids[id] 260 | # ROI 261 | y1, x1, y2, x2 = rois[id] 262 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 263 | edgecolor=color if class_id else "gray", 264 | facecolor='none', linestyle="dashed") 265 | ax.add_patch(p) 266 | # Refined ROI 267 | if class_id: 268 | ry1, rx1, ry2, rx2 = refined_rois[id] 269 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 270 | edgecolor=color, facecolor='none') 271 | ax.add_patch(p) 272 | # Connect the top-left corners of the anchor and proposal for easy visualization 273 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 274 | 275 | # Label 276 | label = class_names[class_id] 277 | ax.text(rx1, ry1 + 8, "{}".format(label), 278 | color='w', size=11, backgroundcolor="none") 279 | 280 | # Mask 281 | m = utils.unmold_mask(mask[id], rois[id] 282 | [:4].astype(np.int32), image.shape) 283 | masked_image = apply_mask(masked_image, m, color) 284 | 285 | ax.imshow(masked_image) 286 | 287 | # Print stats 288 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0]) 289 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0]) 290 | print("Positive Ratio: {:.2f}".format( 291 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0])) 292 | 293 | 294 | # TODO: Replace with matplotlib equivalent? 295 | def draw_box(image, box, color): 296 | """Draw 3-pixel width bounding boxes on the given image array. 297 | color: list of 3 int values for RGB. 
298 | """ 299 | y1, x1, y2, x2 = box 300 | image[y1:y1 + 2, x1:x2] = color 301 | image[y2:y2 + 2, x1:x2] = color 302 | image[y1:y2, x1:x1 + 2] = color 303 | image[y1:y2, x2:x2 + 2] = color 304 | return image 305 | 306 | 307 | def display_top_masks(image, mask, class_ids, class_names, limit=4): 308 | """Display the given image and the top few class masks.""" 309 | to_display = [] 310 | titles = [] 311 | to_display.append(image) 312 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1])) 313 | # Pick top prominent classes in this image 314 | unique_class_ids = np.unique(class_ids) 315 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]]) 316 | for i in unique_class_ids] 317 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area), 318 | key=lambda r: r[1], reverse=True) if v[1] > 0] 319 | # Generate images and titles 320 | for i in range(limit): 321 | class_id = top_ids[i] if i < len(top_ids) else -1 322 | # Pull masks of instances belonging to the same class. 323 | m = mask[:, :, np.where(class_ids == class_id)[0]] 324 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1) 325 | to_display.append(m) 326 | titles.append(class_names[class_id] if class_id != -1 else "-") 327 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r") 328 | 329 | 330 | def plot_precision_recall(AP, precisions, recalls): 331 | """Draw the precision-recall curve. 332 | 333 | AP: Average precision at IoU >= 0.5 334 | precisions: list of precision values 335 | recalls: list of recall values 336 | """ 337 | # Plot the Precision-Recall curve 338 | _, ax = plt.subplots(1) 339 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP)) 340 | ax.set_ylim(0, 1.1) 341 | ax.set_xlim(0, 1.1) 342 | _ = ax.plot(recalls, precisions) 343 | 344 | 345 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores, 346 | overlaps, class_names, threshold=0.5): 347 | """Draw a grid showing how ground truth objects are classified. 348 | gt_class_ids: [N] int. Ground truth class IDs 349 | pred_class_id: [N] int. Predicted class IDs 350 | pred_scores: [N] float. The probability scores of predicted classes 351 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes. 352 | class_names: list of all class names in the dataset 353 | threshold: Float. The prediction probability required to predict a class 354 | """ 355 | gt_class_ids = gt_class_ids[gt_class_ids != 0] 356 | pred_class_ids = pred_class_ids[pred_class_ids != 0] 357 | 358 | plt.figure(figsize=(12, 10)) 359 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) 360 | plt.yticks(np.arange(len(pred_class_ids)), 361 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) 362 | for i, id in enumerate(pred_class_ids)]) 363 | plt.xticks(np.arange(len(gt_class_ids)), 364 | [class_names[int(id)] for id in gt_class_ids], rotation=90) 365 | 366 | thresh = overlaps.max() / 2. 
367 | for i, j in itertools.product(range(overlaps.shape[0]), 368 | range(overlaps.shape[1])): 369 | text = "" 370 | if overlaps[i, j] > threshold: 371 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" 372 | color = ("white" if overlaps[i, j] > thresh 373 | else "black" if overlaps[i, j] > 0 374 | else "grey") 375 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), 376 | horizontalalignment="center", verticalalignment="center", 377 | fontsize=9, color=color) 378 | 379 | plt.tight_layout() 380 | plt.xlabel("Ground Truth") 381 | plt.ylabel("Predictions") 382 | 383 | 384 | def draw_boxes(image, boxes=None, refined_boxes=None, 385 | masks=None, captions=None, visibilities=None, 386 | title="", ax=None): 387 | """Draw bounding boxes and segmentation masks with different 388 | customizations. 389 | 390 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. 391 | refined_boxes: Like boxes, but draw with solid lines to show 392 | that they're the result of refining 'boxes'. 393 | masks: [N, height, width] 394 | captions: List of N titles to display on each box 395 | visibilities: (optional) List of values of 0, 1, or 2. Determine how 396 | prominent each bounding box should be. 397 | title: An optional title to show over the image 398 | ax: (optional) Matplotlib axis to draw on. 399 | """ 400 | # Number of boxes 401 | assert boxes is not None or refined_boxes is not None 402 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] 403 | 404 | # Matplotlib Axis 405 | if not ax: 406 | _, ax = plt.subplots(1, figsize=(12, 12)) 407 | 408 | # Generate random colors 409 | colors = random_colors(N) 410 | 411 | # Show area outside image boundaries. 412 | margin = image.shape[0] // 10 413 | ax.set_ylim(image.shape[0] + margin, -margin) 414 | ax.set_xlim(-margin, image.shape[1] + margin) 415 | ax.axis('off') 416 | 417 | ax.set_title(title) 418 | 419 | masked_image = image.astype(np.uint32).copy() 420 | for i in range(N): 421 | # Box visibility 422 | visibility = visibilities[i] if visibilities is not None else 1 423 | if visibility == 0: 424 | color = "gray" 425 | style = "dotted" 426 | alpha = 0.5 427 | elif visibility == 1: 428 | color = colors[i] 429 | style = "dotted" 430 | alpha = 1 431 | elif visibility == 2: 432 | color = colors[i] 433 | style = "solid" 434 | alpha = 1 435 | 436 | # Boxes 437 | if boxes is not None: 438 | if not np.any(boxes[i]): 439 | # Skip this instance. Has no bbox. Likely lost in cropping. 
440 | continue 441 | y1, x1, y2, x2 = boxes[i] 442 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, 443 | alpha=alpha, linestyle=style, 444 | edgecolor=color, facecolor='none') 445 | ax.add_patch(p) 446 | 447 | # Refined boxes 448 | if refined_boxes is not None and visibility > 0: 449 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32) 450 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2, 451 | edgecolor=color, facecolor='none') 452 | ax.add_patch(p) 453 | # Connect the top-left corners of the anchor and proposal 454 | if boxes is not None: 455 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color)) 456 | 457 | # Captions 458 | if captions is not None: 459 | caption = captions[i] 460 | # If there are refined boxes, display captions on them 461 | if refined_boxes is not None: 462 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2 463 | ax.text(x1, y1, caption, size=11, verticalalignment='top', 464 | color='w', backgroundcolor="none", 465 | bbox={'facecolor': color, 'alpha': 0.5, 466 | 'pad': 2, 'edgecolor': 'none'}) 467 | 468 | # Masks 469 | if masks is not None: 470 | mask = masks[:, :, i] 471 | masked_image = apply_mask(masked_image, mask, color) 472 | # Mask Polygon 473 | # Pad to ensure proper polygons for masks that touch image edges. 474 | padded_mask = np.zeros( 475 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) 476 | padded_mask[1:-1, 1:-1] = mask 477 | contours = find_contours(padded_mask, 0.5) 478 | for verts in contours: 479 | # Subtract the padding and flip (y, x) to (x, y) 480 | verts = np.fliplr(verts) - 1 481 | p = Polygon(verts, facecolor="none", edgecolor=color) 482 | ax.add_patch(p) 483 | ax.imshow(masked_image.astype(np.uint8)) 484 | 485 | 486 | def display_table(table): 487 | """Display values in a table format. 488 | table: an iterable of rows, and each row is an iterable of values. 489 | """ 490 | html = "" 491 | for row in table: 492 | row_html = "" 493 | for col in row: 494 | row_html += "<td>{:40}</td>".format(str(col)) 495 | html += "<tr>" + row_html + "</tr>" 496 | html = "<table>" + html + "</table>
" 497 | IPython.display.display(IPython.display.HTML(html)) 498 | 499 | 500 | def display_weight_stats(model): 501 | """Scans all the weights in the model and returns a list of tuples 502 | that contain stats about each weight. 503 | """ 504 | layers = model.get_trainable_layers() 505 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]] 506 | for l in layers: 507 | weight_values = l.get_weights() # list of Numpy arrays 508 | weight_tensors = l.weights # list of TF tensors 509 | for i, w in enumerate(weight_values): 510 | weight_name = weight_tensors[i].name 511 | # Detect problematic layers. Exclude biases of conv layers. 512 | alert = "" 513 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1): 514 | alert += "*** dead?" 515 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000: 516 | alert += "*** Overflow?" 517 | # Add row 518 | table.append([ 519 | weight_name + alert, 520 | str(w.shape), 521 | "{:+9.4f}".format(w.min()), 522 | "{:+10.4f}".format(w.max()), 523 | "{:+9.4f}".format(w.std()), 524 | ]) 525 | display_table(table) 526 | -------------------------------------------------------------------------------- /mrcnn/utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mask R-CNN 3 | Common utility functions and classes. 4 | 5 | Copyright (c) 2017 Matterport, Inc. 6 | Licensed under the MIT License (see LICENSE for details) 7 | Written by Waleed Abdulla 8 | """ 9 | 10 | import sys 11 | import os 12 | import logging 13 | import math 14 | import random 15 | import numpy as np 16 | import tensorflow as tf 17 | import scipy 18 | import skimage.color 19 | import skimage.io 20 | import skimage.transform 21 | import urllib.request 22 | import shutil 23 | import warnings 24 | from distutils.version import LooseVersion 25 | 26 | # URL from which to download the latest COCO trained weights 27 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" 28 | 29 | 30 | ############################################################ 31 | # Bounding Boxes 32 | ############################################################ 33 | 34 | def extract_bboxes(mask): 35 | """Compute bounding boxes from masks. 36 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0. 37 | 38 | Returns: bbox array [num_instances, (y1, x1, y2, x2)]. 39 | """ 40 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) 41 | for i in range(mask.shape[-1]): 42 | m = mask[:, :, i] 43 | # Bounding box. 44 | horizontal_indicies = np.where(np.any(m, axis=0))[0] 45 | vertical_indicies = np.where(np.any(m, axis=1))[0] 46 | if horizontal_indicies.shape[0]: 47 | x1, x2 = horizontal_indicies[[0, -1]] 48 | y1, y2 = vertical_indicies[[0, -1]] 49 | # x2 and y2 should not be part of the box. Increment by 1. 50 | x2 += 1 51 | y2 += 1 52 | else: 53 | # No mask for this instance. Might happen due to 54 | # resizing or cropping. Set bbox to zeros 55 | x1, x2, y1, y2 = 0, 0, 0, 0 56 | boxes[i] = np.array([y1, x1, y2, x2]) 57 | return boxes.astype(np.int32) 58 | 59 | 60 | def compute_iou(box, boxes, box_area, boxes_area): 61 | """Calculates IoU of the given box with the array of the given boxes. 62 | box: 1D vector [y1, x1, y2, x2] 63 | boxes: [boxes_count, (y1, x1, y2, x2)] 64 | box_area: float. the area of 'box' 65 | boxes_area: array of length boxes_count. 66 | 67 | Note: the areas are passed in rather than calculated here for 68 | efficiency. Calculate once in the caller to avoid duplicate work. 
69 | """ 70 | # Calculate intersection areas 71 | y1 = np.maximum(box[0], boxes[:, 0]) 72 | y2 = np.minimum(box[2], boxes[:, 2]) 73 | x1 = np.maximum(box[1], boxes[:, 1]) 74 | x2 = np.minimum(box[3], boxes[:, 3]) 75 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) 76 | union = box_area + boxes_area[:] - intersection[:] 77 | iou = intersection / union 78 | return iou 79 | 80 | 81 | def compute_overlaps(boxes1, boxes2): 82 | """Computes IoU overlaps between two sets of boxes. 83 | boxes1, boxes2: [N, (y1, x1, y2, x2)]. 84 | 85 | For better performance, pass the largest set first and the smaller second. 86 | """ 87 | # Areas of anchors and GT boxes 88 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) 89 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) 90 | 91 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count] 92 | # Each cell contains the IoU value. 93 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) 94 | for i in range(overlaps.shape[1]): 95 | box2 = boxes2[i] 96 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) 97 | return overlaps 98 | 99 | 100 | def compute_overlaps_masks(masks1, masks2): 101 | """Computes IoU overlaps between two sets of masks. 102 | masks1, masks2: [Height, Width, instances] 103 | """ 104 | 105 | # If either set of masks is empty return empty result 106 | if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: 107 | return np.zeros((masks1.shape[-1], masks2.shape[-1])) 108 | # flatten masks and compute their areas 109 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) 110 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) 111 | area1 = np.sum(masks1, axis=0) 112 | area2 = np.sum(masks2, axis=0) 113 | 114 | # intersections and union 115 | intersections = np.dot(masks1.T, masks2) 116 | union = area1[:, None] + area2[None, :] - intersections 117 | overlaps = intersections / union 118 | 119 | return overlaps 120 | 121 | 122 | def non_max_suppression(boxes, scores, threshold): 123 | """Performs non-maximum suppression and returns indices of kept boxes. 124 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. 125 | scores: 1-D array of box scores. 126 | threshold: Float. IoU threshold to use for filtering. 127 | """ 128 | assert boxes.shape[0] > 0 129 | if boxes.dtype.kind != "f": 130 | boxes = boxes.astype(np.float32) 131 | 132 | # Compute box areas 133 | y1 = boxes[:, 0] 134 | x1 = boxes[:, 1] 135 | y2 = boxes[:, 2] 136 | x2 = boxes[:, 3] 137 | area = (y2 - y1) * (x2 - x1) 138 | 139 | # Get indicies of boxes sorted by scores (highest first) 140 | ixs = scores.argsort()[::-1] 141 | 142 | pick = [] 143 | while len(ixs) > 0: 144 | # Pick top box and add its index to the list 145 | i = ixs[0] 146 | pick.append(i) 147 | # Compute IoU of the picked box with the rest 148 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) 149 | # Identify boxes with IoU over the threshold. This 150 | # returns indices into ixs[1:], so add 1 to get 151 | # indices into ixs. 152 | remove_ixs = np.where(iou > threshold)[0] + 1 153 | # Remove indices of the picked and overlapped boxes. 154 | ixs = np.delete(ixs, remove_ixs) 155 | ixs = np.delete(ixs, 0) 156 | return np.array(pick, dtype=np.int32) 157 | 158 | 159 | def apply_box_deltas(boxes, deltas): 160 | """Applies the given deltas to the given boxes. 161 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. 
162 | deltas: [N, (dy, dx, log(dh), log(dw))] 163 | """ 164 | boxes = boxes.astype(np.float32) 165 | # Convert to y, x, h, w 166 | height = boxes[:, 2] - boxes[:, 0] 167 | width = boxes[:, 3] - boxes[:, 1] 168 | center_y = boxes[:, 0] + 0.5 * height 169 | center_x = boxes[:, 1] + 0.5 * width 170 | # Apply deltas 171 | center_y += deltas[:, 0] * height 172 | center_x += deltas[:, 1] * width 173 | height *= np.exp(deltas[:, 2]) 174 | width *= np.exp(deltas[:, 3]) 175 | # Convert back to y1, x1, y2, x2 176 | y1 = center_y - 0.5 * height 177 | x1 = center_x - 0.5 * width 178 | y2 = y1 + height 179 | x2 = x1 + width 180 | return np.stack([y1, x1, y2, x2], axis=1) 181 | 182 | 183 | def box_refinement_graph(box, gt_box): 184 | """Compute refinement needed to transform box to gt_box. 185 | box and gt_box are [N, (y1, x1, y2, x2)] 186 | """ 187 | box = tf.cast(box, tf.float32) 188 | gt_box = tf.cast(gt_box, tf.float32) 189 | 190 | height = box[:, 2] - box[:, 0] 191 | width = box[:, 3] - box[:, 1] 192 | center_y = box[:, 0] + 0.5 * height 193 | center_x = box[:, 1] + 0.5 * width 194 | 195 | gt_height = gt_box[:, 2] - gt_box[:, 0] 196 | gt_width = gt_box[:, 3] - gt_box[:, 1] 197 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 198 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 199 | 200 | dy = (gt_center_y - center_y) / height 201 | dx = (gt_center_x - center_x) / width 202 | dh = tf.math.log(gt_height / height) 203 | dw = tf.math.log(gt_width / width) 204 | 205 | result = tf.stack([dy, dx, dh, dw], axis=1) 206 | return result 207 | 208 | 209 | def box_refinement(box, gt_box): 210 | """Compute refinement needed to transform box to gt_box. 211 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is 212 | assumed to be outside the box. 213 | """ 214 | box = box.astype(np.float32) 215 | gt_box = gt_box.astype(np.float32) 216 | 217 | height = box[:, 2] - box[:, 0] 218 | width = box[:, 3] - box[:, 1] 219 | center_y = box[:, 0] + 0.5 * height 220 | center_x = box[:, 1] + 0.5 * width 221 | 222 | gt_height = gt_box[:, 2] - gt_box[:, 0] 223 | gt_width = gt_box[:, 3] - gt_box[:, 1] 224 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height 225 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width 226 | 227 | dy = (gt_center_y - center_y) / height 228 | dx = (gt_center_x - center_x) / width 229 | dh = np.log(gt_height / height) 230 | dw = np.log(gt_width / width) 231 | 232 | return np.stack([dy, dx, dh, dw], axis=1) 233 | 234 | 235 | ############################################################ 236 | # Dataset 237 | ############################################################ 238 | 239 | class Dataset(object): 240 | """The base class for dataset classes. 241 | To use it, create a new class that adds functions specific to the dataset 242 | you want to use. For example: 243 | 244 | class CatsAndDogsDataset(Dataset): 245 | def load_cats_and_dogs(self): 246 | ... 247 | def load_mask(self, image_id): 248 | ... 249 | def image_reference(self, image_id): 250 | ... 251 | 252 | See COCODataset and ShapesDataset as examples. 253 | """ 254 | 255 | def __init__(self, class_map=None): 256 | self._image_ids = [] 257 | self.image_info = [] 258 | # Background is always the first class 259 | self.class_info = [{"source": "", "id": 0, "name": "BG"}] 260 | self.source_class_ids = {} 261 | 262 | def add_class(self, source, class_id, class_name): 263 | assert "." not in source, "Source name cannot contain a dot" 264 | # Does the class exist already? 
265 | for info in self.class_info: 266 | if info['source'] == source and info["id"] == class_id: 267 | # source.class_id combination already available, skip 268 | return 269 | # Add the class 270 | self.class_info.append({ 271 | "source": source, 272 | "id": class_id, 273 | "name": class_name, 274 | }) 275 | 276 | def add_image(self, source, image_id, path, **kwargs): 277 | image_info = { 278 | "id": image_id, 279 | "source": source, 280 | "path": path, 281 | } 282 | image_info.update(kwargs) 283 | self.image_info.append(image_info) 284 | 285 | def image_reference(self, image_id): 286 | """Return a link to the image in its source Website or details about 287 | the image that help looking it up or debugging it. 288 | 289 | Override for your dataset, but pass to this function 290 | if you encounter images not in your dataset. 291 | """ 292 | return "" 293 | 294 | def prepare(self, class_map=None): 295 | """Prepares the Dataset class for use. 296 | 297 | TODO: class map is not supported yet. When done, it should handle mapping 298 | classes from different datasets to the same class ID. 299 | """ 300 | 301 | def clean_name(name): 302 | """Returns a shorter version of object names for cleaner display.""" 303 | return ",".join(name.split(",")[:1]) 304 | 305 | # Build (or rebuild) everything else from the info dicts. 306 | self.num_classes = len(self.class_info) 307 | self.class_ids = np.arange(self.num_classes) 308 | self.class_names = [clean_name(c["name"]) for c in self.class_info] 309 | self.num_images = len(self.image_info) 310 | self._image_ids = np.arange(self.num_images) 311 | 312 | # Mapping from source class and image IDs to internal IDs 313 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id 314 | for info, id in zip(self.class_info, self.class_ids)} 315 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id 316 | for info, id in zip(self.image_info, self.image_ids)} 317 | 318 | # Map sources to class_ids they support 319 | self.sources = list(set([i['source'] for i in self.class_info])) 320 | self.source_class_ids = {} 321 | # Loop over datasets 322 | for source in self.sources: 323 | self.source_class_ids[source] = [] 324 | # Find classes that belong to this dataset 325 | for i, info in enumerate(self.class_info): 326 | # Include BG class in all datasets 327 | if i == 0 or source == info['source']: 328 | self.source_class_ids[source].append(i) 329 | 330 | def map_source_class_id(self, source_class_id): 331 | """Takes a source class ID and returns the int class ID assigned to it. 332 | 333 | For example: 334 | dataset.map_source_class_id("coco.12") -> 23 335 | """ 336 | return self.class_from_source_map[source_class_id] 337 | 338 | def get_source_class_id(self, class_id, source): 339 | """Map an internal class ID to the corresponding class ID in the source dataset.""" 340 | info = self.class_info[class_id] 341 | assert info['source'] == source 342 | return info['id'] 343 | 344 | @property 345 | def image_ids(self): 346 | return self._image_ids 347 | 348 | def source_image_link(self, image_id): 349 | """Returns the path or URL to the image. 350 | Override this to return a URL to the image if it's available online for easy 351 | debugging. 352 | """ 353 | return self.image_info[image_id]["path"] 354 | 355 | def load_image(self, image_id): 356 | """Load the specified image and return a [H,W,3] Numpy array. 357 | """ 358 | # Load image 359 | image = skimage.io.imread(self.image_info[image_id]['path']) 360 | # If grayscale. 
Convert to RGB for consistency. 361 | if image.ndim != 3: 362 | image = skimage.color.gray2rgb(image) 363 | # If has an alpha channel, remove it for consistency 364 | if image.shape[-1] == 4: 365 | image = image[..., :3] 366 | return image 367 | 368 | def load_mask(self, image_id): 369 | """Load instance masks for the given image. 370 | 371 | Different datasets use different ways to store masks. Override this 372 | method to load instance masks and return them in the form of am 373 | array of binary masks of shape [height, width, instances]. 374 | 375 | Returns: 376 | masks: A bool array of shape [height, width, instance count] with 377 | a binary mask per instance. 378 | class_ids: a 1D array of class IDs of the instance masks. 379 | """ 380 | # Override this function to load a mask from your dataset. 381 | # Otherwise, it returns an empty mask. 382 | logging.warning("You are using the default load_mask(), maybe you need to define your own one.") 383 | mask = np.empty([0, 0, 0]) 384 | class_ids = np.empty([0], np.int32) 385 | return mask, class_ids 386 | 387 | 388 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"): 389 | """Resizes an image keeping the aspect ratio unchanged. 390 | 391 | min_dim: if provided, resizes the image such that it's smaller 392 | dimension == min_dim 393 | max_dim: if provided, ensures that the image longest side doesn't 394 | exceed this value. 395 | min_scale: if provided, ensure that the image is scaled up by at least 396 | this percent even if min_dim doesn't require it. 397 | mode: Resizing mode. 398 | none: No resizing. Return the image unchanged. 399 | square: Resize and pad with zeros to get a square image 400 | of size [max_dim, max_dim]. 401 | pad64: Pads width and height with zeros to make them multiples of 64. 402 | If min_dim or min_scale are provided, it scales the image up 403 | before padding. max_dim is ignored in this mode. 404 | The multiple of 64 is needed to ensure smooth scaling of feature 405 | maps up and down the 6 levels of the FPN pyramid (2**6=64). 406 | crop: Picks random crops from the image. First, scales the image based 407 | on min_dim and min_scale, then picks a random crop of 408 | size min_dim x min_dim. Can be used in training only. 409 | max_dim is not used in this mode. 410 | 411 | Returns: 412 | image: the resized image 413 | window: (y1, x1, y2, x2). If max_dim is provided, padding might 414 | be inserted in the returned image. If so, this window is the 415 | coordinates of the image part of the full image (excluding 416 | the padding). The x2, y2 pixels are not included. 417 | scale: The scale factor used to resize the image 418 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] 419 | """ 420 | # Keep track of image dtype and return results in the same dtype 421 | image_dtype = image.dtype 422 | # Default window (y1, x1, y2, x2) and default scale == 1. 423 | h, w = image.shape[:2] 424 | window = (0, 0, h, w) 425 | scale = 1 426 | padding = [(0, 0), (0, 0), (0, 0)] 427 | crop = None 428 | 429 | if mode == "none": 430 | return image, window, scale, padding, crop 431 | 432 | # Scale? 433 | if min_dim: 434 | # Scale up but not down 435 | scale = max(1, min_dim / min(h, w)) 436 | if min_scale and scale < min_scale: 437 | scale = min_scale 438 | 439 | # Does it exceed max dim? 
440 | if max_dim and mode == "square": 441 | image_max = max(h, w) 442 | if round(image_max * scale) > max_dim: 443 | scale = max_dim / image_max 444 | 445 | # Resize image using bilinear interpolation 446 | if scale != 1: 447 | image = resize(image, (round(h * scale), round(w * scale)), 448 | preserve_range=True) 449 | 450 | # Need padding or cropping? 451 | if mode == "square": 452 | # Get new height and width 453 | h, w = image.shape[:2] 454 | top_pad = (max_dim - h) // 2 455 | bottom_pad = max_dim - h - top_pad 456 | left_pad = (max_dim - w) // 2 457 | right_pad = max_dim - w - left_pad 458 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 459 | image = np.pad(image, padding, mode='constant', constant_values=0) 460 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 461 | elif mode == "pad64": 462 | h, w = image.shape[:2] 463 | # Both sides must be divisible by 64 464 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" 465 | # Height 466 | if h % 64 > 0: 467 | max_h = h - (h % 64) + 64 468 | top_pad = (max_h - h) // 2 469 | bottom_pad = max_h - h - top_pad 470 | else: 471 | top_pad = bottom_pad = 0 472 | # Width 473 | if w % 64 > 0: 474 | max_w = w - (w % 64) + 64 475 | left_pad = (max_w - w) // 2 476 | right_pad = max_w - w - left_pad 477 | else: 478 | left_pad = right_pad = 0 479 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] 480 | image = np.pad(image, padding, mode='constant', constant_values=0) 481 | window = (top_pad, left_pad, h + top_pad, w + left_pad) 482 | elif mode == "crop": 483 | # Pick a random crop 484 | h, w = image.shape[:2] 485 | y = random.randint(0, (h - min_dim)) 486 | x = random.randint(0, (w - min_dim)) 487 | crop = (y, x, min_dim, min_dim) 488 | image = image[y:y + min_dim, x:x + min_dim] 489 | window = (0, 0, min_dim, min_dim) 490 | else: 491 | raise Exception("Mode {} not supported".format(mode)) 492 | return image.astype(image_dtype), window, scale, padding, crop 493 | 494 | 495 | def resize_mask(mask, scale, padding, crop=None): 496 | """Resizes a mask using the given scale and padding. 497 | Typically, you get the scale and padding from resize_image() to 498 | ensure both, the image and the mask, are resized consistently. 499 | 500 | scale: mask scaling factor 501 | padding: Padding to add to the mask in the form 502 | [(top, bottom), (left, right), (0, 0)] 503 | """ 504 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is 505 | # calculated with round() instead of int() 506 | with warnings.catch_warnings(): 507 | warnings.simplefilter("ignore") 508 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) 509 | if crop is not None: 510 | y, x, h, w = crop 511 | mask = mask[y:y + h, x:x + w] 512 | else: 513 | mask = np.pad(mask, padding, mode='constant', constant_values=0) 514 | return mask 515 | 516 | 517 | def minimize_mask(bbox, mask, mini_shape): 518 | """Resize masks to a smaller version to reduce memory load. 519 | Mini-masks can be resized back to image scale using expand_masks() 520 | 521 | See inspect_data.ipynb notebook for more details. 
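    Editor's sketch (not in the original docstring): a typical call pairs the
    full-resolution masks from load_mask() with boxes from extract_bboxes(),
    shrinking each instance to a small fixed shape such as (56, 56):

        # mask: [H, W, num_instances] bool array, bbox = extract_bboxes(mask)
        # mini = minimize_mask(bbox, mask, mini_shape=(56, 56))
        # mini.shape -> (56, 56, num_instances); expand_mask() reverses this.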
522 | """ 523 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) 524 | for i in range(mask.shape[-1]): 525 | # Pick slice and cast to bool in case load_mask() returned wrong dtype 526 | m = mask[:, :, i].astype(bool) 527 | y1, x1, y2, x2 = bbox[i][:4] 528 | m = m[y1:y2, x1:x2] 529 | if m.size == 0: 530 | raise Exception("Invalid bounding box with area of zero") 531 | # Resize with bilinear interpolation 532 | m = resize(m, mini_shape) 533 | mini_mask[:, :, i] = np.around(m).astype(np.bool) 534 | return mini_mask 535 | 536 | 537 | def expand_mask(bbox, mini_mask, image_shape): 538 | """Resizes mini masks back to image size. Reverses the change 539 | of minimize_mask(). 540 | 541 | See inspect_data.ipynb notebook for more details. 542 | """ 543 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) 544 | for i in range(mask.shape[-1]): 545 | m = mini_mask[:, :, i] 546 | y1, x1, y2, x2 = bbox[i][:4] 547 | h = y2 - y1 548 | w = x2 - x1 549 | # Resize with bilinear interpolation 550 | m = resize(m, (h, w)) 551 | mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) 552 | return mask 553 | 554 | 555 | # TODO: Build and use this function to reduce code duplication 556 | def mold_mask(mask, config): 557 | pass 558 | 559 | 560 | def unmold_mask(mask, bbox, image_shape): 561 | """Converts a mask generated by the neural network to a format similar 562 | to its original shape. 563 | mask: [height, width] of type float. A small, typically 28x28 mask. 564 | bbox: [y1, x1, y2, x2]. The box to fit the mask in. 565 | 566 | Returns a binary mask with the same size as the original image. 567 | """ 568 | threshold = 0.5 569 | y1, x1, y2, x2 = bbox 570 | mask = resize(mask, (y2 - y1, x2 - x1)) 571 | mask = np.where(mask >= threshold, 1, 0).astype(np.bool) 572 | 573 | # Put the mask in the right location. 574 | full_mask = np.zeros(image_shape[:2], dtype=np.bool) 575 | full_mask[y1:y2, x1:x2] = mask 576 | return full_mask 577 | 578 | 579 | ############################################################ 580 | # Anchors 581 | ############################################################ 582 | 583 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): 584 | """ 585 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] 586 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] 587 | shape: [height, width] spatial shape of the feature map over which 588 | to generate anchors. 589 | feature_stride: Stride of the feature map relative to the image in pixels. 590 | anchor_stride: Stride of anchors on the feature map. For example, if the 591 | value is 2 then generate anchors for every other feature map pixel. 
592 | """ 593 | # Get all combinations of scales and ratios 594 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) 595 | scales = scales.flatten() 596 | ratios = ratios.flatten() 597 | 598 | # Enumerate heights and widths from scales and ratios 599 | heights = scales / np.sqrt(ratios) 600 | widths = scales * np.sqrt(ratios) 601 | 602 | # Enumerate shifts in feature space 603 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride 604 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride 605 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) 606 | 607 | # Enumerate combinations of shifts, widths, and heights 608 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x) 609 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y) 610 | 611 | # Reshape to get a list of (y, x) and a list of (h, w) 612 | box_centers = np.stack( 613 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) 614 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) 615 | 616 | # Convert to corner coordinates (y1, x1, y2, x2) 617 | boxes = np.concatenate([box_centers - 0.5 * box_sizes, 618 | box_centers + 0.5 * box_sizes], axis=1) 619 | return boxes 620 | 621 | 622 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, 623 | anchor_stride): 624 | """Generate anchors at different levels of a feature pyramid. Each scale 625 | is associated with a level of the pyramid, but each ratio is used in 626 | all levels of the pyramid. 627 | 628 | Returns: 629 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted 630 | with the same order of the given scales. So, anchors of scale[0] come 631 | first, then anchors of scale[1], and so on. 632 | """ 633 | # Anchors 634 | # [anchor_count, (y1, x1, y2, x2)] 635 | anchors = [] 636 | for i in range(len(scales)): 637 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], 638 | feature_strides[i], anchor_stride)) 639 | return np.concatenate(anchors, axis=0) 640 | 641 | 642 | ############################################################ 643 | # Miscellaneous 644 | ############################################################ 645 | 646 | def trim_zeros(x): 647 | """It's common to have tensors larger than the available data and 648 | pad with zeros. This function removes rows that are all zeros. 649 | 650 | x: [rows, columns]. 651 | """ 652 | assert len(x.shape) == 2 653 | return x[~np.all(x == 0, axis=1)] 654 | 655 | 656 | def compute_matches(gt_boxes, gt_class_ids, gt_masks, 657 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 658 | iou_threshold=0.5, score_threshold=0.0): 659 | """Finds matches between prediction and ground truth instances. 660 | 661 | Returns: 662 | gt_match: 1-D array. For each GT box it has the index of the matched 663 | predicted box. 664 | pred_match: 1-D array. For each predicted box, it has the index of 665 | the matched ground truth box. 666 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
667 | """ 668 | # Trim zero padding 669 | # TODO: cleaner to do zero unpadding upstream 670 | gt_boxes = trim_zeros(gt_boxes) 671 | gt_masks = gt_masks[..., :gt_boxes.shape[0]] 672 | pred_boxes = trim_zeros(pred_boxes) 673 | pred_scores = pred_scores[:pred_boxes.shape[0]] 674 | # Sort predictions by score from high to low 675 | indices = np.argsort(pred_scores)[::-1] 676 | pred_boxes = pred_boxes[indices] 677 | pred_class_ids = pred_class_ids[indices] 678 | pred_scores = pred_scores[indices] 679 | pred_masks = pred_masks[..., indices] 680 | 681 | # Compute IoU overlaps [pred_masks, gt_masks] 682 | overlaps = compute_overlaps_masks(pred_masks, gt_masks) 683 | 684 | # Loop through predictions and find matching ground truth boxes 685 | match_count = 0 686 | pred_match = -1 * np.ones([pred_boxes.shape[0]]) 687 | gt_match = -1 * np.ones([gt_boxes.shape[0]]) 688 | for i in range(len(pred_boxes)): 689 | # Find best matching ground truth box 690 | # 1. Sort matches by score 691 | sorted_ixs = np.argsort(overlaps[i])[::-1] 692 | # 2. Remove low scores 693 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] 694 | if low_score_idx.size > 0: 695 | sorted_ixs = sorted_ixs[:low_score_idx[0]] 696 | # 3. Find the match 697 | for j in sorted_ixs: 698 | # If ground truth box is already matched, go to next one 699 | if gt_match[j] > -1: 700 | continue 701 | # If we reach IoU smaller than the threshold, end the loop 702 | iou = overlaps[i, j] 703 | if iou < iou_threshold: 704 | break 705 | # Do we have a match? 706 | if pred_class_ids[i] == gt_class_ids[j]: 707 | match_count += 1 708 | gt_match[j] = i 709 | pred_match[i] = j 710 | break 711 | 712 | return gt_match, pred_match, overlaps 713 | 714 | 715 | def compute_ap(gt_boxes, gt_class_ids, gt_masks, 716 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 717 | iou_threshold=0.5): 718 | """Compute Average Precision at a set IoU threshold (default 0.5). 719 | 720 | Returns: 721 | mAP: Mean Average Precision 722 | precisions: List of precisions at different class score thresholds. 723 | recalls: List of recall values at different class score thresholds. 724 | overlaps: [pred_boxes, gt_boxes] IoU overlaps. 725 | """ 726 | # Get matches and overlaps 727 | gt_match, pred_match, overlaps = compute_matches( 728 | gt_boxes, gt_class_ids, gt_masks, 729 | pred_boxes, pred_class_ids, pred_scores, pred_masks, 730 | iou_threshold) 731 | 732 | # Compute precision and recall at each prediction box step 733 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) 734 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) 735 | 736 | # Pad with start and end values to simplify the math 737 | precisions = np.concatenate([[0], precisions, [0]]) 738 | recalls = np.concatenate([[0], recalls, [1]]) 739 | 740 | # Ensure precision values decrease but don't increase. This way, the 741 | # precision value at each recall threshold is the maximum it can be 742 | # for all following recall thresholds, as specified by the VOC paper. 
743 | for i in range(len(precisions) - 2, -1, -1): 744 | precisions[i] = np.maximum(precisions[i], precisions[i + 1]) 745 | 746 | # Compute mean AP over recall range 747 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 748 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) * 749 | precisions[indices]) 750 | 751 | return mAP, precisions, recalls, overlaps 752 | 753 | 754 | def compute_ap_range(gt_box, gt_class_id, gt_mask, 755 | pred_box, pred_class_id, pred_score, pred_mask, 756 | iou_thresholds=None, verbose=1): 757 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95.""" 758 | # Default is 0.5 to 0.95 with increments of 0.05 759 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) 760 | 761 | # Compute AP over range of IoU thresholds 762 | AP = [] 763 | for iou_threshold in iou_thresholds: 764 | ap, precisions, recalls, overlaps =\ 765 | compute_ap(gt_box, gt_class_id, gt_mask, 766 | pred_box, pred_class_id, pred_score, pred_mask, 767 | iou_threshold=iou_threshold) 768 | if verbose: 769 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) 770 | AP.append(ap) 771 | AP = np.array(AP).mean() 772 | if verbose: 773 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( 774 | iou_thresholds[0], iou_thresholds[-1], AP)) 775 | return AP 776 | 777 | 778 | def compute_recall(pred_boxes, gt_boxes, iou): 779 | """Compute the recall at the given IoU threshold. It's an indication 780 | of how many GT boxes were found by the given prediction boxes. 781 | 782 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates 783 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates 784 | """ 785 | # Measure overlaps 786 | overlaps = compute_overlaps(pred_boxes, gt_boxes) 787 | iou_max = np.max(overlaps, axis=1) 788 | iou_argmax = np.argmax(overlaps, axis=1) 789 | positive_ids = np.where(iou_max >= iou)[0] 790 | matched_gt_boxes = iou_argmax[positive_ids] 791 | 792 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] 793 | return recall, positive_ids 794 | 795 | 796 | # ## Batch Slicing 797 | # Some custom layers support a batch size of 1 only, and require a lot of work 798 | # to support batches greater than 1. This function slices an input tensor 799 | # across the batch dimension and feeds batches of size 1. Effectively, 800 | # an easy way to support batches > 1 quickly with little code modification. 801 | # In the long run, it's more efficient to modify the code to support large 802 | # batches and getting rid of this function. Consider this a temporary solution 803 | def batch_slice(inputs, graph_fn, batch_size, names=None): 804 | """Splits inputs into slices and feeds each slice to a copy of the given 805 | computation graph and then combines the results. It allows you to run a 806 | graph on a batch of inputs even if the graph is written to support one 807 | instance only. 808 | 809 | inputs: list of tensors. All must have the same first dimension length 810 | graph_fn: A function that returns a TF tensor that's part of a graph. 811 | batch_size: number of slices to divide the data into. 812 | names: If provided, assigns names to the resulting tensors. 
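    Editor's illustrative example (hypothetical call, not upstream text;
    per_image_fn stands in for any single-image graph function):

        # boxes: [2, num_boxes, 4] tensor, scores: [2, num_boxes] tensor
        # outputs = batch_slice([boxes, scores],
        #                       lambda b, s: per_image_fn(b, s),
        #                       batch_size=2)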
813 | """ 814 | if not isinstance(inputs, list): 815 | inputs = [inputs] 816 | 817 | outputs = [] 818 | for i in range(batch_size): 819 | inputs_slice = [x[i] for x in inputs] 820 | output_slice = graph_fn(*inputs_slice) 821 | if not isinstance(output_slice, (tuple, list)): 822 | output_slice = [output_slice] 823 | outputs.append(output_slice) 824 | # Change outputs from a list of slices where each is 825 | # a list of outputs to a list of outputs and each has 826 | # a list of slices 827 | outputs = list(zip(*outputs)) 828 | 829 | if names is None: 830 | names = [None] * len(outputs) 831 | 832 | result = [tf.stack(o, axis=0, name=n) 833 | for o, n in zip(outputs, names)] 834 | if len(result) == 1: 835 | result = result[0] 836 | 837 | return result 838 | 839 | 840 | def download_trained_weights(coco_model_path, verbose=1): 841 | """Download COCO trained weights from Releases. 842 | 843 | coco_model_path: local path of COCO trained weights 844 | """ 845 | if verbose > 0: 846 | print("Downloading pretrained model to " + coco_model_path + " ...") 847 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: 848 | shutil.copyfileobj(resp, out) 849 | if verbose > 0: 850 | print("... done downloading pretrained model!") 851 | 852 | 853 | def norm_boxes(boxes, shape): 854 | """Converts boxes from pixel coordinates to normalized coordinates. 855 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates 856 | shape: [..., (height, width)] in pixels 857 | 858 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 859 | coordinates it's inside the box. 860 | 861 | Returns: 862 | [N, (y1, x1, y2, x2)] in normalized coordinates 863 | """ 864 | h, w = shape 865 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 866 | shift = np.array([0, 0, 1, 1]) 867 | return np.divide((boxes - shift), scale).astype(np.float32) 868 | 869 | 870 | def denorm_boxes(boxes, shape): 871 | """Converts boxes from normalized coordinates to pixel coordinates. 872 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates 873 | shape: [..., (height, width)] in pixels 874 | 875 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized 876 | coordinates it's inside the box. 877 | 878 | Returns: 879 | [N, (y1, x1, y2, x2)] in pixel coordinates 880 | """ 881 | h, w = shape 882 | scale = np.array([h - 1, w - 1, h - 1, w - 1]) 883 | shift = np.array([0, 0, 1, 1]) 884 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) 885 | 886 | 887 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, 888 | preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): 889 | """A wrapper for Scikit-Image resize(). 890 | 891 | Scikit-Image generates warnings on every call to resize() if it doesn't 892 | receive the right parameters. The right parameters depend on the version 893 | of skimage. This solves the problem by using different parameters per 894 | version. And it provides a central place to control resizing defaults. 895 | """ 896 | if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): 897 | # New in 0.14: anti_aliasing. Default it to False for backward 898 | # compatibility with skimage 0.13. 
899 | return skimage.transform.resize( 900 | image, output_shape, 901 | order=order, mode=mode, cval=cval, clip=clip, 902 | preserve_range=preserve_range, anti_aliasing=anti_aliasing, 903 | anti_aliasing_sigma=anti_aliasing_sigma) 904 | else: 905 | return skimage.transform.resize( 906 | image, output_shape, 907 | order=order, mode=mode, cval=cval, clip=clip, 908 | preserve_range=preserve_range) 909 | --------------------------------------------------------------------------------
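Editor's appendix -- an illustrative evaluation sketch, not a file from the
repository. It shows one common way the utilities above are combined to score
a trained Mask R-CNN on validation images. The names model, dataset_val,
inference_config, and mrcnn.model (imported as modellib) are assumptions
standing in for objects set up in Train_MASKRCNN_Script.py; only the
mrcnn.utils functions listed above are taken as given.

import numpy as np
from mrcnn import utils
import mrcnn.model as modellib  # assumed module; only its .pyc appears in the tree

def evaluate_map(model, dataset_val, inference_config, limit=10):
    """Mean AP@0.5 over the first `limit` validation images (sketch only)."""
    APs = []
    for image_id in dataset_val.image_ids[:limit]:
        # Ground truth in the layout utils.compute_ap() expects
        image, image_meta, gt_class_id, gt_bbox, gt_mask = \
            modellib.load_image_gt(dataset_val, inference_config, image_id)
        # Run detection on a single image; detect() returns one dict per input
        r = model.detect([image], verbose=0)[0]
        AP, precisions, recalls, overlaps = utils.compute_ap(
            gt_bbox, gt_class_id, gt_mask,
            r["rois"], r["class_ids"], r["scores"], r["masks"])
        APs.append(AP)
    return np.mean(APs)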