├── mrcnn
│   ├── __init__.py
│   ├── __pycache__
│   │   ├── config.cpython-37.pyc
│   │   ├── config.cpython-39.pyc
│   │   ├── model.cpython-37.pyc
│   │   ├── model.cpython-39.pyc
│   │   ├── utils.cpython-37.pyc
│   │   ├── utils.cpython-39.pyc
│   │   ├── __init__.cpython-37.pyc
│   │   ├── __init__.cpython-39.pyc
│   │   ├── visualize.cpython-37.pyc
│   │   └── visualize.cpython-39.pyc
│   ├── parallel_model.py
│   ├── config.py
│   ├── visualize.py
│   └── utils.py
├── __pycache__
│   ├── annotate.cpython-37.pyc
│   ├── resume_eda.cpython-37.pyc
│   └── Train_MASKRCNN_Script.cpython-37.pyc
├── requirements.txt
├── resume_eda.py
├── README.md
├── Train_MASKRCNN_Script.py
├── Resume_Parser.py
└── annotate.py
/mrcnn/__init__.py:
--------------------------------------------------------------------------------
1 | __version__ = "1.0.0"
2 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | matplotlib
2 | seaborn
3 | pandas
4 | numpy
5 | pdf2image
6 | easyocr
7 | Pillow
8 | ipython
9 | streamlit
10 | xml-python
11 | opencv-python
12 | tensorflow==1.13.1
13 | keras==2.2.5
14 | # Python 3.7.10 (interpreter version; not a pip-installable package)
15 |
--------------------------------------------------------------------------------
/resume_eda.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # coding: utf-8
3 | import matplotlib.pyplot as plt
4 | import seaborn as sns
5 | def labels(ax):
6 | for p in ax.patches:
7 | width = p.get_width()
8 | height = p.get_height()
9 | x = p.get_x()
10 | y = p.get_y()
11 | ax.annotate(f"{int(height)}", (x + width/2, y + height*1.01), ha="center")
12 | plt.ylabel("Count of Resumes")
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Resume_Parsing
2 | Machine Learning Project
3 |
4 | **Team**:
5 | * David Balaban - Team Leader
6 | * Daniel Lichter - Techsmith
7 | * Asma Sadia - Specification Leader
8 | * Maitri Patel - Quality Assurance Specialist
9 |
10 | ### Business Problem
11 | Keystone Strategy's recruiting team often receives large "resume books" containing hundreds of resumes from universities and their student organizations. The team then has to parse these manually to catalog applicant attributes such as education, work experience, and skills before doing a more detailed review for fit with the organization. Keystone would like to automate this process using machine learning: a machine learning-based resume parser will save the recruiting team hours of daily work by eliminating manual processing and analysis of every resume it receives.
12 |
13 | ### Data
14 | Our custom dataset consists of three resume books from graduate business schools: the Tuck School of Business at Dartmouth, the Haas School of Business at Berkeley, and the Stanford Graduate School of Business. In total, there are 841 resumes of MBA candidates.
15 |
16 |
17 |
18 | ### Model
19 | Our modeling approach combines MaskRCNN and EasyOCR: MaskRCNN finds the sections of each resume, and EasyOCR extracts the text from them.
20 |
21 | MaskRCNN is a pre-trained model generally used for object detection. We trained it on the resumes available to us and used it to classify the different portions of a resume, predicting a bounding box for each section; the weights we started from were pretrained on the COCO dataset, which covers 80 classes across roughly 330K images. For text extraction we used EasyOCR, an Optical Character Recognition model that is already trained on multiple languages (including English), has very high accuracy, and is very easy to use. A condensed sketch of this inference flow is shown below.
22 |
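The snippet below is a condensed, illustrative sketch of this inference flow as implemented in `Resume_Parser.py`; the weights file name and image path are placeholders, and the config mirrors the one used by the app.

```python
import cv2
import easyocr
import mrcnn.config
import mrcnn.model

CLASS_NAMES = ['BG', 'Personal Info', 'Education', 'Skills',
               'Projects', 'Work Experience', 'Extra']

class InferenceConfig(mrcnn.config.Config):
    NAME = "resumes_cfg"
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1
    NUM_CLASSES = len(CLASS_NAMES)  # background + 6 resume sections

# Build the model in inference mode and load trained weights (placeholder path)
model = mrcnn.model.MaskRCNN(mode="inference", config=InferenceConfig(), model_dir=".")
model.load_weights("resume_maskrcnn_weights.h5", by_name=True)

# Detect resume sections on one resume page (placeholder image)
image = cv2.cvtColor(cv2.imread("resume_page.jpg"), cv2.COLOR_BGR2RGB)
pred = model.detect([image], verbose=0)[0]  # boxes come back as [y1, x1, y2, x2]

# Read the text inside each detected section with EasyOCR
reader = easyocr.Reader(["en"])
for box, class_id in zip(pred["rois"], pred["class_ids"]):
    y1, x1, y2, x2 = box
    section_text = " ".join(b[1] for b in reader.readtext(image[y1:y2, x1:x2]))
    print(CLASS_NAMES[class_id], ":", section_text[:80])
```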
23 | We used mean Average Precision (mAP) as the metric, which is standard for evaluating an object detection model. It measures the average precision (the area under a precision-recall curve) of a model across all object classes and ranges between 0 and 1. We computed the mAP at several IoU thresholds on the 50 testing resumes. At a 75% IoU threshold we got almost 95% mAP, which is very good but may be too good to be true, since some overfitting is likely involved. At an 85% IoU threshold the mAP was 73%, which we consider strong: we believe an 85% IoU is enough for the model to correctly find the sections, and overlapping predictions were rare. Finally, at a 95% IoU threshold we see an abysmal score of 0.167%, which is understandable given the small amount of training this model has gone through (only about 850 resumes). The sketch after this paragraph shows how these per-threshold scores can be computed.
24 |
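The sketch below shows one way these per-threshold scores can be computed, assuming the standard `mrcnn.utils.compute_ap` helper, the `ResumeDataset` and config classes from `Train_MASKRCNN_Script.py`, and an already-loaded inference model; it is illustrative, not the exact evaluation script we ran.

```python
import numpy as np
import mrcnn.utils
from mrcnn.model import load_image_gt

def evaluate_map(dataset, model, config, iou_threshold=0.75):
    """Mean AP over all images in `dataset` at a single IoU threshold."""
    aps = []
    for image_id in dataset.image_ids:
        # Ground-truth boxes, class ids and masks for this resume page
        image, _, gt_class_id, gt_bbox, gt_mask = load_image_gt(
            dataset, config, image_id, use_mini_mask=False)
        # Model predictions on the same (resized) image
        r = model.detect([image], verbose=0)[0]
        ap, _, _, _ = mrcnn.utils.compute_ap(
            gt_bbox, gt_class_id, gt_mask,
            r["rois"], r["class_ids"], r["scores"], r["masks"],
            iou_threshold=iou_threshold)
        aps.append(ap)
    return np.mean(aps)

# e.g. mAP at the IoU thresholds reported above, on the held-out test resumes:
# for t in (0.75, 0.85, 0.95):
#     print(t, evaluate_map(test_dataset, model, config, iou_threshold=t))
```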
25 |
26 |
27 | ### Conclusion
28 | Selecting the right candidates from a pool of applicants can be one of the toughest jobs for talent acquisition leaders, and going through each resume manually every hiring season is tiresome and time-consuming. A machine learning resume parser can be a lifesaver for the entire company: it can provide consistent, unbiased results while overcoming possible manual errors.
29 |
30 |
31 | ## How to use our app:
32 |
33 | To run our app, first clone this repository and install all the necessary dependencies from requirements.txt:
34 | ```
35 | pip install -r requirements.txt
36 | ```
37 | Next, install Poppler as well, which pdf2image needs to handle the PDFs; there are two ways of doing this:
38 | ```
39 | pip install python-poppler
40 | ```
41 | or
42 | ```
43 | conda install -c conda-forge poppler
44 | ```
45 | Once all the necessary packages have been installed, run the application with the Streamlit framework using the following command:
46 | ```
47 | python -m streamlit run Resume_Parser.py
48 | ```
49 |
50 | Here is a screenshot of our application:
51 |
52 |
53 |
54 |
--------------------------------------------------------------------------------
/Train_MASKRCNN_Script.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Mon Nov 15 02:28:40 2021
4 |
5 | @author: davba
6 | """
7 | #Notebook written by David A. A. Balaban
8 | from pdf2image import convert_from_path
9 | import easyocr
10 | import numpy as np
11 | import PIL # Python Imaging Library
12 | from PIL import ImageDraw # drawing bounding boxes
13 | import tensorflow as tf
14 | from IPython.display import display,Image
15 | from matplotlib.pyplot import imshow
16 | import xml.dom.minidom
17 | import pandas as pd
18 | import mrcnn
19 | import mrcnn.utils
20 | import mrcnn.config
21 | import mrcnn.model
22 | import urllib.request
23 | import os
24 | import xml.etree.ElementTree  # needed for ElementTree.parse in extract_boxes
25 |
26 |
27 | #Using Keras==2.2.5
28 |
29 | # Sections = Personal Info, Education, Skills, Projects, Work Experience, Extra
30 |
31 | class ResumeDataset(mrcnn.utils.Dataset):
32 |
33 | def load_dataset(self, dataset_dir, is_train=True):
34 | # Adding all possible sections
35 | self.add_class("dataset", 1, "Personal Info")
36 | self.add_class("dataset", 2, "Education")
37 | self.add_class("dataset", 3, "Skills")
38 | self.add_class("dataset", 4, "Projects")
39 | self.add_class("dataset", 5, "Work Experience")
40 | self.add_class("dataset", 6, "Extra")
41 |
42 |
43 |
44 | images_dir = dataset_dir + '\\Resumes\\'
45 | annotations_dir = dataset_dir + '\\Resume_Annotations\\'
46 |
47 | dir_list = os.listdir(images_dir)
48 | count = 0
49 | image_id = ""
50 |
51 | for filename in dir_list:
52 | # Image ID is file name without .jpg
53 | image_id = filename[:-4]
54 |
55 | count += 1
56 | # First ~75% of the files form the training split, the rest the validation split
57 | if is_train and count > len(dir_list)*0.75:
58 | continue
59 | if not is_train and count <= len(dir_list)*0.75:
60 | continue
61 |
62 | img_path = images_dir + filename
63 | ann_path = annotations_dir + image_id + '.xml'
64 |
65 | self.add_image('dataset', image_id=image_id, path=img_path, annotation=ann_path)
66 |
67 | def load_mask(self, image_id):
68 | info = self.image_info[image_id]
69 | path = info['annotation']
70 | boxes, w, h = self.extract_boxes(path)
71 | masks = np.zeros([h, w, len(boxes)], dtype='uint8')
72 |
73 | class_ids = list()
74 | for i in range(len(boxes)):
75 | obj = boxes[i]
76 | box = obj[1]
77 | row_s, row_e = box[1], box[3]
78 | col_s, col_e = box[0], box[2]
79 | masks[row_s:row_e, col_s:col_e, i] = 1
80 | class_ids.append(self.class_names.index(obj[0]))
81 | return masks, np.asarray(class_ids, dtype='int32')
82 |
83 | # A helper method to extract the bounding boxes from the annotation file
84 | def extract_boxes(self, filename):
85 | tree = xml.etree.ElementTree.parse(filename)
86 |
87 | root = tree.getroot()
88 |
89 | boxes = list()
90 | for obj in root.findall('./object'):
91 | name = obj.find('name').text
92 | xmin = int(obj.find('bndbox/xmin').text)
93 | ymin = int(obj.find('bndbox/ymin').text)
94 | xmax = int(obj.find('bndbox/xmax').text)
95 | ymax = int(obj.find('bndbox/ymax').text)
96 | coors = [xmin, ymin, xmax, ymax]
97 | box_array = [name,coors]
98 | print(box_array)
99 | boxes.append(box_array)
100 |
101 |
102 | width = int(root.find('.//size/width').text)
103 | height = int(root.find('.//size/height').text)
104 | return boxes, width, height
105 |
106 | class ResumeConfig(mrcnn.config.Config):
107 | NAME = "resumes_cfg"
108 |
109 | GPU_COUNT = 1
110 | IMAGES_PER_GPU = 1
111 |
112 | NUM_CLASSES = 7  # background + 6 resume section classes
113 |
114 | LEARNING_RATE = 0.001
115 |
116 | STEPS_PER_EPOCH = 131
117 |
118 |
119 |
120 | def train_model(dataset_path, model_path, num_epochs, final_model_path):
121 |
122 | # Training
123 | train_dataset = ResumeDataset()
124 | train_dataset.load_dataset(dataset_dir=dataset_path, is_train=True)
125 | train_dataset.prepare()
126 | # Validation
127 | validation_dataset = ResumeDataset()
128 | validation_dataset.load_dataset(dataset_dir=dataset_path, is_train=False)
129 | validation_dataset.prepare()
130 |
131 | #For Training;
132 | config = ResumeConfig()
133 |
134 | model = mrcnn.model.MaskRCNN(mode='training',
135 | model_dir='.log',
136 | config=config)
137 | model.keras_model.summary()
138 |
139 | model.load_weights(filepath=model_path, by_name=True)
140 |
141 | print("Weights loaded!")
142 |
143 | print("Training Started!")
144 | model.train(train_dataset=train_dataset,
145 | val_dataset=validation_dataset,
146 | learning_rate=config.LEARNING_RATE,
147 | epochs=num_epochs,
148 | layers='heads')
149 |
150 |
151 | model.keras_model.save_weights(final_model_path)
152 | print("Model Saved!")
153 |
--------------------------------------------------------------------------------
/mrcnn/parallel_model.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Multi-GPU Support for Keras.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 |
9 | Ideas and small code snippets from these sources:
10 | https://github.com/fchollet/keras/issues/2436
11 | https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012
12 | https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/
13 | https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py
14 | """
15 |
16 | import tensorflow as tf
17 | import keras.backend as K
18 | import keras.layers as KL
19 | import keras.models as KM
20 |
21 |
22 | class ParallelModel(KM.Model):
23 | """Subclasses the standard Keras Model and adds multi-GPU support.
24 | It works by creating a copy of the model on each GPU. Then it slices
25 | the inputs and sends a slice to each copy of the model, and then
26 | merges the outputs together and applies the loss on the combined
27 | outputs.
28 | """
29 |
30 | def __init__(self, keras_model, gpu_count):
31 | """Class constructor.
32 | keras_model: The Keras model to parallelize
33 | gpu_count: Number of GPUs. Must be > 1
34 | """
35 | self.inner_model = keras_model
36 | self.gpu_count = gpu_count
37 | merged_outputs = self.make_parallel()
38 | super(ParallelModel, self).__init__(inputs=self.inner_model.inputs,
39 | outputs=merged_outputs)
40 |
41 | def __getattribute__(self, attrname):
42 | """Redirect loading and saving methods to the inner model. That's where
43 | the weights are stored."""
44 | if 'load' in attrname or 'save' in attrname:
45 | return getattr(self.inner_model, attrname)
46 | return super(ParallelModel, self).__getattribute__(attrname)
47 |
48 | def summary(self, *args, **kwargs):
49 | """Override summary() to display summaries of both, the wrapper
50 | and inner models."""
51 | super(ParallelModel, self).summary(*args, **kwargs)
52 | self.inner_model.summary(*args, **kwargs)
53 |
54 | def make_parallel(self):
55 | """Creates a new wrapper model that consists of multiple replicas of
56 | the original model placed on different GPUs.
57 | """
58 | # Slice inputs. Slice inputs on the CPU to avoid sending a copy
59 | # of the full inputs to all GPUs. Saves on bandwidth and memory.
60 | input_slices = {name: tf.split(x, self.gpu_count)
61 | for name, x in zip(self.inner_model.input_names,
62 | self.inner_model.inputs)}
63 |
64 | output_names = self.inner_model.output_names
65 | outputs_all = []
66 | for i in range(len(self.inner_model.outputs)):
67 | outputs_all.append([])
68 |
69 | # Run the model call() on each GPU to place the ops there
70 | for i in range(self.gpu_count):
71 | with tf.device('/gpu:%d' % i):
72 | with tf.name_scope('tower_%d' % i):
73 | # Run a slice of inputs through this replica
74 | zipped_inputs = zip(self.inner_model.input_names,
75 | self.inner_model.inputs)
76 | inputs = [
77 | KL.Lambda(lambda s: input_slices[name][i],
78 | output_shape=lambda s: (None,) + s[1:])(tensor)
79 | for name, tensor in zipped_inputs]
80 | # Create the model replica and get the outputs
81 | outputs = self.inner_model(inputs)
82 | if not isinstance(outputs, list):
83 | outputs = [outputs]
84 | # Save the outputs for merging back together later
85 | for l, o in enumerate(outputs):
86 | outputs_all[l].append(o)
87 |
88 | # Merge outputs on CPU
89 | with tf.device('/cpu:0'):
90 | merged = []
91 | for outputs, name in zip(outputs_all, output_names):
92 | # Concatenate or average outputs?
93 | # Outputs usually have a batch dimension and we concatenate
94 | # across it. If they don't, then the output is likely a loss
95 | # or a metric value that gets averaged across the batch.
96 | # Keras expects losses and metrics to be scalars.
97 | if K.int_shape(outputs[0]) == ():
98 | # Average
99 | m = KL.Lambda(lambda o: tf.add_n(o) / len(outputs), name=name)(outputs)
100 | else:
101 | # Concatenate
102 | m = KL.Concatenate(axis=0, name=name)(outputs)
103 | merged.append(m)
104 | return merged
105 |
106 |
107 | if __name__ == "__main__":
108 | # Testing code below. It creates a simple model to train on MNIST and
109 | # tries to run it on 2 GPUs. It saves the graph so it can be viewed
110 | # in TensorBoard. Run it as:
111 | #
112 | # python3 parallel_model.py
113 |
114 | import os
115 | import numpy as np
116 | import keras.optimizers
117 | from keras.datasets import mnist
118 | from keras.preprocessing.image import ImageDataGenerator
119 |
120 | GPU_COUNT = 2
121 |
122 | # Root directory of the project
123 | ROOT_DIR = os.path.abspath("../")
124 |
125 | # Directory to save logs and trained model
126 | MODEL_DIR = os.path.join(ROOT_DIR, "logs")
127 |
128 | def build_model(x_train, num_classes):
129 | # Reset default graph. Keras leaves old ops in the graph,
130 | # which are ignored for execution but clutter graph
131 | # visualization in TensorBoard.
132 | tf.reset_default_graph()
133 |
134 | inputs = KL.Input(shape=x_train.shape[1:], name="input_image")
135 | x = KL.Conv2D(32, (3, 3), activation='relu', padding="same",
136 | name="conv1")(inputs)
137 | x = KL.Conv2D(64, (3, 3), activation='relu', padding="same",
138 | name="conv2")(x)
139 | x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x)
140 | x = KL.Flatten(name="flat1")(x)
141 | x = KL.Dense(128, activation='relu', name="dense1")(x)
142 | x = KL.Dense(num_classes, activation='softmax', name="dense2")(x)
143 |
144 | return KM.Model(inputs, x, "digit_classifier_model")
145 |
146 | # Load MNIST Data
147 | (x_train, y_train), (x_test, y_test) = mnist.load_data()
148 | x_train = np.expand_dims(x_train, -1).astype('float32') / 255
149 | x_test = np.expand_dims(x_test, -1).astype('float32') / 255
150 |
151 | print('x_train shape:', x_train.shape)
152 | print('x_test shape:', x_test.shape)
153 |
154 | # Build data generator and model
155 | datagen = ImageDataGenerator()
156 | model = build_model(x_train, 10)
157 |
158 | # Add multi-GPU support.
159 | model = ParallelModel(model, GPU_COUNT)
160 |
161 | optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0)
162 |
163 | model.compile(loss='sparse_categorical_crossentropy',
164 | optimizer=optimizer, metrics=['accuracy'])
165 |
166 | model.summary()
167 |
168 | # Train
169 | model.fit_generator(
170 | datagen.flow(x_train, y_train, batch_size=64),
171 | steps_per_epoch=50, epochs=10, verbose=1,
172 | validation_data=(x_test, y_test),
173 | callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR,
174 | write_graph=True)]
175 | )
176 |
--------------------------------------------------------------------------------
/Resume_Parser.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | """
3 | Created on Wed Dec 22 02:48:47 2021
4 |
5 | @author: davba
6 |
7 | path to this file rn D:\ResumeIT\Resume_Parser.py
8 | python -m streamlit run D:\ResumeIT\Resume_Parser.py
9 |
10 | """
11 | import streamlit as st #framework
12 | import pdf2image #converts pdf
13 | from pdf2image import convert_from_path
14 | import easyocr
15 | import numpy as np
16 | import PIL # Python Imaging Library
17 | from PIL import ImageDraw # drawing bounding boxes
18 | import tensorflow as tf
19 | from IPython.display import display,Image
20 | from matplotlib.pyplot import imshow
21 | import xml.dom.minidom
22 | import pandas as pd
23 | import mrcnn
24 | import mrcnn.utils
25 | import mrcnn.config
26 | import mrcnn.model
27 | import mrcnn.visualize
28 | import urllib.request
29 | import os
30 | import xml.etree
31 | import cv2
32 | import Train_MASKRCNN_Script as training
33 |
34 |
35 | @st.cache
36 | def convert_df(df):
37 | # IMPORTANT: Cache the conversion to prevent computation on every rerun
38 | return df.to_csv().encode('utf-8')
39 |
40 |
41 | def run_model(model_path, img_array):
42 | CLASS_NAMES = ['BG', 'Personal Info', 'Education', 'Skills', 'Projects', 'Work Experience', 'Extra']
43 |
44 | class SimpleConfig(mrcnn.config.Config):
45 | NAME = "coco_inference"
46 |
47 | GPU_COUNT = 1
48 | IMAGES_PER_GPU = 1
49 |
50 | NUM_CLASSES = len(CLASS_NAMES)
51 |
52 | model = mrcnn.model.MaskRCNN(mode="inference",
53 | config=SimpleConfig(),
54 | model_dir=os.getcwd())
55 |
56 | model.load_weights(filepath=model_path,
57 | by_name=True)
58 |
59 | class_dict = {'bg' : 0,
60 | 'Personal Info' : 1,
61 | 'Education' : 2,
62 | 'Skills' : 3,
63 | 'Projects' : 4,
64 | 'Work Experience' : 5,
65 | 'Extra' : 6
66 | }
67 |
68 | inv_class_dict = {0:'bg',
69 | 1:'Personal Info',
70 | 2:'Education',
71 | 3:'Skills',
72 | 4:'Projects',
73 | 5:'Work Experience',
74 | 6: 'Extra'
75 | }
76 |
77 | sections = ['Personal Info', 'Education', 'Skills', 'Projects', 'Work Experience', 'Extra']
78 | extracted_info_df = pd.DataFrame(columns=sections)
79 |
80 | #Assuming an array of images in cv2 format: img_array
81 | for image in img_array:
82 |
83 | pred = model.detect([image], verbose=0)
84 | pred = pred[0]
85 | # The bounding boxes output by the model are [y1, x1, y2, x2]
86 |
87 | full_info = list()
88 | class_list = list()
89 | for i in range(len(pred['rois'])):
90 | img_pil = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
91 | temp_img = PIL.Image.fromarray(img_pil)
92 | current_bnd_box = pred['rois'][i]
93 | current_box_class = pred['class_ids'][i]
94 | #pil_img.crop(box=[x1,y1,x2,y2])
95 | crop_box = [current_bnd_box[1], current_bnd_box[0], current_bnd_box[3], current_bnd_box[2]]
96 | crop_img = temp_img.crop(crop_box)
97 |
98 | #text for this section
99 | section_text = ""
100 | bounds = reader.readtext(np.array(crop_img),min_size=0,slope_ths=0.2,ycenter_ths=0.7,height_ths=0.6,width_ths=0.8)
101 | for b in bounds:
102 | section_text += " " + b[1]
103 | full_info.append([section_text])
104 | class_list.append(inv_class_dict[current_box_class])
105 |
106 | add_to_df = dict(zip(class_list,full_info))
107 |
108 | for key in sections:
109 | if key in add_to_df:
110 | continue
111 | else:
112 | add_to_df[key] = [""]
113 |
114 | temp_df = pd.DataFrame.from_dict(add_to_df)
115 |
116 | extracted_info_df = extracted_info_df.append(temp_df, ignore_index = True)
117 |
118 | return extracted_info_df
119 |
120 |
121 | st.title("Resume Parsing")
122 | os.environ['KMP_DUPLICATE_LIB_OK']='True'
123 | reader= easyocr.Reader(["en"]) # select language
124 |
125 | image_path ="C:/ResumeParser/tempDirectory/resume_image"
126 | # Get Resume Book and Split Resume
127 |
128 | st.write('', unsafe_allow_html=True)
129 | st.write('', unsafe_allow_html=True)
130 |
131 | choose=st.radio("Current Job",("Train Model","Parse Resumes"))
132 |
133 | if choose == "Parse Resumes":
134 | uploaded_file =st.file_uploader("Upload Your Resume Book", type=['pdf'], accept_multiple_files=False, key=None, help=None, on_change=None, args=None, kwargs=None)
135 | if uploaded_file is not None:
136 | if uploaded_file.type == "application/pdf":
137 | images = pdf2image.convert_from_bytes(uploaded_file.read(),size=(1700,2200))
138 | st.subheader("Please select page(s)")
139 | start = st.number_input('Start with page',min_value =1,max_value=len(images),step=1,key="start_page")
140 | end = st.number_input('End with page',min_value=1,max_value=len(images),step=1,key="end_page")
141 |
142 | split_button = st.button("Split resume book", key='split_button')
143 | if split_button:
144 | for i in range(start-1,end):
145 | img_index = i
146 | resume = images[img_index]
147 |
148 | image_path = "C:/ResumeParser/tempDirectory/resume_image"
149 |
150 | image_name = uploaded_file.name.split(".")[0] + str(img_index+1)
151 | image_ext = image_name + ".jpg"
152 |
153 | resume.save(f"{image_path}/{image_ext}") #save jpeg
154 | st.success("Finished splitting the resume. Ready to run!")
155 |
156 | #Resume is split and saved as images so now we open that to get the resumes for prediction
157 |
158 | files = os.listdir(image_path)
159 | img_array = list()
160 | for name in files:
161 | image = cv2.imread(image_path + "/" + name)
162 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
163 | img_array.append(image)
164 |
165 |
166 | model_path = st.text_input("Model Path", value="", max_chars=None, key="Model_path_input")
167 |
168 | run_model_button = st.button("Run the Model", key='run_model_button')
169 |
170 |
171 | # button "Click here to run the model"
172 | df = pd.DataFrame()
173 | if run_model_button:
174 | df = run_model(model_path, img_array)
175 |
176 | st.dataframe(data = df)
177 |
178 | csv = convert_df(df)
179 |
180 | st.download_button(
181 | label="Download data as CSV",
182 | data=csv,
183 | mime='text/csv')
184 | elif choose == "Train Model":
185 |
186 | #def train_model(dataset_path, model_path, num_epochs, final_model_path):
187 |
188 | dataset_path = st.text_input("Dataset Path (Folder containing Resumes and Resume Annotations folders)", value="", max_chars=None, key="dataset_path_input")
189 |
190 | initial_model_path = st.text_input("Base Model Path", value="", max_chars=None, key="Base_Model_path_input")
191 |
192 | num_epochs = st.number_input("Number of Epochs", key="Num_Epochs_input")
193 |
194 | final_model_path = st.text_input("Final Model Path", value="", max_chars=None, key="Final_Model_path_input")
195 |
196 | train_button = st.button("Train Model", key="Train_Model_Button")
197 |
198 | if train_button:
199 | with st.spinner('Training the Model'):
200 | training.train_model(dataset_path,initial_model_path, int(num_epochs), final_model_path)
201 | st.success('Finished Training!')
202 |
203 |
--------------------------------------------------------------------------------
/mrcnn/config.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Base Configurations class.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import numpy as np
11 |
12 |
13 | # Base Configuration Class
14 | # Don't use this class directly. Instead, sub-class it and override
15 | # the configurations you need to change.
16 |
17 | class Config(object):
18 | """Base configuration class. For custom configurations, create a
19 | sub-class that inherits from this one and override properties
20 | that need to be changed.
21 | """
22 | # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc.
23 | # Useful if your code needs to do things differently depending on which
24 | # experiment is running.
25 | NAME = None # Override in sub-classes
26 |
27 | # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1.
28 | GPU_COUNT = 1
29 |
30 | # Number of images to train with on each GPU. A 12GB GPU can typically
31 | # handle 2 images of 1024x1024px.
32 | # Adjust based on your GPU memory and image sizes. Use the highest
33 | # number that your GPU can handle for best performance.
34 | IMAGES_PER_GPU = 2
35 |
36 | # Number of training steps per epoch
37 | # This doesn't need to match the size of the training set. Tensorboard
38 | # updates are saved at the end of each epoch, so setting this to a
39 | # smaller number means getting more frequent TensorBoard updates.
40 | # Validation stats are also calculated at each epoch end and they
41 | # might take a while, so don't set this too small to avoid spending
42 | # a lot of time on validation stats.
43 | STEPS_PER_EPOCH = 1000
44 |
45 | # Number of validation steps to run at the end of every training epoch.
46 | # A bigger number improves accuracy of validation stats, but slows
47 | # down the training.
48 | VALIDATION_STEPS = 50
49 |
50 | # Backbone network architecture
51 | # Supported values are: resnet50, resnet101.
52 | # You can also provide a callable that should have the signature
53 | # of model.resnet_graph. If you do so, you need to supply a callable
54 | # to COMPUTE_BACKBONE_SHAPE as well
55 | BACKBONE = "resnet101"
56 |
57 | # Only useful if you supply a callable to BACKBONE. Should compute
58 | # the shape of each layer of the FPN Pyramid.
59 | # See model.compute_backbone_shapes
60 | COMPUTE_BACKBONE_SHAPE = None
61 |
62 | # The strides of each layer of the FPN Pyramid. These values
63 | # are based on a Resnet101 backbone.
64 | BACKBONE_STRIDES = [4, 8, 16, 32, 64]
65 |
66 | # Size of the fully-connected layers in the classification graph
67 | FPN_CLASSIF_FC_LAYERS_SIZE = 1024
68 |
69 | # Size of the top-down layers used to build the feature pyramid
70 | TOP_DOWN_PYRAMID_SIZE = 256
71 |
72 | # Number of classification classes (including background)
73 | NUM_CLASSES = 1 # Override in sub-classes
74 |
75 | # Length of square anchor side in pixels
76 | RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
77 |
78 | # Ratios of anchors at each cell (width/height)
79 | # A value of 1 represents a square anchor, and 0.5 is a wide anchor
80 | RPN_ANCHOR_RATIOS = [0.5, 1, 2]
81 |
82 | # Anchor stride
83 | # If 1 then anchors are created for each cell in the backbone feature map.
84 | # If 2, then anchors are created for every other cell, and so on.
85 | RPN_ANCHOR_STRIDE = 1
86 |
87 | # Non-max suppression threshold to filter RPN proposals.
88 | # You can increase this during training to generate more proposals.
89 | RPN_NMS_THRESHOLD = 0.7
90 |
91 | # How many anchors per image to use for RPN training
92 | RPN_TRAIN_ANCHORS_PER_IMAGE = 256
93 |
94 | # ROIs kept after tf.nn.top_k and before non-maximum suppression
95 | PRE_NMS_LIMIT = 6000
96 |
97 | # ROIs kept after non-maximum suppression (training and inference)
98 | POST_NMS_ROIS_TRAINING = 2000
99 | POST_NMS_ROIS_INFERENCE = 1000
100 |
101 | # If enabled, resizes instance masks to a smaller size to reduce
102 | # memory load. Recommended when using high-resolution images.
103 | USE_MINI_MASK = True
104 | MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
105 |
106 | # Input image resizing
107 | # Generally, use the "square" resizing mode for training and predicting
108 | # and it should work well in most cases. In this mode, images are scaled
109 | # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
110 | # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
111 | # padded with zeros to make it a square so multiple images can be put
112 | # in one batch.
113 | # Available resizing modes:
114 | # none: No resizing or padding. Return the image unchanged.
115 | # square: Resize and pad with zeros to get a square image
116 | # of size [max_dim, max_dim].
117 | # pad64: Pads width and height with zeros to make them multiples of 64.
118 | # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
119 | # up before padding. IMAGE_MAX_DIM is ignored in this mode.
120 | # The multiple of 64 is needed to ensure smooth scaling of feature
121 | # maps up and down the 6 levels of the FPN pyramid (2**6=64).
122 | # crop: Picks random crops from the image. First, scales the image based
123 | # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
124 | # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
125 | # IMAGE_MAX_DIM is not used in this mode.
126 | IMAGE_RESIZE_MODE = "square"
127 | IMAGE_MIN_DIM = 800
128 | IMAGE_MAX_DIM = 1024
129 | # Minimum scaling ratio. Checked after MIN_IMAGE_DIM and can force further
130 | # up scaling. For example, if set to 2 then images are scaled up to double
131 | # the width and height, or more, even if MIN_IMAGE_DIM doesn't require it.
132 | # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
133 | IMAGE_MIN_SCALE = 0
134 | # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
135 | # Changing this requires other changes in the code. See the WIKI for more
136 | # details: https://github.com/matterport/Mask_RCNN/wiki
137 | IMAGE_CHANNEL_COUNT = 3
138 |
139 | # Image mean (RGB)
140 | MEAN_PIXEL = np.array([123.7, 116.8, 103.9])
141 |
142 | # Number of ROIs per image to feed to classifier/mask heads
143 | # The Mask RCNN paper uses 512 but often the RPN doesn't generate
144 | # enough positive proposals to fill this and keep a positive:negative
145 | # ratio of 1:3. You can increase the number of proposals by adjusting
146 | # the RPN NMS threshold.
147 | TRAIN_ROIS_PER_IMAGE = 200
148 |
149 | # Percent of positive ROIs used to train classifier/mask heads
150 | ROI_POSITIVE_RATIO = 0.33
151 |
152 | # Pooled ROIs
153 | POOL_SIZE = 7
154 | MASK_POOL_SIZE = 14
155 |
156 | # Shape of output mask
157 | # To change this you also need to change the neural network mask branch
158 | MASK_SHAPE = [28, 28]
159 |
160 | # Maximum number of ground truth instances to use in one image
161 | MAX_GT_INSTANCES = 100
162 |
163 | # Bounding box refinement standard deviation for RPN and final detections.
164 | RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
165 | BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2])
166 |
167 | # Max number of final detections
168 | DETECTION_MAX_INSTANCES = 100
169 |
170 | # Minimum probability value to accept a detected instance
171 | # ROIs below this threshold are skipped
172 | DETECTION_MIN_CONFIDENCE = 0.7
173 |
174 | # Non-maximum suppression threshold for detection
175 | DETECTION_NMS_THRESHOLD = 0.3
176 |
177 | # Learning rate and momentum
178 | # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes
179 | # weights to explode. Likely due to differences in optimizer
180 | # implementation.
181 | LEARNING_RATE = 0.001
182 | LEARNING_MOMENTUM = 0.9
183 |
184 | # Weight decay regularization
185 | WEIGHT_DECAY = 0.0001
186 |
187 | # Loss weights for more precise optimization.
188 | # Can be used for R-CNN training setup.
189 | LOSS_WEIGHTS = {
190 | "rpn_class_loss": 1.,
191 | "rpn_bbox_loss": 1.,
192 | "mrcnn_class_loss": 1.,
193 | "mrcnn_bbox_loss": 1.,
194 | "mrcnn_mask_loss": 1.
195 | }
196 |
197 | # Use RPN ROIs or externally generated ROIs for training
198 | # Keep this True for most situations. Set to False if you want to train
199 | # the head branches on ROI generated by code rather than the ROIs from
200 | # the RPN. For example, to debug the classifier head without having to
201 | # train the RPN.
202 | USE_RPN_ROIS = True
203 |
204 | # Train or freeze batch normalization layers
205 | # None: Train BN layers. This is the normal mode
206 | # False: Freeze BN layers. Good when using a small batch size
207 | # True: (don't use). Set layer in training mode even when predicting
208 | TRAIN_BN = False # Defaulting to False since batch size is often small
209 |
210 | # Gradient norm clipping
211 | GRADIENT_CLIP_NORM = 5.0
212 |
213 | def __init__(self):
214 | """Set values of computed attributes."""
215 | # Effective batch size
216 | self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT
217 |
218 | # Input image size
219 | if self.IMAGE_RESIZE_MODE == "crop":
220 | self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM,
221 | self.IMAGE_CHANNEL_COUNT])
222 | else:
223 | self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM,
224 | self.IMAGE_CHANNEL_COUNT])
225 |
226 | # Image meta data length
227 | # See compose_image_meta() for details
228 | self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES
229 |
230 | def display(self):
231 | """Display Configuration values."""
232 | print("\nConfigurations:")
233 | for a in dir(self):
234 | if not a.startswith("__") and not callable(getattr(self, a)):
235 | print("{:30} {}".format(a, getattr(self, a)))
236 | print("\n")
237 |
--------------------------------------------------------------------------------
/annotate.py:
--------------------------------------------------------------------------------
1 | from pdf2image import convert_from_path
2 | import easyocr
3 | import numpy as np
4 | import PIL # Python Imaging Library
5 | from PIL import ImageDraw, Image # drawing bounding boxes
6 | import spacy # advanced NLP for key attributes
7 |
8 | def createBoxes(bounds):
9 | categories =['publications','community service','experience','technicallmanagement skills',
10 | 'project experience','activities','awards','summary','volunteer','education',
11 | 'interests','skills','projects','work experience','professional experience','references',
12 | 'volunteer experience','technical skills','research experience','computer skills',
13 | 'leadership & volunteer experience','skillset','extracurriculars', 'certifications', 'certification',
14 | 'academic projects','education & credentials','leadership and extra curriculars',
15 | 'skills & certifications','skill set_','extra curricular activities',
16 | 'leadership , awards & achievements','academic experience','leadership and achievements',
17 | 'education and training','work history','professional summary','skills and abilities',
18 | 'cqurse workand proiects','relevant skills','skill highlights','educations','experiences',
19 | 'personality and language','related courses and skills','key skills','people & capability development',
20 | 'skills and certifications', 'personal','caree interests','additional','interests 0','employment history',
21 | 'details','relevant coursework','other','social impact','experience_', 'leadership and community engagement',
22 | 'professional & leadership experience','additional skills/interests', 'leadership & volunteer experience',
23 | 'other_','social impact','professional distinctions','additional information','activities and leadership',
24 | 'additional experience','interests and skills','athletics','recent experience','awards and personal','selected patents and publications',
25 | 'awards, honors, and interests','activities & leadership','leadership & community activities','additional info',
26 | 'leadership','technical experience','community leadership & interests','community activities & additional information',
27 | 'miscellaneous','leadership & community involvement','prior work experience','publication','personal','extracurricular experience',
28 | 'additional skills and projects','publications & research','leadership, community & other','additional data',
29 | 'community work','awards and honours','others','volunteering and public service','skills, interests & publications','personal interests',
30 | 'languages','community & interests','community involement/personal','activities and skills','awards & community involvement',
31 | 'entrepreneurial experience','entrepreneurship','media & technology experience','business ownership','service and interests',
32 | 'extracurricular','certifications','skills & personal','other information','activities','professional sports experience',
33 | 'other activities and personal interests','professional','writing & publications','skills/activities','community & other',
34 | 'board experience','impact investing work','product, user, and strategy work','extra-curricular & community activities',
35 | 'additional interests','additional data','additional experience','social entrepreneurship','interests and extracurriculars',
36 | 'skills & personal','professional certifications and awards','community involvement','selected publications','volunteer experience and additional skills',
37 | 'internship experience','employment','community engagement','awards, speaking engagements & press','leadership & other activities','other leadership experience',
38 | 'hobbies','initiatives','additional projects','professional experience and leadership','professional experience & leadership','volunteer activities/activities outside job',
39 | 'other inerests/hobbies','professional experiences','athletic experience','community','skills/additional information','education_','additional_','community leadership',
40 | 'academic experience','academic experience_','prqeessional experience','leadershp & volunteer experience','additional:','volunteer & leadership experience',
41 | 'dditional','additiona','ed uc a tio n','e xp e rie n c e','ad dttio nal','awards, honors; and interests',
42 | 'professional experience: united states marine corps','skills and interests','leadership & additional information',
43 | 'leadership , community & other','addtional leadershp','experience__','prqeessional_experience','honors & awards',
44 | 'financial skills','extracurricular leadership','additional skills & interests','prqfessional experience','personal activities and interests',
45 | 'leadership experience and service: collegiate activities','leadership experience and service: post-collegiate activities','professional expereince','honors and awards',
46 | 'additional skills','skills & interests','leadershp & communty involvement','workexperience','addtional','honors; skills, & interests','leadership_awards_& skills',
47 | 'skills &','awards &','personal and interests','leadership and activities','education:','work experience:','skills_hobbies & interests',
48 | 'leadership experience','additional activties and interests','community_leadership_','interests & skills',
49 | 'leadership & involvement','awards & interests','work','activities:', 'awards:','skills, achievements & interests','leadership & activities','additional leadership , skills, and interests',
50 | 'education _','communty leadershp','skills, activities & interests','skills, languages and interests','experience (u.s_navy, submarines)','additional_experience','activities & interests',
51 | 'skills and personal','leadership activities','professional experience:','leadership experience:','key skills:','e d u c a tio n','ex p e rienc e',
52 | 'p e r s 0 na l','additional leadership','additional information and interests','professional experience_','leadership & service',
53 | 'skills, activities and interests','selected publica tions','teaching','iternshps','public service','communty involvement',
54 | 'professlonal experience','activities and interests','leadership & extracurricular','additional experience_','extracurricular activities & skills',
55 | 'leadership & interests','leadership & extracurricular activittes','leadership and social impact','additional projects_',
56 | 'education and honors','learn to_win (lzw executive and management experience_','naval intelligence officer_experience','navy surface warfare officer experience',
57 | 'education & honors','leadership experience_','summary: strategic, results-oriented leader with experience building cross-functional systems and processes. looking to',
58 | 'military','extracurricular activities','other experience','qther','edlcation','leadershpandcommunty service','education; honors and scholarships',
59 | 'other interestsihobbies','volunteer activitiesiactivities outside job','leadership and additional information','skills/ additional information','extracurricular activities',
60 | 'community and personal interests','community leadership & additional']
61 | box = []
62 | for x in bounds:
63 | if x[1].lower() in categories:
64 | box.append(x)
66 | return box
67 |
68 | def giveProperNames(new_bounds):
69 | properNameBounds = []
70 | educationNames = ['education','education & credentials','academic experience','education and training','educations','education_','academic experience',
71 | 'academic experience_','ed uc a tio n','education:','education _','e d u c a tio n','education and honors','education & honors','edlcation','education; honors and scholarships']
72 |
73 | workNames = ['professional experience','work experience','experience','work history','experiences','experience_','recent work experience','prior work experience',
74 | 'entrepreneurial experience','employment','professional experience and leadership','professional experience & leadership','professional experiences','recent experience',
75 | 'media & technology experience','business ownership','additional experience','professional & leadership experience','prqeessional experience',
76 | 'e xp e rie n c e','professional experience: united states marine corps','experience__','prqeessional_experience','prqfessional experience','professional expereince',
77 | 'workexperience','work experience:','entrepreneurship','work','experience (u.s_navy, submarines)','professional experience:','ex p e rienc e','professional experience_',
78 | 'iternshps','professlonal experience','learn to_win (lzw executive and management experience_','naval intelligence officer_experience','navy surface warfare officer experience']
79 |
80 | skillNames = ['skills','technicallmanagement skills','computer skills','skillset','skill set_','relevant skills',
81 | 'skills and abilities','skill highlights','skills/additional information','skills/activities','skills & personal',
82 | 'skills/additional information', 'activities and skills', 'skills', 'skills, interests & publications',
83 | 'interests and skills', 'additional skills/interests','skills and interests','financial skills','additional skills & interests',
84 | 'additional skills','skills & interests','leadership_awards_& skills', 'skills &','skills_hobbies & interests','interests & skills',
85 | 'skills, achievements & interests','skills, activities & interests','skills, languages and interests','skills and personal',
86 | 'key skills:','skills, activities and interests','extracurricular activities & skills','skills/ additional information']
87 | for x in new_bounds:
88 | if(len(properNameBounds)==0):
89 | properNameBounds.append(x)
90 | elif(x[1] in educationNames):
91 | properNameBounds.append((x[0],'Education'))
92 | elif(x[1] in workNames):
93 | properNameBounds.append((x[0],'Work Experience'))
94 | elif(x[1] in skillNames):
95 | properNameBounds.append((x[0],'Skills'))
96 | else:
97 | properNameBounds.append((x[0],'Extra'))
98 | return properNameBounds
99 |
100 | def draw_boxes(image,bounds,color="yellow",width=2):
101 | draw = ImageDraw.Draw(image)
102 | for bound in bounds:
103 | p0,p1,p2,p3 = bound[0]
104 | draw.line([*p0,*p1,*p2,*p3,*p0],fill=color,width=width)
105 | return image
106 |
107 | def createNormalBounds(box):
108 | new_bounds = []
109 | last_section = 'Personal Info'
110 | for x in box:
111 | if len(new_bounds)==0:
112 | new_bounds.append(([[0, x[0][0][1]-10], [1700,x[0][0][1]-10], [1700, 0], [0,0]], last_section))
113 | elif len(new_bounds)==1:
114 | new_bounds.append(([[0,new_bounds[-1][0][0][1]+10],[1700,new_bounds[-1][0][0][1]+10],[1700,x[0][0][1]-10],[0,x[0][0][1]-10]], last_section))
115 | else:
116 | new_bounds.append(([[0,new_bounds[-1][0][3][1]+10],[1700,new_bounds[-1][0][3][1]+10],[1700,x[0][0][1]-10],[0,x[0][0][1]-10]],last_section))
117 | last_section = x[1].lower()
118 | new_bounds[-1][0][3][1]=2200
119 | new_bounds[-1][0][2][1]=2200
120 | return new_bounds
121 |
122 | def createColumnBounds(box):
123 | new_bounds = []
124 | box.sort(key = lambda x: x[0][0][0])
125 | box.sort(key = lambda x: x[0][0][1])
126 |
127 | for i in range(2, len(box)):
128 | if len(new_bounds)==0:
129 | last_section = 'Personal Info'
130 | if (box[i-2][0][0][0]<500):
131 | new_bounds.append(([[0, 0], [1700,0], [1700, box[i-2][0][0][1]-10],
132 | [0,box[i-2][0][0][1]-10]], last_section))
133 | last_section = box[i-2][1].lower()
134 | if len(new_bounds)==1:
135 | new_bounds.append(([[0,box[i-2][0][0][1]],[1700,box[i-2][0][0][1]],
136 | [1700,box[i-1][0][0][1]-10],[0,box[i-1][0][0][1]-10]], last_section))
137 | last_section = box[i-1][1].lower()
138 | new_bounds.append(([[0,box[i-1][0][0][1]],[1700,box[i-1][0][0][1]],[1700,box[i][0][0][1]-10],
139 | [0,box[i][0][0][1]-10]],last_section))
140 | last_section = box[i][1].lower()
141 | print(new_bounds)
142 | if(box[i][0][0][0] <550):
143 | new_bounds.append(([[box[i][0][0][0],box[i][0][0][1]],[1700,box[i][0][0][1]],[1700,box[i][0][3][1]-10],
144 | [box[i][0][0][0],box[i][0][3][1]-10]],last_section))
145 |
146 | elif(box[i][0][0][0]>550) and (box[i-1][0][0][0]<550):
147 | new_bounds[-1][0][3][1]=2200
148 | new_bounds[-1][0][2][1]=2200
149 | last_section=box[i][1].lower()
150 | endOfColumn = i
151 | break
152 |
153 |
154 | for i in range(endOfColumn, len(box)-1):
155 | for x in new_bounds:
156 | if box[i][0][2][1] <
--------------------------------------------------------------------------------
/mrcnn/visualize.py:
--------------------------------------------------------------------------------
219 | if pred_match[i] > -1 else overlaps[i].max()))
220 | for i in range(len(pred_match))]
221 | # Set title if not provided
222 | title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU"
223 | # Display
224 | display_instances(
225 | image,
226 | boxes, masks, class_ids,
227 | class_names, scores, ax=ax,
228 | show_bbox=show_box, show_mask=show_mask,
229 | colors=colors, captions=captions,
230 | title=title)
231 |
232 |
233 | def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10):
234 | """
235 | anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates.
236 | proposals: [n, 4] the same anchors but refined to fit objects better.
237 | """
238 | masked_image = image.copy()
239 |
240 | # Pick random anchors in case there are too many.
241 | ids = np.arange(rois.shape[0], dtype=np.int32)
242 | ids = np.random.choice(
243 | ids, limit, replace=False) if ids.shape[0] > limit else ids
244 |
245 | fig, ax = plt.subplots(1, figsize=(12, 12))
246 | if rois.shape[0] > limit:
247 | plt.title("Showing {} random ROIs out of {}".format(
248 | len(ids), rois.shape[0]))
249 | else:
250 | plt.title("{} ROIs".format(len(ids)))
251 |
252 | # Show area outside image boundaries.
253 | ax.set_ylim(image.shape[0] + 20, -20)
254 | ax.set_xlim(-50, image.shape[1] + 20)
255 | ax.axis('off')
256 |
257 | for i, id in enumerate(ids):
258 | color = np.random.rand(3)
259 | class_id = class_ids[id]
260 | # ROI
261 | y1, x1, y2, x2 = rois[id]
262 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
263 | edgecolor=color if class_id else "gray",
264 | facecolor='none', linestyle="dashed")
265 | ax.add_patch(p)
266 | # Refined ROI
267 | if class_id:
268 | ry1, rx1, ry2, rx2 = refined_rois[id]
269 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
270 | edgecolor=color, facecolor='none')
271 | ax.add_patch(p)
272 | # Connect the top-left corners of the anchor and proposal for easy visualization
273 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
274 |
275 | # Label
276 | label = class_names[class_id]
277 | ax.text(rx1, ry1 + 8, "{}".format(label),
278 | color='w', size=11, backgroundcolor="none")
279 |
280 | # Mask
281 | m = utils.unmold_mask(mask[id], rois[id]
282 | [:4].astype(np.int32), image.shape)
283 | masked_image = apply_mask(masked_image, m, color)
284 |
285 | ax.imshow(masked_image)
286 |
287 | # Print stats
288 | print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
289 | print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
290 | print("Positive Ratio: {:.2f}".format(
291 | class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
292 |
293 |
294 | # TODO: Replace with matplotlib equivalent?
295 | def draw_box(image, box, color):
296 | """Draw 3-pixel width bounding boxes on the given image array.
297 | color: list of 3 int values for RGB.
298 | """
299 | y1, x1, y2, x2 = box
300 | image[y1:y1 + 2, x1:x2] = color
301 | image[y2:y2 + 2, x1:x2] = color
302 | image[y1:y2, x1:x1 + 2] = color
303 | image[y1:y2, x2:x2 + 2] = color
304 | return image
305 |
306 |
307 | def display_top_masks(image, mask, class_ids, class_names, limit=4):
308 | """Display the given image and the top few class masks."""
309 | to_display = []
310 | titles = []
311 | to_display.append(image)
312 | titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
313 | # Pick top prominent classes in this image
314 | unique_class_ids = np.unique(class_ids)
315 | mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
316 | for i in unique_class_ids]
317 | top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
318 | key=lambda r: r[1], reverse=True) if v[1] > 0]
319 | # Generate images and titles
320 | for i in range(limit):
321 | class_id = top_ids[i] if i < len(top_ids) else -1
322 | # Pull masks of instances belonging to the same class.
323 | m = mask[:, :, np.where(class_ids == class_id)[0]]
324 | m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
325 | to_display.append(m)
326 | titles.append(class_names[class_id] if class_id != -1 else "-")
327 | display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
328 |
329 |
330 | def plot_precision_recall(AP, precisions, recalls):
331 | """Draw the precision-recall curve.
332 |
333 | AP: Average precision at IoU >= 0.5
334 | precisions: list of precision values
335 | recalls: list of recall values
336 | """
337 | # Plot the Precision-Recall curve
338 | _, ax = plt.subplots(1)
339 | ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
340 | ax.set_ylim(0, 1.1)
341 | ax.set_xlim(0, 1.1)
342 | _ = ax.plot(recalls, precisions)
343 |
344 |
345 | def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
346 | overlaps, class_names, threshold=0.5):
347 | """Draw a grid showing how ground truth objects are classified.
348 | gt_class_ids: [N] int. Ground truth class IDs
349 | pred_class_id: [N] int. Predicted class IDs
350 | pred_scores: [N] float. The probability scores of predicted classes
351 | overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
352 | class_names: list of all class names in the dataset
353 | threshold: Float. The prediction probability required to predict a class
354 | """
355 | gt_class_ids = gt_class_ids[gt_class_ids != 0]
356 | pred_class_ids = pred_class_ids[pred_class_ids != 0]
357 |
358 | plt.figure(figsize=(12, 10))
359 | plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues)
360 | plt.yticks(np.arange(len(pred_class_ids)),
361 | ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i])
362 | for i, id in enumerate(pred_class_ids)])
363 | plt.xticks(np.arange(len(gt_class_ids)),
364 | [class_names[int(id)] for id in gt_class_ids], rotation=90)
365 |
366 | thresh = overlaps.max() / 2.
367 | for i, j in itertools.product(range(overlaps.shape[0]),
368 | range(overlaps.shape[1])):
369 | text = ""
370 | if overlaps[i, j] > threshold:
371 | text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong"
372 | color = ("white" if overlaps[i, j] > thresh
373 | else "black" if overlaps[i, j] > 0
374 | else "grey")
375 | plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text),
376 | horizontalalignment="center", verticalalignment="center",
377 | fontsize=9, color=color)
378 |
379 | plt.tight_layout()
380 | plt.xlabel("Ground Truth")
381 | plt.ylabel("Predictions")
382 |
383 |
384 | def draw_boxes(image, boxes=None, refined_boxes=None,
385 | masks=None, captions=None, visibilities=None,
386 | title="", ax=None):
387 | """Draw bounding boxes and segmentation masks with different
388 | customizations.
389 |
390 | boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates.
391 | refined_boxes: Like boxes, but draw with solid lines to show
392 | that they're the result of refining 'boxes'.
393 | masks: [N, height, width]
394 | captions: List of N titles to display on each box
395 | visibilities: (optional) List of values of 0, 1, or 2. Determine how
396 | prominent each bounding box should be.
397 | title: An optional title to show over the image
398 | ax: (optional) Matplotlib axis to draw on.
399 | """
400 | # Number of boxes
401 | assert boxes is not None or refined_boxes is not None
402 | N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0]
403 |
404 | # Matplotlib Axis
405 | if not ax:
406 | _, ax = plt.subplots(1, figsize=(12, 12))
407 |
408 | # Generate random colors
409 | colors = random_colors(N)
410 |
411 | # Show area outside image boundaries.
412 | margin = image.shape[0] // 10
413 | ax.set_ylim(image.shape[0] + margin, -margin)
414 | ax.set_xlim(-margin, image.shape[1] + margin)
415 | ax.axis('off')
416 |
417 | ax.set_title(title)
418 |
419 | masked_image = image.astype(np.uint32).copy()
420 | for i in range(N):
421 | # Box visibility
422 | visibility = visibilities[i] if visibilities is not None else 1
423 | if visibility == 0:
424 | color = "gray"
425 | style = "dotted"
426 | alpha = 0.5
427 | elif visibility == 1:
428 | color = colors[i]
429 | style = "dotted"
430 | alpha = 1
431 | elif visibility == 2:
432 | color = colors[i]
433 | style = "solid"
434 | alpha = 1
435 |
436 | # Boxes
437 | if boxes is not None:
438 | if not np.any(boxes[i]):
439 | # Skip this instance. Has no bbox. Likely lost in cropping.
440 | continue
441 | y1, x1, y2, x2 = boxes[i]
442 | p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
443 | alpha=alpha, linestyle=style,
444 | edgecolor=color, facecolor='none')
445 | ax.add_patch(p)
446 |
447 | # Refined boxes
448 | if refined_boxes is not None and visibility > 0:
449 | ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
450 | p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
451 | edgecolor=color, facecolor='none')
452 | ax.add_patch(p)
453 | # Connect the top-left corners of the anchor and proposal
454 | if boxes is not None:
455 | ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
456 |
457 | # Captions
458 | if captions is not None:
459 | caption = captions[i]
460 | # If there are refined boxes, display captions on them
461 | if refined_boxes is not None:
462 | y1, x1, y2, x2 = ry1, rx1, ry2, rx2
463 | ax.text(x1, y1, caption, size=11, verticalalignment='top',
464 | color='w', backgroundcolor="none",
465 | bbox={'facecolor': color, 'alpha': 0.5,
466 | 'pad': 2, 'edgecolor': 'none'})
467 |
468 | # Masks
469 | if masks is not None:
470 | mask = masks[:, :, i]
471 | masked_image = apply_mask(masked_image, mask, color)
472 | # Mask Polygon
473 | # Pad to ensure proper polygons for masks that touch image edges.
474 | padded_mask = np.zeros(
475 | (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
476 | padded_mask[1:-1, 1:-1] = mask
477 | contours = find_contours(padded_mask, 0.5)
478 | for verts in contours:
479 | # Subtract the padding and flip (y, x) to (x, y)
480 | verts = np.fliplr(verts) - 1
481 | p = Polygon(verts, facecolor="none", edgecolor=color)
482 | ax.add_patch(p)
483 | ax.imshow(masked_image.astype(np.uint8))
484 |
485 |
486 | def display_table(table):
487 | """Display values in a table format.
488 | table: an iterable of rows, and each row is an iterable of values.
489 | """
490 | html = ""
491 | for row in table:
492 | row_html = ""
493 | for col in row:
494 | row_html += "{:40} | ".format(str(col))
495 | html += "" + row_html + "
"
496 | html = ""
497 | IPython.display.display(IPython.display.HTML(html))
498 |
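# Illustrative usage sketch, not part of the original file: display_table()
# expects an iterable of rows, each row an iterable of values. It renders HTML,
# so it is meant for Jupyter/IPython sessions. The sample values are made up.
def _demo_display_table():
    table = [
        ["WEIGHT NAME", "SHAPE", "MIN"],
        ["conv1/kernel:0", "(7, 7, 3, 64)", "-0.8616"],
    ]
    display_table(table)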
499 |
500 | def display_weight_stats(model):
501 | """Scans all the weights in the model and returns a list of tuples
502 | that contain stats about each weight.
503 | """
504 | layers = model.get_trainable_layers()
505 | table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
506 | for l in layers:
507 | weight_values = l.get_weights() # list of Numpy arrays
508 | weight_tensors = l.weights # list of TF tensors
509 | for i, w in enumerate(weight_values):
510 | weight_name = weight_tensors[i].name
511 | # Detect problematic layers. Exclude biases of conv layers.
512 | alert = ""
513 | if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
514 | alert += "*** dead?"
515 | if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
516 | alert += "*** Overflow?"
517 | # Add row
518 | table.append([
519 | weight_name + alert,
520 | str(w.shape),
521 | "{:+9.4f}".format(w.min()),
522 | "{:+10.4f}".format(w.max()),
523 | "{:+9.4f}".format(w.std()),
524 | ])
525 | display_table(table)
526 |
--------------------------------------------------------------------------------
/mrcnn/utils.py:
--------------------------------------------------------------------------------
1 | """
2 | Mask R-CNN
3 | Common utility functions and classes.
4 |
5 | Copyright (c) 2017 Matterport, Inc.
6 | Licensed under the MIT License (see LICENSE for details)
7 | Written by Waleed Abdulla
8 | """
9 |
10 | import sys
11 | import os
12 | import logging
13 | import math
14 | import random
15 | import numpy as np
16 | import tensorflow as tf
17 | import scipy
18 | import skimage.color
19 | import skimage.io
20 | import skimage.transform
21 | import urllib.request
22 | import shutil
23 | import warnings
24 | from distutils.version import LooseVersion
25 |
26 | # URL from which to download the latest COCO trained weights
27 | COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5"
28 |
29 |
30 | ############################################################
31 | # Bounding Boxes
32 | ############################################################
33 |
34 | def extract_bboxes(mask):
35 | """Compute bounding boxes from masks.
36 | mask: [height, width, num_instances]. Mask pixels are either 1 or 0.
37 |
38 | Returns: bbox array [num_instances, (y1, x1, y2, x2)].
39 | """
40 | boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32)
41 | for i in range(mask.shape[-1]):
42 | m = mask[:, :, i]
43 | # Bounding box.
44 | horizontal_indicies = np.where(np.any(m, axis=0))[0]
45 | vertical_indicies = np.where(np.any(m, axis=1))[0]
46 | if horizontal_indicies.shape[0]:
47 | x1, x2 = horizontal_indicies[[0, -1]]
48 | y1, y2 = vertical_indicies[[0, -1]]
49 | # x2 and y2 should not be part of the box. Increment by 1.
50 | x2 += 1
51 | y2 += 1
52 | else:
53 | # No mask for this instance. Might happen due to
54 | # resizing or cropping. Set bbox to zeros
55 | x1, x2, y1, y2 = 0, 0, 0, 0
56 | boxes[i] = np.array([y1, x1, y2, x2])
57 | return boxes.astype(np.int32)
58 |
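# Illustrative usage sketch, not part of the original module: a tiny synthetic
# mask stack and the bounding boxes extracted from it.
def _demo_extract_bboxes():
    masks = np.zeros([8, 8, 1], dtype=bool)
    masks[2:5, 3:7, 0] = True          # one instance covering rows 2-4, cols 3-6
    boxes = extract_bboxes(masks)      # -> [[2, 3, 5, 7]] as (y1, x1, y2, x2)
    return boxes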
59 |
60 | def compute_iou(box, boxes, box_area, boxes_area):
61 | """Calculates IoU of the given box with the array of the given boxes.
62 | box: 1D vector [y1, x1, y2, x2]
63 | boxes: [boxes_count, (y1, x1, y2, x2)]
64 | box_area: float. the area of 'box'
65 | boxes_area: array of length boxes_count.
66 |
67 | Note: the areas are passed in rather than calculated here for
68 | efficiency. Calculate once in the caller to avoid duplicate work.
69 | """
70 | # Calculate intersection areas
71 | y1 = np.maximum(box[0], boxes[:, 0])
72 | y2 = np.minimum(box[2], boxes[:, 2])
73 | x1 = np.maximum(box[1], boxes[:, 1])
74 | x2 = np.minimum(box[3], boxes[:, 3])
75 | intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
76 | union = box_area + boxes_area[:] - intersection[:]
77 | iou = intersection / union
78 | return iou
79 |
80 |
81 | def compute_overlaps(boxes1, boxes2):
82 | """Computes IoU overlaps between two sets of boxes.
83 | boxes1, boxes2: [N, (y1, x1, y2, x2)].
84 |
85 | For better performance, pass the largest set first and the smaller second.
86 | """
87 | # Areas of anchors and GT boxes
88 | area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1])
89 | area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1])
90 |
91 | # Compute overlaps to generate matrix [boxes1 count, boxes2 count]
92 | # Each cell contains the IoU value.
93 | overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0]))
94 | for i in range(overlaps.shape[1]):
95 | box2 = boxes2[i]
96 | overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1)
97 | return overlaps
98 |
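# Illustrative usage sketch, not part of the original module: IoU overlaps
# between two small hand-made box sets in (y1, x1, y2, x2) order.
def _demo_compute_overlaps():
    boxes1 = np.array([[0, 0, 10, 10], [0, 0, 5, 5]], dtype=np.float32)
    boxes2 = np.array([[0, 0, 10, 10]], dtype=np.float32)
    overlaps = compute_overlaps(boxes1, boxes2)  # shape [2, 1]
    # overlaps[0, 0] == 1.0 (identical boxes), overlaps[1, 0] == 0.25
    return overlaps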
99 |
100 | def compute_overlaps_masks(masks1, masks2):
101 | """Computes IoU overlaps between two sets of masks.
102 | masks1, masks2: [Height, Width, instances]
103 | """
104 |
105 | # If either set of masks is empty return empty result
106 | if masks1.shape[-1] == 0 or masks2.shape[-1] == 0:
107 | return np.zeros((masks1.shape[-1], masks2.shape[-1]))
108 | # flatten masks and compute their areas
109 | masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32)
110 | masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32)
111 | area1 = np.sum(masks1, axis=0)
112 | area2 = np.sum(masks2, axis=0)
113 |
114 | # intersections and union
115 | intersections = np.dot(masks1.T, masks2)
116 | union = area1[:, None] + area2[None, :] - intersections
117 | overlaps = intersections / union
118 |
119 | return overlaps
120 |
121 |
122 | def non_max_suppression(boxes, scores, threshold):
123 | """Performs non-maximum suppression and returns indices of kept boxes.
124 | boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lies outside the box.
125 | scores: 1-D array of box scores.
126 | threshold: Float. IoU threshold to use for filtering.
127 | """
128 | assert boxes.shape[0] > 0
129 | if boxes.dtype.kind != "f":
130 | boxes = boxes.astype(np.float32)
131 |
132 | # Compute box areas
133 | y1 = boxes[:, 0]
134 | x1 = boxes[:, 1]
135 | y2 = boxes[:, 2]
136 | x2 = boxes[:, 3]
137 | area = (y2 - y1) * (x2 - x1)
138 |
139 | # Get indices of boxes sorted by scores (highest first)
140 | ixs = scores.argsort()[::-1]
141 |
142 | pick = []
143 | while len(ixs) > 0:
144 | # Pick top box and add its index to the list
145 | i = ixs[0]
146 | pick.append(i)
147 | # Compute IoU of the picked box with the rest
148 | iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]])
149 | # Identify boxes with IoU over the threshold. This
150 | # returns indices into ixs[1:], so add 1 to get
151 | # indices into ixs.
152 | remove_ixs = np.where(iou > threshold)[0] + 1
153 | # Remove indices of the picked and overlapped boxes.
154 | ixs = np.delete(ixs, remove_ixs)
155 | ixs = np.delete(ixs, 0)
156 | return np.array(pick, dtype=np.int32)
157 |
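# Illustrative usage sketch, not part of the original module: suppress one of
# two heavily overlapping boxes, keeping the higher-scoring one.
def _demo_non_max_suppression():
    boxes = np.array([[0, 0, 10, 10],
                      [1, 1, 10, 10],      # overlaps the first box (IoU 0.81)
                      [20, 20, 30, 30]])   # far away, kept regardless
    scores = np.array([0.9, 0.8, 0.7])
    keep = non_max_suppression(boxes, scores, threshold=0.5)
    # keep == [0, 2]: the second box is suppressed by the first
    return keep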
158 |
159 | def apply_box_deltas(boxes, deltas):
160 | """Applies the given deltas to the given boxes.
161 | boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box.
162 | deltas: [N, (dy, dx, log(dh), log(dw))]
163 | """
164 | boxes = boxes.astype(np.float32)
165 | # Convert to y, x, h, w
166 | height = boxes[:, 2] - boxes[:, 0]
167 | width = boxes[:, 3] - boxes[:, 1]
168 | center_y = boxes[:, 0] + 0.5 * height
169 | center_x = boxes[:, 1] + 0.5 * width
170 | # Apply deltas
171 | center_y += deltas[:, 0] * height
172 | center_x += deltas[:, 1] * width
173 | height *= np.exp(deltas[:, 2])
174 | width *= np.exp(deltas[:, 3])
175 | # Convert back to y1, x1, y2, x2
176 | y1 = center_y - 0.5 * height
177 | x1 = center_x - 0.5 * width
178 | y2 = y1 + height
179 | x2 = x1 + width
180 | return np.stack([y1, x1, y2, x2], axis=1)
181 |
182 |
183 | def box_refinement_graph(box, gt_box):
184 | """Compute refinement needed to transform box to gt_box.
185 | box and gt_box are [N, (y1, x1, y2, x2)]
186 | """
187 | box = tf.cast(box, tf.float32)
188 | gt_box = tf.cast(gt_box, tf.float32)
189 |
190 | height = box[:, 2] - box[:, 0]
191 | width = box[:, 3] - box[:, 1]
192 | center_y = box[:, 0] + 0.5 * height
193 | center_x = box[:, 1] + 0.5 * width
194 |
195 | gt_height = gt_box[:, 2] - gt_box[:, 0]
196 | gt_width = gt_box[:, 3] - gt_box[:, 1]
197 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
198 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
199 |
200 | dy = (gt_center_y - center_y) / height
201 | dx = (gt_center_x - center_x) / width
202 | dh = tf.math.log(gt_height / height)
203 | dw = tf.math.log(gt_width / width)
204 |
205 | result = tf.stack([dy, dx, dh, dw], axis=1)
206 | return result
207 |
208 |
209 | def box_refinement(box, gt_box):
210 | """Compute refinement needed to transform box to gt_box.
211 | box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is
212 | assumed to be outside the box.
213 | """
214 | box = box.astype(np.float32)
215 | gt_box = gt_box.astype(np.float32)
216 |
217 | height = box[:, 2] - box[:, 0]
218 | width = box[:, 3] - box[:, 1]
219 | center_y = box[:, 0] + 0.5 * height
220 | center_x = box[:, 1] + 0.5 * width
221 |
222 | gt_height = gt_box[:, 2] - gt_box[:, 0]
223 | gt_width = gt_box[:, 3] - gt_box[:, 1]
224 | gt_center_y = gt_box[:, 0] + 0.5 * gt_height
225 | gt_center_x = gt_box[:, 1] + 0.5 * gt_width
226 |
227 | dy = (gt_center_y - center_y) / height
228 | dx = (gt_center_x - center_x) / width
229 | dh = np.log(gt_height / height)
230 | dw = np.log(gt_width / width)
231 |
232 | return np.stack([dy, dx, dh, dw], axis=1)
233 |
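# Illustrative sketch, not part of the original module: box_refinement() and
# apply_box_deltas() invert each other, so applying the computed deltas to the
# source box recovers the ground-truth box.
def _demo_box_refinement_roundtrip():
    box = np.array([[10., 10., 50., 50.]])
    gt_box = np.array([[12., 8., 48., 52.]])
    deltas = box_refinement(box, gt_box)        # (dy, dx, log(dh), log(dw))
    recovered = apply_box_deltas(box, deltas)
    assert np.allclose(recovered, gt_box)
    return deltas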
234 |
235 | ############################################################
236 | # Dataset
237 | ############################################################
238 |
239 | class Dataset(object):
240 | """The base class for dataset classes.
241 | To use it, create a new class that adds functions specific to the dataset
242 | you want to use. For example:
243 |
244 | class CatsAndDogsDataset(Dataset):
245 | def load_cats_and_dogs(self):
246 | ...
247 | def load_mask(self, image_id):
248 | ...
249 | def image_reference(self, image_id):
250 | ...
251 |
252 | See COCODataset and ShapesDataset as examples.
253 | """
254 |
255 | def __init__(self, class_map=None):
256 | self._image_ids = []
257 | self.image_info = []
258 | # Background is always the first class
259 | self.class_info = [{"source": "", "id": 0, "name": "BG"}]
260 | self.source_class_ids = {}
261 |
262 | def add_class(self, source, class_id, class_name):
263 | assert "." not in source, "Source name cannot contain a dot"
264 | # Does the class exist already?
265 | for info in self.class_info:
266 | if info['source'] == source and info["id"] == class_id:
267 | # source.class_id combination already available, skip
268 | return
269 | # Add the class
270 | self.class_info.append({
271 | "source": source,
272 | "id": class_id,
273 | "name": class_name,
274 | })
275 |
276 | def add_image(self, source, image_id, path, **kwargs):
277 | image_info = {
278 | "id": image_id,
279 | "source": source,
280 | "path": path,
281 | }
282 | image_info.update(kwargs)
283 | self.image_info.append(image_info)
284 |
285 | def image_reference(self, image_id):
286 | """Return a link to the image in its source Website or details about
287 | the image that help looking it up or debugging it.
288 |
289 | Override for your dataset, but pass to this function
290 | if you encounter images not in your dataset.
291 | """
292 | return ""
293 |
294 | def prepare(self, class_map=None):
295 | """Prepares the Dataset class for use.
296 |
297 | TODO: class map is not supported yet. When done, it should handle mapping
298 | classes from different datasets to the same class ID.
299 | """
300 |
301 | def clean_name(name):
302 | """Returns a shorter version of object names for cleaner display."""
303 | return ",".join(name.split(",")[:1])
304 |
305 | # Build (or rebuild) everything else from the info dicts.
306 | self.num_classes = len(self.class_info)
307 | self.class_ids = np.arange(self.num_classes)
308 | self.class_names = [clean_name(c["name"]) for c in self.class_info]
309 | self.num_images = len(self.image_info)
310 | self._image_ids = np.arange(self.num_images)
311 |
312 | # Mapping from source class and image IDs to internal IDs
313 | self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
314 | for info, id in zip(self.class_info, self.class_ids)}
315 | self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
316 | for info, id in zip(self.image_info, self.image_ids)}
317 |
318 | # Map sources to class_ids they support
319 | self.sources = list(set([i['source'] for i in self.class_info]))
320 | self.source_class_ids = {}
321 | # Loop over datasets
322 | for source in self.sources:
323 | self.source_class_ids[source] = []
324 | # Find classes that belong to this dataset
325 | for i, info in enumerate(self.class_info):
326 | # Include BG class in all datasets
327 | if i == 0 or source == info['source']:
328 | self.source_class_ids[source].append(i)
329 |
330 | def map_source_class_id(self, source_class_id):
331 | """Takes a source class ID and returns the int class ID assigned to it.
332 |
333 | For example:
334 | dataset.map_source_class_id("coco.12") -> 23
335 | """
336 | return self.class_from_source_map[source_class_id]
337 |
338 | def get_source_class_id(self, class_id, source):
339 | """Map an internal class ID to the corresponding class ID in the source dataset."""
340 | info = self.class_info[class_id]
341 | assert info['source'] == source
342 | return info['id']
343 |
344 | @property
345 | def image_ids(self):
346 | return self._image_ids
347 |
348 | def source_image_link(self, image_id):
349 | """Returns the path or URL to the image.
350 | Override this to return a URL to the image if it's available online for easy
351 | debugging.
352 | """
353 | return self.image_info[image_id]["path"]
354 |
355 | def load_image(self, image_id):
356 | """Load the specified image and return a [H,W,3] Numpy array.
357 | """
358 | # Load image
359 | image = skimage.io.imread(self.image_info[image_id]['path'])
360 | # If grayscale, convert to RGB for consistency.
361 | if image.ndim != 3:
362 | image = skimage.color.gray2rgb(image)
363 | # If it has an alpha channel, remove it for consistency
364 | if image.shape[-1] == 4:
365 | image = image[..., :3]
366 | return image
367 |
368 | def load_mask(self, image_id):
369 | """Load instance masks for the given image.
370 |
371 | Different datasets use different ways to store masks. Override this
372 | method to load instance masks and return them in the form of an
373 | array of binary masks of shape [height, width, instances].
374 |
375 | Returns:
376 | masks: A bool array of shape [height, width, instance count] with
377 | a binary mask per instance.
378 | class_ids: a 1D array of class IDs of the instance masks.
379 | """
380 | # Override this function to load a mask from your dataset.
381 | # Otherwise, it returns an empty mask.
382 | logging.warning("You are using the default load_mask(); override it in your Dataset subclass to load real masks.")
383 | mask = np.empty([0, 0, 0])
384 | class_ids = np.empty([0], np.int32)
385 | return mask, class_ids
386 |
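# Illustrative sketch, not part of the original module: a minimal Dataset
# subclass. The source name, class name, image path, and mask contents below
# are hypothetical; a real subclass would read its annotations from disk.
class _DemoDataset(Dataset):
    def load_demo(self):
        self.add_class("demo", 1, "resume_section")
        self.add_image("demo", image_id=0, path="/path/to/image.png")

    def load_mask(self, image_id):
        # One dummy 64x64 instance mask of class 1.
        masks = np.ones([64, 64, 1], dtype=bool)
        class_ids = np.array([1], dtype=np.int32)
        return masks, class_ids

# Typical flow:
#     dataset = _DemoDataset(); dataset.load_demo(); dataset.prepare()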
387 |
388 | def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
389 | """Resizes an image keeping the aspect ratio unchanged.
390 |
391 | min_dim: if provided, resizes the image such that its smaller
392 | dimension == min_dim
393 | max_dim: if provided, ensures that the image's longest side doesn't
394 | exceed this value.
395 | min_scale: if provided, ensure that the image is scaled up by at least
396 | this percent even if min_dim doesn't require it.
397 | mode: Resizing mode.
398 | none: No resizing. Return the image unchanged.
399 | square: Resize and pad with zeros to get a square image
400 | of size [max_dim, max_dim].
401 | pad64: Pads width and height with zeros to make them multiples of 64.
402 | If min_dim or min_scale are provided, it scales the image up
403 | before padding. max_dim is ignored in this mode.
404 | The multiple of 64 is needed to ensure smooth scaling of feature
405 | maps up and down the 6 levels of the FPN pyramid (2**6=64).
406 | crop: Picks random crops from the image. First, scales the image based
407 | on min_dim and min_scale, then picks a random crop of
408 | size min_dim x min_dim. Can be used in training only.
409 | max_dim is not used in this mode.
410 |
411 | Returns:
412 | image: the resized image
413 | window: (y1, x1, y2, x2). If max_dim is provided, padding might
414 | be inserted in the returned image. If so, this window is the
415 | coordinates of the image part of the full image (excluding
416 | the padding). The x2, y2 pixels are not included.
417 | scale: The scale factor used to resize the image
418 | padding: Padding added to the image [(top, bottom), (left, right), (0, 0)]
419 | """
420 | # Keep track of image dtype and return results in the same dtype
421 | image_dtype = image.dtype
422 | # Default window (y1, x1, y2, x2) and default scale == 1.
423 | h, w = image.shape[:2]
424 | window = (0, 0, h, w)
425 | scale = 1
426 | padding = [(0, 0), (0, 0), (0, 0)]
427 | crop = None
428 |
429 | if mode == "none":
430 | return image, window, scale, padding, crop
431 |
432 | # Scale?
433 | if min_dim:
434 | # Scale up but not down
435 | scale = max(1, min_dim / min(h, w))
436 | if min_scale and scale < min_scale:
437 | scale = min_scale
438 |
439 | # Does it exceed max dim?
440 | if max_dim and mode == "square":
441 | image_max = max(h, w)
442 | if round(image_max * scale) > max_dim:
443 | scale = max_dim / image_max
444 |
445 | # Resize image using bilinear interpolation
446 | if scale != 1:
447 | image = resize(image, (round(h * scale), round(w * scale)),
448 | preserve_range=True)
449 |
450 | # Need padding or cropping?
451 | if mode == "square":
452 | # Get new height and width
453 | h, w = image.shape[:2]
454 | top_pad = (max_dim - h) // 2
455 | bottom_pad = max_dim - h - top_pad
456 | left_pad = (max_dim - w) // 2
457 | right_pad = max_dim - w - left_pad
458 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
459 | image = np.pad(image, padding, mode='constant', constant_values=0)
460 | window = (top_pad, left_pad, h + top_pad, w + left_pad)
461 | elif mode == "pad64":
462 | h, w = image.shape[:2]
463 | # Both sides must be divisible by 64
464 | assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64"
465 | # Height
466 | if h % 64 > 0:
467 | max_h = h - (h % 64) + 64
468 | top_pad = (max_h - h) // 2
469 | bottom_pad = max_h - h - top_pad
470 | else:
471 | top_pad = bottom_pad = 0
472 | # Width
473 | if w % 64 > 0:
474 | max_w = w - (w % 64) + 64
475 | left_pad = (max_w - w) // 2
476 | right_pad = max_w - w - left_pad
477 | else:
478 | left_pad = right_pad = 0
479 | padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)]
480 | image = np.pad(image, padding, mode='constant', constant_values=0)
481 | window = (top_pad, left_pad, h + top_pad, w + left_pad)
482 | elif mode == "crop":
483 | # Pick a random crop
484 | h, w = image.shape[:2]
485 | y = random.randint(0, (h - min_dim))
486 | x = random.randint(0, (w - min_dim))
487 | crop = (y, x, min_dim, min_dim)
488 | image = image[y:y + min_dim, x:x + min_dim]
489 | window = (0, 0, min_dim, min_dim)
490 | else:
491 | raise Exception("Mode {} not supported".format(mode))
492 | return image.astype(image_dtype), window, scale, padding, crop
493 |
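# Illustrative sketch, not part of the original module: resize a dummy image to
# a 256x256 square with zero padding ("square" mode). Sizes are made-up values.
def _demo_resize_image():
    image = np.zeros([100, 200, 3], dtype=np.uint8)
    resized, window, scale, padding, crop = resize_image(
        image, min_dim=128, max_dim=256, mode="square")
    # resized.shape == (256, 256, 3); window marks the non-padded image region
    return resized, window, scale, padding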
494 |
495 | def resize_mask(mask, scale, padding, crop=None):
496 | """Resizes a mask using the given scale and padding.
497 | Typically, you get the scale and padding from resize_image() to
498 | ensure both the image and the mask are resized consistently.
499 |
500 | scale: mask scaling factor
501 | padding: Padding to add to the mask in the form
502 | [(top, bottom), (left, right), (0, 0)]
503 | """
504 | # Suppress warning from scipy 0.13.0, the output shape of zoom() is
505 | # calculated with round() instead of int()
506 | with warnings.catch_warnings():
507 | warnings.simplefilter("ignore")
508 | mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0)
509 | if crop is not None:
510 | y, x, h, w = crop
511 | mask = mask[y:y + h, x:x + w]
512 | else:
513 | mask = np.pad(mask, padding, mode='constant', constant_values=0)
514 | return mask
515 |
516 |
517 | def minimize_mask(bbox, mask, mini_shape):
518 | """Resize masks to a smaller version to reduce memory load.
519 | Mini-masks can be resized back to image scale using expand_masks()
520 |
521 | See inspect_data.ipynb notebook for more details.
522 | """
523 | mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool)
524 | for i in range(mask.shape[-1]):
525 | # Pick slice and cast to bool in case load_mask() returned wrong dtype
526 | m = mask[:, :, i].astype(bool)
527 | y1, x1, y2, x2 = bbox[i][:4]
528 | m = m[y1:y2, x1:x2]
529 | if m.size == 0:
530 | raise Exception("Invalid bounding box with area of zero")
531 | # Resize with bilinear interpolation
532 | m = resize(m, mini_shape)
533 | mini_mask[:, :, i] = np.around(m).astype(bool)
534 | return mini_mask
535 |
536 |
537 | def expand_mask(bbox, mini_mask, image_shape):
538 | """Resizes mini masks back to image size. Reverses the change
539 | of minimize_mask().
540 |
541 | See inspect_data.ipynb notebook for more details.
542 | """
543 | mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool)
544 | for i in range(mask.shape[-1]):
545 | m = mini_mask[:, :, i]
546 | y1, x1, y2, x2 = bbox[i][:4]
547 | h = y2 - y1
548 | w = x2 - x1
549 | # Resize with bilinear interpolation
550 | m = resize(m, (h, w))
551 | mask[y1:y2, x1:x2, i] = np.around(m).astype(bool)
552 | return mask
553 |
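# Illustrative sketch, not part of the original module: shrink a full-size
# instance mask to a 28x28 mini-mask and expand it back to image size.
def _demo_mini_masks():
    mask = np.zeros([128, 128, 1], dtype=bool)
    mask[20:80, 30:90, 0] = True
    bbox = extract_bboxes(mask)                      # [[20, 30, 80, 90]]
    mini = minimize_mask(bbox, mask, (28, 28))       # (28, 28, 1)
    restored = expand_mask(bbox, mini, (128, 128))   # (128, 128, 1)
    return mini, restored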
554 |
555 | # TODO: Build and use this function to reduce code duplication
556 | def mold_mask(mask, config):
557 | pass
558 |
559 |
560 | def unmold_mask(mask, bbox, image_shape):
561 | """Converts a mask generated by the neural network to a format similar
562 | to its original shape.
563 | mask: [height, width] of type float. A small, typically 28x28 mask.
564 | bbox: [y1, x1, y2, x2]. The box to fit the mask in.
565 |
566 | Returns a binary mask with the same size as the original image.
567 | """
568 | threshold = 0.5
569 | y1, x1, y2, x2 = bbox
570 | mask = resize(mask, (y2 - y1, x2 - x1))
571 | mask = np.where(mask >= threshold, 1, 0).astype(bool)
572 |
573 | # Put the mask in the right location.
574 | full_mask = np.zeros(image_shape[:2], dtype=bool)
575 | full_mask[y1:y2, x1:x2] = mask
576 | return full_mask
577 |
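# Illustrative sketch, not part of the original module: place a 28x28 network
# output mask into a full-size boolean mask at the given box.
def _demo_unmold_mask():
    small_mask = np.random.rand(28, 28).astype(np.float32)
    bbox = np.array([10, 20, 60, 90])                # y1, x1, y2, x2
    full = unmold_mask(small_mask, bbox, image_shape=(128, 128, 3))
    # full.shape == (128, 128); True only inside the box where mask >= 0.5
    return full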
578 |
579 | ############################################################
580 | # Anchors
581 | ############################################################
582 |
583 | def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride):
584 | """
585 | scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128]
586 | ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2]
587 | shape: [height, width] spatial shape of the feature map over which
588 | to generate anchors.
589 | feature_stride: Stride of the feature map relative to the image in pixels.
590 | anchor_stride: Stride of anchors on the feature map. For example, if the
591 | value is 2 then generate anchors for every other feature map pixel.
592 | """
593 | # Get all combinations of scales and ratios
594 | scales, ratios = np.meshgrid(np.array(scales), np.array(ratios))
595 | scales = scales.flatten()
596 | ratios = ratios.flatten()
597 |
598 | # Enumerate heights and widths from scales and ratios
599 | heights = scales / np.sqrt(ratios)
600 | widths = scales * np.sqrt(ratios)
601 |
602 | # Enumerate shifts in feature space
603 | shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride
604 | shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride
605 | shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y)
606 |
607 | # Enumerate combinations of shifts, widths, and heights
608 | box_widths, box_centers_x = np.meshgrid(widths, shifts_x)
609 | box_heights, box_centers_y = np.meshgrid(heights, shifts_y)
610 |
611 | # Reshape to get a list of (y, x) and a list of (h, w)
612 | box_centers = np.stack(
613 | [box_centers_y, box_centers_x], axis=2).reshape([-1, 2])
614 | box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2])
615 |
616 | # Convert to corner coordinates (y1, x1, y2, x2)
617 | boxes = np.concatenate([box_centers - 0.5 * box_sizes,
618 | box_centers + 0.5 * box_sizes], axis=1)
619 | return boxes
620 |
621 |
622 | def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides,
623 | anchor_stride):
624 | """Generate anchors at different levels of a feature pyramid. Each scale
625 | is associated with a level of the pyramid, but each ratio is used in
626 | all levels of the pyramid.
627 |
628 | Returns:
629 | anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted
630 | with the same order of the given scales. So, anchors of scale[0] come
631 | first, then anchors of scale[1], and so on.
632 | """
633 | # Anchors
634 | # [anchor_count, (y1, x1, y2, x2)]
635 | anchors = []
636 | for i in range(len(scales)):
637 | anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i],
638 | feature_strides[i], anchor_stride))
639 | return np.concatenate(anchors, axis=0)
640 |
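# Illustrative sketch, not part of the original module: generate anchors for a
# toy two-level feature pyramid. The scales, shapes, and strides are made-up
# values, not the ones used by the training script.
def _demo_generate_anchors():
    anchors = generate_pyramid_anchors(
        scales=(32, 64),                    # one scale per pyramid level
        ratios=[0.5, 1, 2],                 # shared across levels
        feature_shapes=[[16, 16], [8, 8]],
        feature_strides=[8, 16],
        anchor_stride=1)
    # 16*16*3 + 8*8*3 = 960 anchors, each as (y1, x1, y2, x2) in pixels
    return anchors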
641 |
642 | ############################################################
643 | # Miscellaneous
644 | ############################################################
645 |
646 | def trim_zeros(x):
647 | """It's common to have tensors larger than the available data and
648 | pad with zeros. This function removes rows that are all zeros.
649 |
650 | x: [rows, columns].
651 | """
652 | assert len(x.shape) == 2
653 | return x[~np.all(x == 0, axis=1)]
654 |
655 |
656 | def compute_matches(gt_boxes, gt_class_ids, gt_masks,
657 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
658 | iou_threshold=0.5, score_threshold=0.0):
659 | """Finds matches between prediction and ground truth instances.
660 |
661 | Returns:
662 | gt_match: 1-D array. For each GT box it has the index of the matched
663 | predicted box.
664 | pred_match: 1-D array. For each predicted box, it has the index of
665 | the matched ground truth box.
666 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
667 | """
668 | # Trim zero padding
669 | # TODO: cleaner to do zero unpadding upstream
670 | gt_boxes = trim_zeros(gt_boxes)
671 | gt_masks = gt_masks[..., :gt_boxes.shape[0]]
672 | pred_boxes = trim_zeros(pred_boxes)
673 | pred_scores = pred_scores[:pred_boxes.shape[0]]
674 | # Sort predictions by score from high to low
675 | indices = np.argsort(pred_scores)[::-1]
676 | pred_boxes = pred_boxes[indices]
677 | pred_class_ids = pred_class_ids[indices]
678 | pred_scores = pred_scores[indices]
679 | pred_masks = pred_masks[..., indices]
680 |
681 | # Compute IoU overlaps [pred_masks, gt_masks]
682 | overlaps = compute_overlaps_masks(pred_masks, gt_masks)
683 |
684 | # Loop through predictions and find matching ground truth boxes
685 | match_count = 0
686 | pred_match = -1 * np.ones([pred_boxes.shape[0]])
687 | gt_match = -1 * np.ones([gt_boxes.shape[0]])
688 | for i in range(len(pred_boxes)):
689 | # Find best matching ground truth box
690 | # 1. Sort matches by score
691 | sorted_ixs = np.argsort(overlaps[i])[::-1]
692 | # 2. Remove low scores
693 | low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0]
694 | if low_score_idx.size > 0:
695 | sorted_ixs = sorted_ixs[:low_score_idx[0]]
696 | # 3. Find the match
697 | for j in sorted_ixs:
698 | # If ground truth box is already matched, go to next one
699 | if gt_match[j] > -1:
700 | continue
701 | # If we reach IoU smaller than the threshold, end the loop
702 | iou = overlaps[i, j]
703 | if iou < iou_threshold:
704 | break
705 | # Do we have a match?
706 | if pred_class_ids[i] == gt_class_ids[j]:
707 | match_count += 1
708 | gt_match[j] = i
709 | pred_match[i] = j
710 | break
711 |
712 | return gt_match, pred_match, overlaps
713 |
714 |
715 | def compute_ap(gt_boxes, gt_class_ids, gt_masks,
716 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
717 | iou_threshold=0.5):
718 | """Compute Average Precision at a set IoU threshold (default 0.5).
719 |
720 | Returns:
721 | mAP: Average Precision at the given IoU threshold
722 | precisions: List of precisions at different class score thresholds.
723 | recalls: List of recall values at different class score thresholds.
724 | overlaps: [pred_boxes, gt_boxes] IoU overlaps.
725 | """
726 | # Get matches and overlaps
727 | gt_match, pred_match, overlaps = compute_matches(
728 | gt_boxes, gt_class_ids, gt_masks,
729 | pred_boxes, pred_class_ids, pred_scores, pred_masks,
730 | iou_threshold)
731 |
732 | # Compute precision and recall at each prediction box step
733 | precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1)
734 | recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match)
735 |
736 | # Pad with start and end values to simplify the math
737 | precisions = np.concatenate([[0], precisions, [0]])
738 | recalls = np.concatenate([[0], recalls, [1]])
739 |
740 | # Ensure precision values decrease but don't increase. This way, the
741 | # precision value at each recall threshold is the maximum it can be
742 | # for all following recall thresholds, as specified by the VOC paper.
743 | for i in range(len(precisions) - 2, -1, -1):
744 | precisions[i] = np.maximum(precisions[i], precisions[i + 1])
745 |
746 | # Compute mean AP over recall range
747 | indices = np.where(recalls[:-1] != recalls[1:])[0] + 1
748 | mAP = np.sum((recalls[indices] - recalls[indices - 1]) *
749 | precisions[indices])
750 |
751 | return mAP, precisions, recalls, overlaps
752 |
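# Illustrative sketch, not part of the original module: evaluate one perfect
# prediction against one ground-truth instance. All arrays are synthetic and
# only meant to show the expected shapes; a perfect match yields AP == 1.0.
def _demo_compute_ap():
    gt_boxes = np.array([[10, 10, 50, 50]])
    gt_class_ids = np.array([1])
    gt_masks = np.zeros([64, 64, 1], dtype=bool)
    gt_masks[10:50, 10:50, 0] = True
    mAP, precisions, recalls, overlaps = compute_ap(
        gt_boxes, gt_class_ids, gt_masks,
        gt_boxes.copy(), np.array([1]), np.array([0.95]), gt_masks.copy())
    return mAP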
753 |
754 | def compute_ap_range(gt_box, gt_class_id, gt_mask,
755 | pred_box, pred_class_id, pred_score, pred_mask,
756 | iou_thresholds=None, verbose=1):
757 | """Compute AP over a range or IoU thresholds. Default range is 0.5-0.95."""
758 | # Default is 0.5 to 0.95 with increments of 0.05
759 | iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05)
760 |
761 | # Compute AP over range of IoU thresholds
762 | AP = []
763 | for iou_threshold in iou_thresholds:
764 | ap, precisions, recalls, overlaps =\
765 | compute_ap(gt_box, gt_class_id, gt_mask,
766 | pred_box, pred_class_id, pred_score, pred_mask,
767 | iou_threshold=iou_threshold)
768 | if verbose:
769 | print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap))
770 | AP.append(ap)
771 | AP = np.array(AP).mean()
772 | if verbose:
773 | print("AP @{:.2f}-{:.2f}:\t {:.3f}".format(
774 | iou_thresholds[0], iou_thresholds[-1], AP))
775 | return AP
776 |
777 |
778 | def compute_recall(pred_boxes, gt_boxes, iou):
779 | """Compute the recall at the given IoU threshold. It's an indication
780 | of how many GT boxes were found by the given prediction boxes.
781 |
782 | pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates
783 | gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates
784 | """
785 | # Measure overlaps
786 | overlaps = compute_overlaps(pred_boxes, gt_boxes)
787 | iou_max = np.max(overlaps, axis=1)
788 | iou_argmax = np.argmax(overlaps, axis=1)
789 | positive_ids = np.where(iou_max >= iou)[0]
790 | matched_gt_boxes = iou_argmax[positive_ids]
791 |
792 | recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0]
793 | return recall, positive_ids
794 |
795 |
796 | # ## Batch Slicing
797 | # Some custom layers support a batch size of 1 only, and require a lot of work
798 | # to support batches greater than 1. This function slices an input tensor
799 | # across the batch dimension and feeds batches of size 1. Effectively,
800 | # an easy way to support batches > 1 quickly with little code modification.
801 | # In the long run, it's more efficient to modify the code to support large
802 | # batches and get rid of this function. Consider this a temporary solution.
803 | def batch_slice(inputs, graph_fn, batch_size, names=None):
804 | """Splits inputs into slices and feeds each slice to a copy of the given
805 | computation graph and then combines the results. It allows you to run a
806 | graph on a batch of inputs even if the graph is written to support one
807 | instance only.
808 |
809 | inputs: list of tensors. All must have the same first dimension length
810 | graph_fn: A function that returns a TF tensor that's part of a graph.
811 | batch_size: number of slices to divide the data into.
812 | names: If provided, assigns names to the resulting tensors.
813 | """
814 | if not isinstance(inputs, list):
815 | inputs = [inputs]
816 |
817 | outputs = []
818 | for i in range(batch_size):
819 | inputs_slice = [x[i] for x in inputs]
820 | output_slice = graph_fn(*inputs_slice)
821 | if not isinstance(output_slice, (tuple, list)):
822 | output_slice = [output_slice]
823 | outputs.append(output_slice)
824 | # Change outputs from a list of slices where each is
825 | # a list of outputs to a list of outputs and each has
826 | # a list of slices
827 | outputs = list(zip(*outputs))
828 |
829 | if names is None:
830 | names = [None] * len(outputs)
831 |
832 | result = [tf.stack(o, axis=0, name=n)
833 | for o, n in zip(outputs, names)]
834 | if len(result) == 1:
835 | result = result[0]
836 |
837 | return result
838 |
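# Illustrative sketch, not part of the original module: run a per-sample graph
# function over a batch of 2 by slicing along the batch dimension.
def _demo_batch_slice():
    boxes = tf.constant([[[0., 0., 1., 1.]],
                         [[0., 0., 2., 2.]]])    # [batch=2, 1, 4]
    doubled = batch_slice(boxes, lambda b: b * 2.0, batch_size=2)
    # doubled has the same shape as boxes, with every value doubled
    return doubled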
839 |
840 | def download_trained_weights(coco_model_path, verbose=1):
841 | """Download COCO trained weights from Releases.
842 |
843 | coco_model_path: local path of COCO trained weights
844 | """
845 | if verbose > 0:
846 | print("Downloading pretrained model to " + coco_model_path + " ...")
847 | with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out:
848 | shutil.copyfileobj(resp, out)
849 | if verbose > 0:
850 | print("... done downloading pretrained model!")
851 |
852 |
853 | def norm_boxes(boxes, shape):
854 | """Converts boxes from pixel coordinates to normalized coordinates.
855 | boxes: [N, (y1, x1, y2, x2)] in pixel coordinates
856 | shape: [..., (height, width)] in pixels
857 |
858 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
859 | coordinates it's inside the box.
860 |
861 | Returns:
862 | [N, (y1, x1, y2, x2)] in normalized coordinates
863 | """
864 | h, w = shape
865 | scale = np.array([h - 1, w - 1, h - 1, w - 1])
866 | shift = np.array([0, 0, 1, 1])
867 | return np.divide((boxes - shift), scale).astype(np.float32)
868 |
869 |
870 | def denorm_boxes(boxes, shape):
871 | """Converts boxes from normalized coordinates to pixel coordinates.
872 | boxes: [N, (y1, x1, y2, x2)] in normalized coordinates
873 | shape: [..., (height, width)] in pixels
874 |
875 | Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
876 | coordinates it's inside the box.
877 |
878 | Returns:
879 | [N, (y1, x1, y2, x2)] in pixel coordinates
880 | """
881 | h, w = shape
882 | scale = np.array([h - 1, w - 1, h - 1, w - 1])
883 | shift = np.array([0, 0, 1, 1])
884 | return np.around(np.multiply(boxes, scale) + shift).astype(np.int32)
885 |
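# Illustrative sketch, not part of the original module: round-trip a box
# between pixel and normalized coordinates for a 1024x1024 image.
def _demo_norm_denorm_boxes():
    boxes = np.array([[0, 0, 1024, 1024]])
    normalized = norm_boxes(boxes, shape=(1024, 1024))   # [[0., 0., 1., 1.]]
    restored = denorm_boxes(normalized, shape=(1024, 1024))
    return normalized, restored                          # restored == boxes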
886 |
887 | def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
888 | preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
889 | """A wrapper for Scikit-Image resize().
890 |
891 | Scikit-Image generates warnings on every call to resize() if it doesn't
892 | receive the right parameters. The right parameters depend on the version
893 | of skimage. This solves the problem by using different parameters per
894 | version. And it provides a central place to control resizing defaults.
895 | """
896 | if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
897 | # New in 0.14: anti_aliasing. Default it to False for backward
898 | # compatibility with skimage 0.13.
899 | return skimage.transform.resize(
900 | image, output_shape,
901 | order=order, mode=mode, cval=cval, clip=clip,
902 | preserve_range=preserve_range, anti_aliasing=anti_aliasing,
903 | anti_aliasing_sigma=anti_aliasing_sigma)
904 | else:
905 | return skimage.transform.resize(
906 | image, output_shape,
907 | order=order, mode=mode, cval=cval, clip=clip,
908 | preserve_range=preserve_range)
909 |
--------------------------------------------------------------------------------