├── .gitignore
├── LICENSE
├── README.rst
├── bin
│   ├── extract-figures.py
│   └── locate-thumbnail.py
├── image_mining
│   ├── __init__.py
│   ├── figure_extraction.py
│   └── utils.py
└── setup.py
/.gitignore:
--------------------------------------------------------------------------------
1 | data
2 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Released into the public domain
2 |
3 |
--------------------------------------------------------------------------------
/README.rst:
--------------------------------------------------------------------------------
1 | Experimental image mining using OpenCV
2 | ======================================
3 |
4 | Attempting to build tools to mine interesting data from large collections of scanned images
5 |
6 | Current
7 | -------
8 |
9 | * bin/locate-thumbnail:
10 |   - Reconstructing thumbnails using OpenCV
11 |   - Upgrading Image Thumbnails… Or How to Fill a Large Display Without Your Content Team Quitting
12 | * bin/extract-figures:
13 |   - Locate interesting non-text elements (images, figures, tables, etc.) on scanned book pages
14 |
15 | Prerequisites
16 | -------------
17 |
18 | * Python 2.7
19 | * OpenCV 2.4+
20 | * numpy
21 |
22 | Using Mac Homebrew this should install cleanly::
23 |
24 | brew install python numpy opencv
25 |
26 | On Ubuntu 12.04 Precise the following is known to work; note that a PPA is needed for OpenCV 2.4+::
27 |
28 | sudo add-apt-repository ppa:alexei.colin/opencv
29 | sudo apt-get update
30 | sudo apt-get install python-numpy python-opencv
31 |
32 | Discussion
33 | ----------
34 |
35 | .. image:: https://badges.gitter.im/Join%20Chat.svg
36 | :target: https://gitter.im/acdha/image-mining?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge
37 | :alt: Join Chat on Gitter.im
38 |
--------------------------------------------------------------------------------
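Usage sketch (the file names here are hypothetical; the flags are taken from the
argparse definitions in the scripts below)::

    # Extract likely figures from scanned pages, saving crops and JSON sidecars:
    bin/extract-figures.py --output-directory extracts/ --save-json page-001.jpg page-002.jpg

    # Locate a thumbnail within its master image and save the reconstructed crop:
    bin/locate-thumbnail.py --save-thumbnail --save-json thumbnail.jpg master.tiff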
/bin/extract-figures.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import argparse
4 | import json
5 | import os
6 | import sys
7 |
8 | import cv2
9 | import numpy
10 |
11 | from image_mining.figure_extraction import FigureExtractor
12 | from image_mining.utils import open_image
13 |
14 |
15 | def display_images(extractor, files):
16 | window_name = "Controls"
17 |
18 | images = []
19 | for f in files:
20 | print "Loading %s" % f
21 |
22 | try:
23 | images.append(open_image(f))
24 | except StandardError as exc:
25 | print >>sys.stderr, exc
26 | continue
27 |
28 | def update_display(*args):
29 | extractor.canny_threshold = cv2.getTrackbarPos("Canny Threshold", window_name)
30 | extractor.erosion_element = cv2.getTrackbarPos("Erosion Element", window_name)
31 | extractor.erosion_size = cv2.getTrackbarPos("Erosion Size", window_name)
32 | extractor.dilation_element = cv2.getTrackbarPos("Dilation Element", window_name)
33 | extractor.dilation_size = cv2.getTrackbarPos("Dilation Size", window_name)
34 |
35 | # TODO: tame configuration hideousness:
36 | labels = ["Canny Threshold: %s" % extractor.canny_threshold,
37 | "Erosion Element: %s" % FigureExtractor.MORPH_TYPE_KEYS[extractor.erosion_element],
38 | "Erosion Size: %s" % extractor.erosion_size,
39 | "Dilation Element: %s" % FigureExtractor.MORPH_TYPE_KEYS[extractor.dilation_element],
40 | "Dilation Size: %s" % extractor.dilation_size]
41 |
42 | labels_img = numpy.zeros((30 * (len(labels) + 1), 600, 3), numpy.uint8)
43 | for i, label in enumerate(labels, 1):
44 | cv2.putText(labels_img, label, (0, i * 30), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (192, 192, 192))
45 | cv2.imshow("Controls", labels_img)
46 |
47 | print "Settings:\n\t", "\n\t".join(labels)
48 | print
49 |
50 | for name, image in images:
51 | filtered_image = extractor.filter_image(image)
52 | contours, hierarchy = extractor.find_contours(filtered_image)
53 |
54 | # The filtered image will be heavily processed down to 1-bit depth. We'll convert it to RGB
55 | # so we can display the effects of the filters with full-color overlays for detected figures:
56 | output = cv2.cvtColor(filtered_image, cv2.COLOR_GRAY2RGB)
57 |
58 | print "Processing %s" % name
59 |
60 | for bbox in extractor.get_bounding_boxes_from_contours(contours, filtered_image):
61 | print "\tExtract: %s" % bbox
62 | output[bbox.image_slice] = image[bbox.image_slice]
63 |
64 |                 cv2.polylines(output, [bbox.poly], True, (32, 192, 32), thickness=3)
65 | cv2.drawContours(output, contours, bbox.contour_index, (32, 192, 32), hierarchy=hierarchy, maxLevel=0)
66 |
67 | cv2.rectangle(output, (bbox.x1, bbox.y1), (bbox.x2, bbox.y2), color=(32, 192, 192))
68 |
69 | cv2.imshow(name, output)
70 |
71 | cv2.namedWindow(window_name)
72 | cv2.resizeWindow(window_name, 600, 340)
73 |
74 | cv2.createTrackbar("Canny Threshold", window_name, extractor.canny_threshold, 255, update_display)
75 | cv2.createTrackbar("Erosion Element", window_name, extractor.erosion_element, len(extractor.MORPH_TYPES) - 1, update_display)
76 | cv2.createTrackbar("Erosion Size", window_name, extractor.erosion_size, 64, update_display)
77 | cv2.createTrackbar("Dilation Element", window_name, extractor.dilation_element, len(extractor.MORPH_TYPES) - 1, update_display)
78 | cv2.createTrackbar("Dilation Size", window_name, extractor.dilation_size, 64, update_display)
79 |
80 | update_display()
81 |
82 | if args.interactive:
83 | while cv2.waitKey() not in (13, 27):
84 | continue
85 | cv2.destroyAllWindows()
86 |
87 |
88 | if __name__ == "__main__":
89 | parser = argparse.ArgumentParser()
90 |
91 | parser.add_argument('--debug', action="store_true", help="Open debugger for errors")
92 |
93 | parser.add_argument('files', metavar="IMAGE_FILE", nargs="+")
94 |
95 | mode_group = parser.add_mutually_exclusive_group(required=True)
96 | mode_group.add_argument('--interactive', default=False, action="store_true", help="Display visualization windows")
97 | mode_group.add_argument('--output-directory', default=None, help="Directory to store extracted files")
98 |
99 | parser.add_argument('--save-json', action="store_true", help="Save bounding boxes as JSON files along with extracts")
100 |
101 | extraction_params = parser.add_argument_group("Extraction Parameters")
102 | extraction_params.add_argument('--canny-threshold', type=int, default=0, help="Canny edge detection threshold (%(type)s, default=%(default)s, 0 to disable)")
103 |
104 | extraction_params.add_argument('--erosion-element', default="rectangle", choices=FigureExtractor.MORPH_TYPE_KEYS, help="Erosion Element (default: %(default)s)")
105 | extraction_params.add_argument('--erosion-size', type=int, default=0, help="Erosion Size (%(type)s, default=%(default)s, 0 to disable)")
106 |
107 | extraction_params.add_argument('--dilation-element', default="rectangle", choices=FigureExtractor.MORPH_TYPE_KEYS, help="Dilation Element (default: %(default)s)")
108 | extraction_params.add_argument('--dilation-size', type=int, default=0, help="Dilation Size (%(type)s, default=%(default)s, 0 to disable)")
109 |
110 | args = parser.parse_args()
111 |
112 | if not args.output_directory:
113 | output_dir = None
114 | else:
115 | output_dir = os.path.realpath(args.output_directory)
116 | if not os.path.isdir(output_dir):
117 | parser.error("Output directory %s does not exist" % args.output_directory)
118 | else:
119 | print "Output will be saved to %s" % output_dir
120 |
121 | if output_dir is None and not args.interactive:
122 | parser.error("Either use --interactive or specify an output directory to save results!")
123 |
124 | if args.debug:
125 | try:
126 | import bpdb as pdb
127 | except ImportError:
128 | import pdb
129 |
130 | # FIXME: we should have a way to enumerate this from FigureExtractor and feed argparse that way:
131 | param_names = [action.dest for action in extraction_params._group_actions]
132 | params = {k: v for (k, v) in args._get_kwargs() if k in param_names}
133 |
134 | try:
135 | extractor = FigureExtractor(**params)
136 |
137 | if args.interactive:
138 | display_images(extractor, args.files)
139 | else:
140 | for f in args.files:
141 | try:
142 | base_name, source_image = open_image(f)
143 | except StandardError as exc:
144 | print >>sys.stderr, exc
145 | continue
146 |
147 | output_base = os.path.join(output_dir, base_name)
148 |
149 | print "Processing %s" % f
150 |
151 | boxes = []
152 |
153 | for i, bbox in enumerate(extractor.find_figures(source_image), 1):
154 | extracted = source_image[bbox.image_slice]
155 |                     extract_filename = "%s-%d.jpg" % (output_base, i)
156 | print "\tSaving %s" % extract_filename
157 | cv2.imwrite(extract_filename, extracted)
158 |
159 | boxes.append(bbox.as_dict())
160 |
161 | if args.save_json and boxes:
162 | json_data = {"source_image": {"filename": f,
163 | "dimensions": {"width": source_image.shape[1],
164 | "height": source_image.shape[0]}},
165 | "regions": boxes}
166 |
167 |                     json_filename = "%s.json" % output_base
168 | with open(json_filename, "wb") as json_f:
169 | json.dump(json_data, json_f, allow_nan=False)
170 | print "\tSaved extract information to %s" % json_filename
171 |
172 | except Exception as exc:
173 | if args.debug:
174 | print >>sys.stderr, exc
175 | pdb.pm()
176 | raise
177 |
--------------------------------------------------------------------------------
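For reference, a --save-json sidecar produced by the loop above has the following
shape. The values are illustrative; the region keys come from ImageRegion.as_dict()
in image_mining/figure_extraction.py::

    {
        "source_image": {
            "filename": "page-001.jpg",
            "dimensions": {"width": 2400, "height": 3150}
        },
        "regions": [
            {"x1": 210, "y1": 340, "x2": 1180, "y2": 1460}
        ]
    }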
/bin/locate-thumbnail.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | # encoding: utf-8
3 | """
4 | Detect the crop box for a thumbnail inside a larger image
5 |
6 | The thumbnail image can be cropped and scaled arbitrarily from the larger image. Rotation and other more
7 | complex transformations should work but may lower accuracy.
8 | """
9 | from __future__ import (absolute_import, division, print_function,
10 | unicode_literals)
11 |
12 | import argparse
13 | import json
14 | import logging
15 | import os
16 | import sys
17 |
18 | import cv
19 | import cv2
20 | import numpy
21 | from image_mining.utils import open_image
22 |
23 |
24 | def match_images(template, source):
25 | """Return filtered matches from the template and source images"""
26 |
27 | # TODO: Compare non-encumbered options – see http://docs.opencv.org/modules/features2d/doc/features2d.html
28 | detector = cv2.SURF(400, 5, 5)
29 | matcher = cv2.BFMatcher(cv2.NORM_L2)
30 |
31 | kp1, desc1 = detector.detectAndCompute(template, None)
32 | kp2, desc2 = detector.detectAndCompute(source, None)
33 | logging.debug('Features: template %d, source %d', len(kp1), len(kp2))
34 |
35 | raw_matches = matcher.knnMatch(desc1, trainDescriptors=desc2, k=2)
36 | kp_pairs = filter_matches(kp1, kp2, raw_matches)
37 |
38 | return kp_pairs
39 |
40 |
41 | def filter_matches(kp1, kp2, matches, ratio=0.75):
42 | kp_pairs = []
43 |
44 |     for match in matches:
45 |         if len(match) == 2 and match[0].distance < match[1].distance * ratio:
46 |             kp_pairs.append((kp1[match[0].queryIdx], kp2[match[0].trainIdx]))
47 |
48 | return kp_pairs
49 |
50 |
51 | def autorotate_image(img, corners):
52 | corners_x, corners_y = zip(*corners)
53 |
54 |     # n.b. numpy rot90 rotates 90° counter-clockwise but our terminology is clockwise,
55 |     # so the rotations below aren't actually reversed:
56 |
57 |     logging.debug('Corner points: x=%s, y=%s', corners_x, corners_y)
58 |
59 | if (((min(corners_x[0], corners_x[1]) > max(corners_x[2], corners_x[3]))
60 | and min(corners_y[1], corners_y[2]) > max(corners_y[0], corners_y[3]))):
61 | return 270, numpy.rot90(img)
62 | elif min(corners_x[2], corners_x[3]) > max(corners_x[0], corners_x[1]):
63 | return 90, numpy.rot90(img, 3)
64 | elif min(corners_x[0], corners_x[3]) > max(corners_x[1], corners_x[2]):
65 | return 180, cv2.flip(img, -1)
66 | else:
67 | return 0, img
68 |
69 |
70 | def fit_image_within(img, max_height, max_width):
71 | current_h, current_w = img.shape[:2]
72 |
73 | # Confirm that we need to do anything:
74 | if current_h <= max_height and current_w <= max_width:
75 | return img
76 |
77 |     # Scale by whichever dimension is more constrained, so that both
78 |     # the height and the width limits are respected:
79 |     scale = min(max_height / current_h,
80 |                 max_width / current_w)
81 |
82 | new_dims = (int(round(current_w * scale)), int(round(current_h * scale)))
83 |
84 | # Note the flip from numpy's .shape to opencv's (x, y) format:
85 | logging.info('Resizing from %s to %s', (current_w, current_h), new_dims)
86 |
87 | return cv2.resize(img, new_dims, interpolation=cv2.INTER_AREA)
88 |
89 |
90 | def get_scaled_corners(thumbnail_image, source_image, full_source_image, kp_pairs, H):
91 | thumb_h, thumb_w = thumbnail_image.shape[:2]
92 |
93 | corners = numpy.float32([[0, 0], [thumb_w, 0], [thumb_w, thumb_h], [0, thumb_h]])
94 | corners = numpy.int32(cv2.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2))
95 |
96 | # It's possible for rounding errors to produce values which are slightly outside of the image dimensions
97 | # so we'll clamp the boundaries within the source image: https://github.com/acdha/image-mining/issues/5
98 | source_h, source_w = source_image.shape[:2]
99 |
100 | # Transpose the array so we can operate on it *in-place* to clamp values:
101 | corners_x, corners_y = corners.T
102 |     numpy.clip(corners_x, 0, source_w, out=corners_x)
103 |     numpy.clip(corners_y, 0, source_h, out=corners_y)
104 |
105 | corners = corners.tolist()
106 |
107 | logging.info("Thumbnail bounds within analyzed image: %s", corners)
108 |
109 | if full_source_image is not None and full_source_image is not source_image:
110 | scale_y = full_source_image.shape[0] / source_image.shape[0]
111 | scale_x = full_source_image.shape[1] / source_image.shape[1]
112 |
113 | corners = [(int(round(x * scale_x)), int(round(y * scale_y))) for x, y in corners]
114 |
115 | logging.info("Thumbnail bounds within full-size source image: %s", corners)
116 |
117 | return corners
118 |
119 |
120 | def adjust_crop_aspect_ratio(cropbox, target_aspect_ratio, original_height=0, original_width=0,
121 | max_height=0, max_width=0):
122 |
123 | new_crop_y, new_crop_x = cropbox
124 | new_crop_height = (new_crop_y[1] - new_crop_y[0])
125 | new_crop_width = (new_crop_x[1] - new_crop_x[0])
126 | new_aspect_ratio = new_crop_height / new_crop_width
127 |
128 | if abs(target_aspect_ratio - new_aspect_ratio) < 0.001:
129 | return cropbox
130 |
131 | logging.info('Adjusting reconstruction to match original %0.4f aspect ratio', target_aspect_ratio)
132 |
133 | assert original_height < new_crop_height
134 | assert original_width < new_crop_width
135 |
136 | # The basic idea is that we'll adjust the crop's short axis up or down to match the input aspect
137 | # ratio. To avoid shifting the crop too much we'll attempt to evenly move both sides as long as
138 | # that won't hit the image boundaries:
139 |
140 | if new_aspect_ratio > 1.0:
141 | scale = new_crop_width / original_width
142 | else:
143 | scale = new_crop_height / original_height
144 |
145 | logging.info('Original crop box: %r (%0.4f)', cropbox, new_crop_height / new_crop_width)
146 | logging.info('Reconstructed image is %0.2f%% of the original', scale * 100)
147 |
148 | delta_y = round(original_height * scale) - new_crop_height
149 | delta_x = round(original_width * scale) - new_crop_width
150 |
151 | logging.info('Crop box needs to change by: %0.1f x, %0.1f y', delta_x, delta_y)
152 |
153 | if delta_y != 0:
154 | new_crop_y = clamp_values(delta=delta_y, max_value=max_height, *cropbox[0])
155 |
156 | if delta_x != 0:
157 | new_crop_x = clamp_values(delta=delta_x, max_value=max_width, *cropbox[1])
158 |
159 | cropbox = (new_crop_y, new_crop_x)
160 |
161 | logging.info('Updated crop box: %r (%0.4f)', cropbox,
162 | (new_crop_y[1] - new_crop_y[0]) / (new_crop_x[1] - new_crop_x[0]))
163 |
164 | return cropbox
165 |
166 |
167 | def clamp_values(low_value, high_value, delta, min_value=0, max_value=0):
168 | if delta == 0.0:
169 | return low_value, high_value
170 |
171 | top_pad = bottom_pad = delta / 2
172 |
173 | if delta > 0:
174 | # We'll shift the box to avoid hitting an image edge:
175 | top_pad = max(0, top_pad)
176 | bottom_pad = delta - top_pad
177 |
178 | low_value = int(round(low_value - top_pad))
179 |
180 | if low_value < min_value:
181 | logging.warning('Clamping crop to %f instead of %f', min_value, low_value)
182 | bottom_pad += min_value - low_value
183 | low_value = min_value
184 |
185 | high_value = int(round(high_value + bottom_pad))
186 |
187 | if high_value > max_value:
188 | logging.warning('Clamping crop to %f instead of %f', max_value, high_value)
189 | high_value = max_value
190 |
191 | return low_value, high_value
192 |
193 |
194 | def reconstruct_thumbnail(thumbnail_image, source_image, corners, downsize_reconstruction=False,
195 | max_aspect_ratio_delta=0.1, match_aspect_ratio=False):
196 | logging.info("Reconstructing thumbnail from source image")
197 |
198 | thumb_h, thumb_w = thumbnail_image.shape[:2]
199 | source_h, source_w = source_image.shape[:2]
200 |
201 | old_aspect_ratio = thumb_h / thumb_w
202 |
203 | corners_x, corners_y = zip(*corners)
204 | new_thumb_crop = [(min(corners_y), max(corners_y)),
205 | (min(corners_x), max(corners_x))]
206 |
207 | if match_aspect_ratio:
208 | new_thumb_crop = adjust_crop_aspect_ratio(new_thumb_crop, old_aspect_ratio,
209 | original_height=thumb_h,
210 | original_width=thumb_w,
211 | max_height=source_h, max_width=source_w)
212 |
213 | new_thumb = source_image[slice(*new_thumb_crop[0]), slice(*new_thumb_crop[1])]
214 |
215 | new_thumb_rotation, new_thumb = autorotate_image(new_thumb, corners)
216 | logging.info('Detected image rotation: %d°', new_thumb_rotation)
217 |
218 | if match_aspect_ratio and new_thumb_rotation not in (0, 180):
219 | raise NotImplementedError('FIXME: refactor autorotation to work with aspect ratio matching!')
220 |
221 | new_thumb_h, new_thumb_w = new_thumb.shape[:2]
222 |
223 | if downsize_reconstruction and (new_thumb_h > thumb_h or new_thumb_w > thumb_w):
224 | new_thumb = fit_image_within(new_thumb, thumb_h, thumb_w)
225 |
226 | new_aspect_ratio = new_thumb.shape[0] / new_thumb.shape[1]
227 | logging.info('Master dimensions: width=%s, height=%s', source_image.shape[1], source_image.shape[0])
228 | logging.info('Thumbnail dimensions: width=%s, height=%s (aspect ratio: %0.4f)',
229 | thumbnail_image.shape[1], thumbnail_image.shape[0],
230 | old_aspect_ratio)
231 | logging.info('Reconstructed thumb dimensions: width=%s, height=%s (rotation=%d°, aspect ratio: %0.4f)',
232 | new_thumb.shape[1], new_thumb.shape[0],
233 | new_thumb_rotation, new_aspect_ratio)
234 |
235 | if match_aspect_ratio:
236 | scale = thumbnail_image.shape[0] / new_thumb.shape[0]
237 | if thumbnail_image.shape[:2] != tuple(int(round(i * scale)) for i in new_thumb.shape[:2]):
238 | raise RuntimeError('Unable to match aspect ratios: %0.4f != %0.4f' % (old_aspect_ratio,
239 | new_aspect_ratio))
240 |
241 | if abs(old_aspect_ratio - new_aspect_ratio) > max_aspect_ratio_delta:
242 | raise RuntimeError('Aspect ratios are significantly different – reconstruction likely failed!')
243 |
244 | if (new_thumb_h <= thumb_h) or (new_thumb_w <= thumb_w):
245 | raise RuntimeError("Reconstructed thumbnail wasn't larger than the original!")
246 |
247 | return new_thumb, new_thumb_crop, new_thumb_rotation
248 |
249 |
250 | def visualize_matches(source_image, original_thumbnail, reconstructed_thumbnail, corners, kp_pairs, mask):
251 | thumb_h, thumb_w = original_thumbnail.shape[:2]
252 | source_h, source_w = source_image.shape[:2]
253 |
254 | # Create a new image for the visualization:
255 | vis = numpy.zeros((max(thumb_h, source_h), thumb_w + source_w, source_image.shape[2]), numpy.uint8)
256 | # Draw the original images adjacent to each other:
257 | vis[:thumb_h, :thumb_w] = original_thumbnail
258 | vis[:source_h, thumb_w:thumb_w+source_w] = source_image
259 |
260 | if reconstructed_thumbnail is not None:
261 | # Display the reconstructed thumbnail just below the original thumbnail:
262 | reconstructed_thumbnail = fit_image_within(reconstructed_thumbnail, thumb_h, thumb_w)
263 | reconstructed_h, reconstructed_w = reconstructed_thumbnail.shape[:2]
264 | vis[thumb_h:thumb_h + reconstructed_h, :reconstructed_w] = reconstructed_thumbnail
265 |
266 | if corners is not None:
267 | # Highlight our bounding box on the source image:
268 | cv2.polylines(vis, [numpy.int32(corners) + (thumb_w, 0)], True, (255, 255, 255))
269 |
270 | thumb_points = numpy.int32([kpp[0].pt for kpp in kp_pairs])
271 | source_points = numpy.int32([kpp[1].pt for kpp in kp_pairs]) + (thumb_w, 0)
272 |
273 | # Points which fit the model will be marked in green:
274 | inlier_color = (0, 255, 0)
275 | # … while those which do not will be marked in red:
276 | outlier_color = (0, 0, 255)
277 | # Connecting lines will be less intense green:
278 | line_color = (0, 192, 0)
279 |
280 | if mask is None:
281 | mask = numpy.zeros(len(thumb_points))
282 |
283 | for (x1, y1), (x2, y2), inlier in zip(thumb_points, source_points, mask):
284 | if inlier:
285 | cv2.line(vis, (x1, y1), (x2, y2), line_color)
286 | cv2.circle(vis, (x1, y1), 2, inlier_color, -1)
287 | cv2.circle(vis, (x2, y2), 2, inlier_color, -1)
288 | else:
289 | cv2.circle(vis, (x1, y1), 2, outlier_color, -1)
290 | cv2.circle(vis, (x2, y2), 2, outlier_color, -1)
291 |
292 | return vis
293 |
294 |
295 | def find_homography(kp_pairs):
296 | mkp1, mkp2 = zip(*kp_pairs)
297 |
298 | p1 = numpy.float32([kp.pt for kp in mkp1])
299 | p2 = numpy.float32([kp.pt for kp in mkp2])
300 |
301 | assert len(kp_pairs) >= 4
302 |
303 | logging.debug('finding homography')
304 | H, mask = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)
305 | logging.info('%d inliers, %d matched features', numpy.sum(mask), len(mask))
306 | return H, mask
307 |
308 |
309 | def locate_thumbnail(thumbnail_filename, source_filename, display=False, save_visualization=False,
310 | save_reconstruction=False, reconstruction_format="jpg",
311 | max_aspect_ratio_delta=0.1, match_aspect_ratio=False,
312 | minimum_matches=10,
313 | json_output_filename=None, max_master_edge=4096, max_output_edge=2048):
314 | thumbnail_basename, thumbnail_image = open_image(thumbnail_filename)
315 | source_basename, source_image = open_image(source_filename)
316 |
317 | if (((source_image.shape[0] <= thumbnail_image.shape[0])
318 | or (source_image.shape[1] <= thumbnail_image.shape[1]))):
319 | raise RuntimeError("Master file wasn't larger than the thumbnail: %r vs %r" % (source_image.shape,
320 | thumbnail_image.shape))
321 |
322 | logging.info("Attempting to locate %s within %s", thumbnail_filename, source_filename)
323 |
324 | full_source_image = source_image
325 |     if max_master_edge and any(i > max_master_edge for i in source_image.shape[:2]):
326 | logging.info("Resizing master to fit within %d pixels", max_master_edge)
327 | source_image = fit_image_within(source_image, max_master_edge, max_master_edge)
328 |
329 | logging.info('Finding common features')
330 | kp_pairs = match_images(thumbnail_image, source_image)
331 |
332 | if len(kp_pairs) >= minimum_matches:
333 | title = "Found %d matches" % len(kp_pairs)
334 | logging.info(title)
335 |
336 | H, mask = find_homography(kp_pairs)
337 |
338 | corners = get_scaled_corners(thumbnail_image, source_image, full_source_image, kp_pairs, H)
339 |
340 | new_thumbnail, corners, rotation = reconstruct_thumbnail(thumbnail_image, full_source_image, corners,
341 | match_aspect_ratio=match_aspect_ratio,
342 | max_aspect_ratio_delta=max_aspect_ratio_delta)
343 |
344 | if json_output_filename:
345 | with open(json_output_filename, mode='wb') as json_file:
346 | json.dump({
347 | "master": {
348 | "source": source_filename,
349 | "dimensions": {
350 | "height": full_source_image.shape[0],
351 | "width": full_source_image.shape[1],
352 | }
353 | },
354 | "thumbnail": {
355 | "source": thumbnail_filename,
356 | "dimensions": {
357 | "height": thumbnail_image.shape[0],
358 | "width": thumbnail_image.shape[1],
359 | }
360 | },
361 | "bounding_box": {
362 | "height": corners[0][1] - corners[0][0],
363 | "width": corners[1][1] - corners[1][0],
364 | "x": corners[1][0],
365 | "y": corners[0][0],
366 | },
367 | "rotation_degrees": rotation
368 | }, json_file, indent=4)
369 |
370 | if save_reconstruction:
371 | new_filename = "%s.reconstructed.%s" % (thumbnail_basename, reconstruction_format)
372 |
373 | new_thumb_img = fit_image_within(new_thumbnail, max_output_edge, max_output_edge)
374 | cv2.imwrite(new_filename, new_thumb_img)
375 | logging.info("Saved reconstructed %s thumbnail %s", new_thumb_img.shape[:2], new_filename)
376 | else:
377 | logging.warning("Found only %d matches; skipping reconstruction", len(kp_pairs))
378 | title = "MATCH FAILED: %d pairs" % len(kp_pairs)
379 | new_thumbnail = corners = H = mask = None
380 |
381 | if display or save_visualization:
382 | vis_image = visualize_matches(source_image, thumbnail_image, new_thumbnail, corners, kp_pairs, mask)
383 |
384 | if save_visualization:
385 | vis_filename = "%s.visualized%s" % os.path.splitext(thumbnail_filename)
386 | cv2.imwrite(vis_filename, vis_image)
387 | logging.info("Saved match visualization %s", vis_filename)
388 |
389 | if display:
390 | # This may or may not exist depending on whether OpenCV was compiled using the QT backend:
391 | window_flags = getattr(cv, 'CV_WINDOW_NORMAL', cv.CV_WINDOW_AUTOSIZE)
392 | window_title = '%s - %s' % (thumbnail_basename, title)
393 | cv2.namedWindow(window_title, flags=window_flags)
394 | cv2.imshow(window_title, vis_image)
395 | cv2.waitKey()
396 | cv2.destroyAllWindows()
397 |
398 |
399 | def main():
400 | logging.basicConfig(level=logging.INFO, format='%(levelname)s %(funcName)s: %(message)s')
401 |
402 | parser = argparse.ArgumentParser()
403 | parser.add_argument('files', metavar="THUMBNAIL MASTER", nargs="+")
404 | parser.add_argument('--save-visualization', action="store_true", help="Save match visualization")
405 | parser.add_argument('--save-thumbnail', action="store_true",
406 | help="Save reconstructed thumbnail at full size")
407 | parser.add_argument('--save-json', action="store_true",
408 | help="Save JSON file with thumbnail crop information")
409 |     parser.add_argument('--thumbnail-format', default='jpg',
410 |                         help='Format for reconstructed thumbnails: png or jpg (default %(default)s)')
411 | parser.add_argument('--fit-master-within', type=int, default=8192,
412 | help="Resize master so the largest edge is below the specified value "
413 | "(faster but possibly less accurate)")
414 | parser.add_argument('--fit-output-within', type=int, default=2048,
415 | help="Resize output so the largest edge is below the specified value")
416 | parser.add_argument('--minimum-matches', type=int, default=20,
417 | help='Require at least this many features for a match (default %(default)s)')
418 | parser.add_argument('--max-aspect-ratio-delta', type=float, default=0.1,
419 |                         help='Raise an error if the reconstructed image\'s aspect ratio differs by more than '
420 |                              'this amount (default %(default)s)')
421 | parser.add_argument('--match-aspect-ratio', action='store_true',
422 | help='Adjust the reconstructed crop box to exactly match the original thumbnail')
423 | parser.add_argument('--display', action="store_true", help="Display match visualization")
424 | parser.add_argument('--debug', action="store_true", help="Open debugger for errors")
425 | args = parser.parse_args()
426 |
427 | if len(args.files) % 2 != 0:
428 | parser.error("Files must be provided in thumbnail and master pairs")
429 |
430 | if args.thumbnail_format not in ('jpg', 'png'):
431 | parser.error('Thumbnail format must be either jpg or png')
432 |
433 | if args.debug:
434 | import pdb
435 |
436 | for i in xrange(0, len(args.files), 2):
437 | thumbnail = args.files[i]
438 | source = args.files[i + 1]
439 |
440 | if args.save_json:
441 | json_output_filename = '%s.json' % os.path.splitext(thumbnail)[0]
442 | else:
443 | json_output_filename = None
444 |
445 | try:
446 | locate_thumbnail(thumbnail, source, display=args.display,
447 | save_reconstruction=args.save_thumbnail,
448 | reconstruction_format=args.thumbnail_format,
449 | save_visualization=args.save_visualization,
450 | json_output_filename=json_output_filename,
451 | max_master_edge=args.fit_master_within,
452 | max_output_edge=args.fit_output_within,
453 | max_aspect_ratio_delta=args.max_aspect_ratio_delta,
454 | match_aspect_ratio=args.match_aspect_ratio,
455 | minimum_matches=args.minimum_matches)
456 | except Exception as e:
457 | logging.error("Error processing %s %s: %s", thumbnail, source, e)
458 | if args.debug:
459 | pdb.post_mortem()
460 | sys.exit(1)
461 |
462 |
463 | if __name__ == '__main__':
464 | main()
465 |
--------------------------------------------------------------------------------
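The script above is a standard feature-matching pipeline: detect keypoints in both
images, keep the pairs which pass Lowe's ratio test, fit a homography with RANSAC,
and project the thumbnail's corners into the source. A condensed sketch of that
core, under the same OpenCV 2.4 assumptions (SURF requires a build with the
nonfree module)::

    import cv2
    import numpy


    def locate(template, source, ratio=0.75):
        """Return the corner points of the region in source matching template"""
        detector = cv2.SURF(400)
        matcher = cv2.BFMatcher(cv2.NORM_L2)

        kp1, desc1 = detector.detectAndCompute(template, None)
        kp2, desc2 = detector.detectAndCompute(source, None)

        # Lowe's ratio test: keep a match only when it is clearly better than
        # the runner-up candidate for the same query keypoint:
        pairs = [(kp1[m.queryIdx].pt, kp2[m.trainIdx].pt)
                 for m, n in matcher.knnMatch(desc1, desc2, k=2)
                 if m.distance < n.distance * ratio]

        p1 = numpy.float32([p for p, _ in pairs])
        p2 = numpy.float32([p for _, p in pairs])
        H, mask = cv2.findHomography(p1, p2, cv2.RANSAC, 5.0)

        # Project the template's corners through the homography to find the
        # matching quadrilateral inside the source image:
        h, w = template.shape[:2]
        corners = numpy.float32([[0, 0], [w, 0], [w, h], [0, h]])
        return cv2.perspectiveTransform(corners.reshape(1, -1, 2), H).reshape(-1, 2)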
/image_mining/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/acdha/image-mining/cfe842c42f122d676924b16f8af30c2431f9cd5c/image_mining/__init__.py
--------------------------------------------------------------------------------
/image_mining/figure_extraction.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | import logging
4 |
5 | import cv2
6 | import numpy
7 |
8 |
9 | class ImageRegion(object):
10 | def __init__(self, x1, y1, x2, y2, poly=None, contour_index=None):
11 | assert x1 < x2
12 | assert y1 < y2
13 | self.x1 = x1
14 | self.x2 = x2
15 | self.y1 = y1
16 | self.y2 = y2
17 |
18 | self.poly = poly
19 | self.contour_index = contour_index
20 |
21 | def __repr__(self):
22 | return "({0.x1}, {0.y1})-({0.x2}, {0.y2})".format(self)
23 |
24 | @property
25 | def area(self):
26 | return (self.y2 - self.y1) * (self.x2 - self.x1)
27 |
28 | @property
29 | def height(self):
30 | return self.y2 - self.y1
31 |
32 | @property
33 | def width(self):
34 | return self.x2 - self.x1
35 |
36 | @property
37 | def image_slice(self):
38 | """Return a Python slice suitable for use on an OpenCV image (i.e. numpy 2D array)"""
39 | return slice(self.y1, self.y2), slice(self.x1, self.x2)
40 |
41 | def contains(self, other):
42 | """Returns True if the other ImageRegion is entirely contained by this one"""
43 | return ((other.x1 >= self.x1) and (other.x2 <= self.x2)
44 | and (other.y1 >= self.y1) and (other.y2 <= self.y2))
45 |
46 | def overlaps(self, other):
47 |         """Returns True if any part of the other ImageRegion overlaps this one"""
48 |
49 | return (((self.x1 < other.x1 < self.x2) or (self.x1 < other.x2 < self.x2))
50 | and ((self.y1 < other.y1 < self.y2) or (self.y1 < other.y2 < self.y2)))
51 |
52 | def merge(self, other):
53 | """Expand this ImageRegion to contain other"""
54 | self.x1 = min(self.x1, other.x1)
55 | self.y1 = min(self.y1, other.y1)
56 | self.x2 = max(self.x2, other.x2)
57 | self.y2 = max(self.y2, other.y2)
58 |
59 | def as_dict(self):
60 | return {"x1": self.x1, "y1": self.y1, "x2": self.x2, "y2": self.y2}
61 |
62 |
63 | class FigureExtractor(object):
64 | MORPH_TYPES = {"cross": cv2.MORPH_CROSS,
65 | "ellipse": cv2.MORPH_ELLIPSE,
66 | "rectangle": cv2.MORPH_RECT}
67 | MORPH_TYPE_KEYS = sorted(MORPH_TYPES.keys())
68 |
69 |     def __init__(self, canny_threshold=0, erosion_element="rectangle", erosion_size=4,
70 |                  dilation_element="rectangle", dilation_size=4,
71 | min_area=0.01,
72 | min_height=0.1, max_height=0.9,
73 | min_width=0.1, max_width=0.9):
74 | # TODO: reconsider whether we should split to global config + per-image extractor instances
75 |
76 | # TODO: better way to set configuration options & docs
77 | self.canny_threshold = canny_threshold
78 | self.erosion_element = self.MORPH_TYPE_KEYS.index(erosion_element)
79 | self.erosion_size = erosion_size
80 | self.dilation_element = self.MORPH_TYPE_KEYS.index(dilation_element)
81 | self.dilation_size = dilation_size
82 |
83 | self.min_area_percentage = min_area
84 | self.min_height = min_height
85 | self.max_height = max_height
86 | self.min_width = min_width
87 | self.max_width = max_width
88 |
89 | def find_figures(self, source_image):
90 | assert source_image is not None, "source_image was None. Perhaps imread() failed?"
91 | output_image = self.filter_image(source_image)
92 |
93 | contours, hierarchy = self.find_contours(output_image)
94 |
95 | for bbox in self.get_bounding_boxes_from_contours(contours, source_image):
96 | yield bbox
97 |
98 | def _find_contours_opencv2(self, image):
99 | return cv2.findContours(image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
100 |
101 | def _find_contours_opencv3(self, image):
102 | _, a, b = cv2.findContours(image, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
103 | return a, b
104 |
105 | if cv2.__version__.startswith('2.'):
106 | find_contours = _find_contours_opencv2
107 | else:
108 | find_contours = _find_contours_opencv3
109 |
110 | def filter_image(self, source_image):
111 | # TODO: Refactor this into a more reusable filter chain
112 |
113 | output_image = cv2.cvtColor(source_image, cv2.COLOR_BGR2GRAY)
114 | # TODO: make blurring configurable:
115 | # output_image = cv2.medianBlur(output_image, 5)
116 | # output_image = cv2.blur(output_image, (3, 3))
117 | # output_image = cv2.GaussianBlur(output_image, (5, 5))
118 |
119 | # TODO: make thresholding configurable
120 | # See http://docs.opencv.org/modules/imgproc/doc/miscellaneous_transformations.html#adaptivethreshold
121 | # output_image = cv2.adaptiveThreshold(output_image, 255.0, cv2.THRESH_BINARY_INV, cv2.ADAPTIVE_THRESH_MEAN_C, 15, 5)
122 | # threshold_rc, output_image = cv2.threshold(output_image, 192, 255, cv2.THRESH_BINARY_INV)
123 |
124 | # Otsu's binarization: see http://bit.ly/194YCPp
125 | output_image = cv2.GaussianBlur(output_image, (3, 3), 0)
126 | threshold_rc, output_image = cv2.threshold(output_image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
127 |
128 | if self.erosion_size > 0:
129 | element_name = self.MORPH_TYPE_KEYS[self.erosion_element]
130 | element = self.MORPH_TYPES[element_name]
131 |
132 | structuring_element = cv2.getStructuringElement(element, (self.erosion_size, self.erosion_size))
133 | output_image = cv2.erode(output_image, structuring_element)
134 |
135 | if self.dilation_size > 0:
136 | element_name = self.MORPH_TYPE_KEYS[self.dilation_element]
137 | element = self.MORPH_TYPES[element_name]
138 |
139 | structuring_element = cv2.getStructuringElement(element, (self.dilation_size, self.dilation_size))
140 | output_image = cv2.dilate(output_image, structuring_element)
141 |
142 | if self.canny_threshold > 0:
143 | # TODO: Make all of Canny options configurable
144 | # See http://docs.opencv.org/modules/imgproc/doc/feature_detection.html#canny
145 |             output_image = cv2.Canny(output_image, self.canny_threshold, self.canny_threshold * 3, apertureSize=3)
146 |
147 | return output_image
148 |
149 | def detect_lines(self, source_image):
150 | # TODO: Make HoughLinesP a configurable option
151 | lines = cv2.HoughLinesP(source_image, rho=1, theta=numpy.pi / 180,
152 | threshold=160, minLineLength=80, maxLineGap=10)
153 |
154 | # for line in lines[0]:
155 | # cv2.line(output_image, (line[0], line[1]), (line[2], line[3]), (0, 0, 255), 2, 4)
156 | return lines
157 |
158 | def get_bounding_boxes_from_contours(self, contours, source_image):
159 | # We'll return the boxes ordered largest first to make overlaps easier to see interactively:
160 | boxes = sorted(self.filter_bounding_boxes(contours, source_image), reverse=True,
161 | key=lambda i: i.area)
162 |
163 | # This could be stored in a much more efficient structure but in testing the number
164 | # of boxes is so small that it doesn't seem worth greater effort:
165 | boxes = [i for i in boxes if not any(j.contains(i) for j in boxes if j is not i)]
166 |
167 | restart = True
168 | while restart:
169 | restart = False
170 | for i in boxes:
171 | other_boxes = [j for j in boxes if j is not i]
172 | for j in other_boxes:
173 | if j.overlaps(i):
174 |                         logging.info("Merging overlapping extracts: %s %s", i, j)
175 | i.merge(j)
176 | boxes.remove(j)
177 | restart = True
178 | break
179 |
180 | return boxes
181 |
182 | def filter_bounding_boxes(self, contours, source_image):
183 | # TODO: confirm that the min area check buys us anything over the bounding box min/max filtering
184 | min_area = self.min_area_percentage * source_image.size
185 |
186 | # TODO: more robust algorithm for detecting likely scan edge artifacts which can handle cropped scans of large images (e.g. http://dl.wdl.org/107_1_1.png)
187 | max_height = int(round(self.max_height * source_image.shape[0]))
188 | max_width = int(round(self.max_width * source_image.shape[1]))
189 | min_height = int(round(self.min_height * source_image.shape[0]))
190 | min_width = int(round(self.min_width * source_image.shape[1]))
191 |
192 |         logging.info("Filtering contours by area (>%d pixels) and bounding box (height %d to %d, width %d to %d)",
193 |                      min_area, min_height, max_height, min_width, max_width)
194 |
195 | for i, contour in enumerate(contours):
196 |             area = cv2.contourArea(contour, False)
197 |
198 | if area < min_area:
199 | logging.debug("Contour %4d: failed area check", i)
200 | continue
201 |
202 | poly = cv2.approxPolyDP(contour, 0.01 * cv2.arcLength(contour, False), False)
203 | x, y, w, h = cv2.boundingRect(poly)
204 | bbox = ImageRegion(x, y, x + w, y + h, poly=poly, contour_index=i)
205 |
206 | if w > max_width or w < min_width or h > max_height or h < min_height:
207 | logging.debug("Contour %4d: failed min/max check: %s", i, bbox)
208 | continue
209 |
210 | yield bbox
211 |
--------------------------------------------------------------------------------
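Stripped of configuration, the pipeline above is: grayscale, Otsu binarization,
optional erosion/dilation to merge nearby marks into single blobs, then bounding
boxes around the surviving contours. A minimal standalone sketch (OpenCV 2.4 API;
the file name and morphology size are illustrative)::

    import cv2

    image = cv2.imread("page-001.jpg", cv2.IMREAD_COLOR)

    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    blurred = cv2.GaussianBlur(gray, (3, 3), 0)

    # Otsu's method picks the binarization threshold automatically:
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)

    # Dilation merges nearby marks so that a figure becomes one connected blob:
    element = cv2.getStructuringElement(cv2.MORPH_RECT, (4, 4))
    binary = cv2.dilate(binary, element)

    contours, hierarchy = cv2.findContours(binary, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    for contour in contours:
        x, y, w, h = cv2.boundingRect(contour)
        print x, y, w, h  # candidate region, before any area/size filtering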
/image_mining/utils.py:
--------------------------------------------------------------------------------
1 | # encoding: utf-8
2 | from __future__ import absolute_import, unicode_literals, print_function
3 |
4 | from urllib import urlopen
5 | from urlparse import urlparse
6 | import os
7 |
8 | import cv2
9 | import numpy
10 |
11 |
12 | def open_image(file_or_url):
13 | """Load an OpenCV image from a filename or URL
14 |
15 | Returns a base_name, image tuple containing a processed name suitable for naming output files
16 | """
17 |
18 |     if file_or_url.startswith(("http://", "https://")):
19 | source_image = open_image_from_url(file_or_url, cv2_img_flag=cv2.IMREAD_COLOR)
20 |
21 | url_p = urlparse(file_or_url)
22 |
23 | base_name = os.path.splitext(os.path.basename(url_p.path))[0]
24 | else:
25 | if not os.path.exists(file_or_url):
26 | raise IOError("%s does not exist" % file_or_url)
27 |
28 | base_name = os.path.splitext(os.path.basename(file_or_url))[0]
29 |
30 | source_image = cv2.imread(file_or_url, flags=cv2.IMREAD_COLOR)
31 |
32 | if source_image is None:
33 | raise RuntimeError("%s could not be decoded as an image" % file_or_url)
34 |
35 | return base_name, source_image
36 |
37 |
38 | def open_image_from_url(url, cv2_img_flag=0):
39 | """Attempt to load an OpenCV image from a URL"""
40 | # See http://stackoverflow.com/a/13329446/59984
41 | request = urlopen(url)
42 | img_array = numpy.asarray(bytearray(request.read()), dtype=numpy.uint8)
43 | return cv2.imdecode(img_array, cv2_img_flag)
44 |
--------------------------------------------------------------------------------
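open_image() accepts either a local path or an HTTP(S) URL and always returns a
(base_name, image) pair so that callers can derive output file names uniformly.
A quick usage sketch (the URL is illustrative)::

    from image_mining.utils import open_image

    base_name, image = open_image("http://example.org/scans/page-001.jpg")
    print base_name    # "page-001" -- path and extension are stripped
    print image.shape  # (height, width, 3) BGR array decoded by OpenCV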
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 |
3 | setup(name='image-mining',
4 | version='0.1.6',
5 | author='Chris Adams',
6 | author_email='chris@improbable.org',
7 | packages=['image_mining'],
8 | scripts=['bin/extract-figures.py', 'bin/locate-thumbnail.py'],
9 | url='https://github.com/acdha/image-mining/',
10 |       license='LICENSE',
11 | description='Extract useful information from scanned images using OpenCV',
12 | long_description=open('README.rst').read(),
13 | install_requires=['numpy'])
14 |
--------------------------------------------------------------------------------
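Given this setup.py, a development checkout installs in the usual setuptools
fashion (assuming pip is available); the scripts= entry puts both bin/ scripts
on the PATH::

    pip install -e .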