├── setup.cfg
├── selectivesearch
    ├── __init__.py
    └── selectivesearch.py
├── example
    ├── result.png
    └── example.py
├── setup.py
├── .gitignore
├── LICENSE
└── README.md


/setup.cfg:
--------------------------------------------------------------------------------
1 | [metadata]
2 | description-file = README.md
3 | 


--------------------------------------------------------------------------------
/selectivesearch/__init__.py:
--------------------------------------------------------------------------------
1 | from .selectivesearch import selective_search  # NOQA
2 | 


--------------------------------------------------------------------------------
/example/result.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/AlpacaTechJP/selectivesearch/HEAD/example/result.png


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from setuptools import setup, find_packages
 2 | 
 3 | setup(
 4 |     name="selectivesearch",
 5 |     version="0.4",
 6 |     url="https://github.com/AlpacaDB/selectivesearch",
 7 |     description="Selective Search implementation for Python",
 8 |     author="AlpacaDB, Inc.",
 9 |     license='MIT',
10 |     classifiers=[
11 |         'Development Status :: 3 - Alpha',
12 |         'Intended Audience :: Information Technology',
13 |         'License :: OSI Approved :: MIT License',
14 |         'Programming Language :: Python :: 2.7',
15 |         'Programming Language :: Python :: 3',
16 |     ],
17 |     keywords='rcnn',
18 |     packages=find_packages(),
19 |     install_requires=['numpy', 'scikit-image'],
20 | )
21 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | 
 5 | # C extensions
 6 | *.so
 7 | 
 8 | # Distribution / packaging
 9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | #  Usually these files are written by a python script from a template
28 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *,cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | # VS code
60 | .vscode
61 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015-2016 AlpacaDB
 4 | Copyright (c) 2016 Oussama ENNAFII
 5 | 
 6 | Permission is hereby granted, free of charge, to any person obtaining a copy
 7 | of this software and associated documentation files (the "Software"), to deal
 8 | in the Software without restriction, including without limitation the rights
 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
23 | 
24 | 


--------------------------------------------------------------------------------
/example/example.py:
--------------------------------------------------------------------------------
 1 | # -*- coding: utf-8 -*-
 2 | from __future__ import (
 3 |     division,
 4 |     print_function,
 5 | )
 6 | 
 7 | import skimage.data
 8 | import matplotlib.pyplot as plt
 9 | import matplotlib.patches as mpatches
10 | import selectivesearch
11 | 
12 | 
13 | def main():
14 | 
15 |     # loading astronaut image
16 |     img = skimage.data.astronaut()
17 | 
18 |     # perform selective search
19 |     img_lbl, regions = selectivesearch.selective_search(
20 |         img, scale=500, sigma=0.9, min_size=10)
21 | 
22 |     candidates = set()
23 |     for r in regions:
24 |         # excluding same rectangle (with different segments)
25 |         if r['rect'] in candidates:
26 |             continue
27 |         # excluding regions smaller than 2000 pixels
28 |         if r['size'] < 2000:
29 |             continue
30 |         # distorted rects
31 |         x, y, w, h = r['rect']
32 |         if w / h > 1.2 or h / w > 1.2:
33 |             continue
34 |         candidates.add(r['rect'])
35 | 
36 |     # draw rectangles on the original image
37 |     fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6))
38 |     ax.imshow(img)
39 |     for x, y, w, h in candidates:
40 |         print(x, y, w, h)
41 |         rect = mpatches.Rectangle(
42 |             (x, y), w, h, fill=False, edgecolor='red', linewidth=1)
43 |         ax.add_patch(rect)
44 | 
45 |     plt.show()
46 | 
47 | if __name__ == "__main__":
48 |     main()
49 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Selective Search Implementation for Python
 2 | 
 3 | This is a simple Selective Search Implementation for Python
 4 | 
 5 | ## Install
 6 | 
 7 | ```
 8 | $ pip install selectivesearch
 9 | ```
10 | 
11 | ## Usage
12 | 
13 | It is super-simple.
14 | 
15 | ```python
16 | import skimage.data
17 | import selectivesearch
18 | 
19 | img = skimage.data.astronaut()
20 | img_lbl, regions = selectivesearch.selective_search(img, scale=500, sigma=0.9, min_size=10)
21 | regions[:10]
22 | =>
23 | [{'labels': [0.0], 'rect': (0, 0, 15, 24), 'size': 260},
24 |  {'labels': [1.0], 'rect': (13, 0, 1, 12), 'size': 23},
25 |  {'labels': [2.0], 'rect': (0, 15, 15, 11), 'size': 30},
26 |  {'labels': [3.0], 'rect': (15, 14, 0, 0), 'size': 1},
27 |  {'labels': [4.0], 'rect': (0, 0, 61, 153), 'size': 4927},
28 |  {'labels': [5.0], 'rect': (0, 12, 61, 142), 'size': 177},
29 |  {'labels': [6.0], 'rect': (7, 54, 6, 17), 'size': 8},
30 |  {'labels': [7.0], 'rect': (28, 50, 18, 32), 'size': 22},
31 |  {'labels': [8.0], 'rect': (2, 99, 7, 24), 'size': 24},
32 |  {'labels': [9.0], 'rect': (14, 118, 79, 117), 'size': 4008}]
33 | ```
34 | 
35 | See also an example/example.py which generates :
36 | ![alt tag](https://github.com/AlpacaDB/selectivesearch/raw/develop/example/result.png)
37 | 
38 | ## Parameters of selective search
39 | 
40 | Let's see this paper: http://cs.brown.edu/~pff/papers/seg-ijcv.pdf
41 | 
42 | #### sigma
43 | 
44 | ```
45 | In general we use a Gaussian filter to
46 | smooth the image slightly before computing the edge weights, in order to compensate
47 | for digitization artifacts. We always use a Gaussian with σ = 0.8, which does not
48 | produce any visible change to the image but helps remove artifacts.
49 | ```
50 | 
51 | #### min_size
52 | 
53 | If the rect size is reached on `min_size`, the calculation is stopped.
54 | 
55 | #### scale
56 | 
57 | ```
58 | There is one runtime parameter for the algorithm, which is the value of k that
59 | is used to compute the threshold function τ . Recall we use the function τ (C) =
60 | 14
61 | k/|C| where |C| is the number of elements in C. Thus k effectively sets a scale of
62 | observation, in that a larger k causes a preference for larger components. We use
63 | two different parameter settings for the examples in this section (and throughout the
64 | paper), depending on the resolution of the image and the degree to which fine detail
65 | is important in the scene.
66 | ```
67 | 
68 | ## Blog
69 | - EN: http://blog.alpaca.ai/open-source-pure-python-selective-search-and-advanced-object-recognition-with-labellio/
70 | - JP: http://blog-jp.alpaca.ai/entry/2015/08/05/235408
71 | 


--------------------------------------------------------------------------------
/selectivesearch/selectivesearch.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | from __future__ import division
  3 | 
  4 | import skimage.io
  5 | import skimage.feature
  6 | import skimage.color
  7 | import skimage.transform
  8 | import skimage.util
  9 | import skimage.segmentation
 10 | import numpy
 11 | 
 12 | 
 13 | # "Selective Search for Object Recognition" by J.R.R. Uijlings et al.
 14 | #
 15 | #  - Modified version with LBP extractor for texture vectorization
 16 | 
 17 | 
 18 | def _generate_segments(im_orig, scale, sigma, min_size):
 19 |     """
 20 |         segment smallest regions by the algorithm of Felzenswalb and
 21 |         Huttenlocher
 22 |     """
 23 | 
 24 |     # open the Image
 25 |     im_mask = skimage.segmentation.felzenszwalb(
 26 |         skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma,
 27 |         min_size=min_size)
 28 | 
 29 |     # merge mask channel to the image as a 4th channel
 30 |     im_orig = numpy.append(
 31 |         im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2)
 32 |     im_orig[:, :, 3] = im_mask
 33 | 
 34 |     return im_orig
 35 | 
 36 | 
 37 | def _sim_colour(r1, r2):
 38 |     """
 39 |         calculate the sum of histogram intersection of colour
 40 |     """
 41 |     return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])])
 42 | 
 43 | 
 44 | def _sim_texture(r1, r2):
 45 |     """
 46 |         calculate the sum of histogram intersection of texture
 47 |     """
 48 |     return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])])
 49 | 
 50 | 
 51 | def _sim_size(r1, r2, imsize):
 52 |     """
 53 |         calculate the size similarity over the image
 54 |     """
 55 |     return 1.0 - (r1["size"] + r2["size"]) / imsize
 56 | 
 57 | 
 58 | def _sim_fill(r1, r2, imsize):
 59 |     """
 60 |         calculate the fill similarity over the image
 61 |     """
 62 |     bbsize = (
 63 |         (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
 64 |         * (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
 65 |     )
 66 |     return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize
 67 | 
 68 | 
 69 | def _calc_sim(r1, r2, imsize):
 70 |     return (_sim_colour(r1, r2) + _sim_texture(r1, r2)
 71 |             + _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize))
 72 | 
 73 | 
 74 | def _calc_colour_hist(img):
 75 |     """
 76 |         calculate colour histogram for each region
 77 | 
 78 |         the size of output histogram will be BINS * COLOUR_CHANNELS(3)
 79 | 
 80 |         number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]
 81 | 
 82 |         extract HSV
 83 |     """
 84 | 
 85 |     BINS = 25
 86 |     hist = numpy.array([])
 87 | 
 88 |     for colour_channel in (0, 1, 2):
 89 | 
 90 |         # extracting one colour channel
 91 |         c = img[:, colour_channel]
 92 | 
 93 |         # calculate histogram for each colour and join to the result
 94 |         hist = numpy.concatenate(
 95 |             [hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]])
 96 | 
 97 |     # L1 normalize
 98 |     hist = hist / len(img)
 99 | 
100 |     return hist
101 | 
102 | 
103 | def _calc_texture_gradient(img):
104 |     """
105 |         calculate texture gradient for entire image
106 | 
107 |         The original SelectiveSearch algorithm proposed Gaussian derivative
108 |         for 8 orientations, but we use LBP instead.
109 | 
110 |         output will be [height(*)][width(*)]
111 |     """
112 |     ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2]))
113 | 
114 |     for colour_channel in (0, 1, 2):
115 |         ret[:, :, colour_channel] = skimage.feature.local_binary_pattern(
116 |             img[:, :, colour_channel], 8, 1.0)
117 | 
118 |     return ret
119 | 
120 | 
121 | def _calc_texture_hist(img):
122 |     """
123 |         calculate texture histogram for each region
124 | 
125 |         calculate the histogram of gradient for each colours
126 |         the size of output histogram will be
127 |             BINS * ORIENTATIONS * COLOUR_CHANNELS(3)
128 |     """
129 |     BINS = 10
130 | 
131 |     hist = numpy.array([])
132 | 
133 |     for colour_channel in (0, 1, 2):
134 | 
135 |         # mask by the colour channel
136 |         fd = img[:, colour_channel]
137 | 
138 |         # calculate histogram for each orientation and concatenate them all
139 |         # and join to the result
140 |         hist = numpy.concatenate(
141 |             [hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]])
142 | 
143 |     # L1 Normalize
144 |     hist = hist / len(img)
145 | 
146 |     return hist
147 | 
148 | 
149 | def _extract_regions(img):
150 | 
151 |     R = {}
152 | 
153 |     # get hsv image
154 |     hsv = skimage.color.rgb2hsv(img[:, :, :3])
155 | 
156 |     # pass 1: count pixel positions
157 |     for y, i in enumerate(img):
158 | 
159 |         for x, (r, g, b, l) in enumerate(i):
160 | 
161 |             # initialize a new region
162 |             if l not in R:
163 |                 R[l] = {
164 |                     "min_x": 0xffff, "min_y": 0xffff,
165 |                     "max_x": 0, "max_y": 0, "labels": [l]}
166 | 
167 |             # bounding box
168 |             if R[l]["min_x"] > x:
169 |                 R[l]["min_x"] = x
170 |             if R[l]["min_y"] > y:
171 |                 R[l]["min_y"] = y
172 |             if R[l]["max_x"] < x:
173 |                 R[l]["max_x"] = x
174 |             if R[l]["max_y"] < y:
175 |                 R[l]["max_y"] = y
176 | 
177 |     # pass 2: calculate texture gradient
178 |     tex_grad = _calc_texture_gradient(img)
179 | 
180 |     # pass 3: calculate colour histogram of each region
181 |     for k, v in list(R.items()):
182 | 
183 |         # colour histogram
184 |         masked_pixels = hsv[:, :, :][img[:, :, 3] == k]
185 |         R[k]["size"] = len(masked_pixels / 4)
186 |         R[k]["hist_c"] = _calc_colour_hist(masked_pixels)
187 | 
188 |         # texture histogram
189 |         R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k])
190 | 
191 |     return R
192 | 
193 | 
194 | def _extract_neighbours(regions):
195 | 
196 |     def intersect(a, b):
197 |         if (a["min_x"] < b["min_x"] < a["max_x"]
198 |                 and a["min_y"] < b["min_y"] < a["max_y"]) or (
199 |             a["min_x"] < b["max_x"] < a["max_x"]
200 |                 and a["min_y"] < b["max_y"] < a["max_y"]) or (
201 |             a["min_x"] < b["min_x"] < a["max_x"]
202 |                 and a["min_y"] < b["max_y"] < a["max_y"]) or (
203 |             a["min_x"] < b["max_x"] < a["max_x"]
204 |                 and a["min_y"] < b["min_y"] < a["max_y"]):
205 |             return True
206 |         return False
207 | 
208 |     R = list(regions.items())
209 |     neighbours = []
210 |     for cur, a in enumerate(R[:-1]):
211 |         for b in R[cur + 1:]:
212 |             if intersect(a[1], b[1]):
213 |                 neighbours.append((a, b))
214 | 
215 |     return neighbours
216 | 
217 | 
218 | def _merge_regions(r1, r2):
219 |     new_size = r1["size"] + r2["size"]
220 |     rt = {
221 |         "min_x": min(r1["min_x"], r2["min_x"]),
222 |         "min_y": min(r1["min_y"], r2["min_y"]),
223 |         "max_x": max(r1["max_x"], r2["max_x"]),
224 |         "max_y": max(r1["max_y"], r2["max_y"]),
225 |         "size": new_size,
226 |         "hist_c": (
227 |             r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
228 |         "hist_t": (
229 |             r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
230 |         "labels": r1["labels"] + r2["labels"]
231 |     }
232 |     return rt
233 | 
234 | 
235 | def selective_search(
236 |         im_orig, scale=1.0, sigma=0.8, min_size=50):
237 |     '''Selective Search
238 | 
239 |     Parameters
240 |     ----------
241 |         im_orig : ndarray
242 |             Input image
243 |         scale : int
244 |             Free parameter. Higher means larger clusters in felzenszwalb segmentation.
245 |         sigma : float
246 |             Width of Gaussian kernel for felzenszwalb segmentation.
247 |         min_size : int
248 |             Minimum component size for felzenszwalb segmentation.
249 |     Returns
250 |     -------
251 |         img : ndarray
252 |             image with region label
253 |             region label is stored in the 4th value of each pixel [r,g,b,(region)]
254 |         regions : array of dict
255 |             [
256 |                 {
257 |                     'rect': (left, top, width, height),
258 |                     'labels': [...],
259 |                     'size': component_size
260 |                 },
261 |                 ...
262 |             ]
263 |     '''
264 |     assert im_orig.shape[2] == 3, "3ch image is expected"
265 | 
266 |     # load image and get smallest regions
267 |     # region label is stored in the 4th value of each pixel [r,g,b,(region)]
268 |     img = _generate_segments(im_orig, scale, sigma, min_size)
269 | 
270 |     if img is None:
271 |         return None, {}
272 | 
273 |     imsize = img.shape[0] * img.shape[1]
274 |     R = _extract_regions(img)
275 | 
276 |     # extract neighbouring information
277 |     neighbours = _extract_neighbours(R)
278 | 
279 |     # calculate initial similarities
280 |     S = {}
281 |     for (ai, ar), (bi, br) in neighbours:
282 |         S[(ai, bi)] = _calc_sim(ar, br, imsize)
283 | 
284 |     # hierarchal search
285 |     while S != {}:
286 | 
287 |         # get highest similarity
288 |         i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]
289 | 
290 |         # merge corresponding regions
291 |         t = max(R.keys()) + 1.0
292 |         R[t] = _merge_regions(R[i], R[j])
293 | 
294 |         # mark similarities for regions to be removed
295 |         key_to_delete = []
296 |         for k, v in list(S.items()):
297 |             if (i in k) or (j in k):
298 |                 key_to_delete.append(k)
299 | 
300 |         # remove old similarities of related regions
301 |         for k in key_to_delete:
302 |             del S[k]
303 | 
304 |         # calculate similarity set with the new region
305 |         for k in [a for a in key_to_delete if a != (i, j)]:
306 |             n = k[1] if k[0] in (i, j) else k[0]
307 |             S[(t, n)] = _calc_sim(R[t], R[n], imsize)
308 | 
309 |     regions = []
310 |     for k, r in list(R.items()):
311 |         regions.append({
312 |             'rect': (
313 |                 r['min_x'], r['min_y'],
314 |                 r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
315 |             'size': r['size'],
316 |             'labels': r['labels']
317 |         })
318 | 
319 |     return img, regions
320 | 


--------------------------------------------------------------------------------