├── setup.cfg ├── selectivesearch ├── __init__.py └── selectivesearch.py ├── example ├── result.png └── example.py ├── setup.py ├── .gitignore ├── LICENSE └── README.md /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | -------------------------------------------------------------------------------- /selectivesearch/__init__.py: -------------------------------------------------------------------------------- 1 | from .selectivesearch import selective_search # NOQA 2 | -------------------------------------------------------------------------------- /example/result.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/AlpacaTechJP/selectivesearch/HEAD/example/result.png -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup, find_packages 2 | 3 | setup( 4 | name="selectivesearch", 5 | version="0.4", 6 | url="https://github.com/AlpacaDB/selectivesearch", 7 | description="Selective Search implementation for Python", 8 | author="AlpacaDB, Inc.", 9 | license='MIT', 10 | classifiers=[ 11 | 'Development Status :: 3 - Alpha', 12 | 'Intended Audience :: Information Technology', 13 | 'License :: OSI Approved :: MIT License', 14 | 'Programming Language :: Python :: 2.7', 15 | 'Programming Language :: Python :: 3', 16 | ], 17 | keywords='rcnn', 18 | packages=find_packages(), 19 | install_requires=['numpy', 'scikit-image'], 20 | ) 21 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | 
build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | 59 | # VS code 60 | .vscode 61 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015-2016 AlpacaDB 4 | Copyright (c) 2016 Oussama ENNAFII 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | 24 | -------------------------------------------------------------------------------- /example/example.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import ( 3 | division, 4 | print_function, 5 | ) 6 | 7 | import skimage.data 8 | import matplotlib.pyplot as plt 9 | import matplotlib.patches as mpatches 10 | import selectivesearch 11 | 12 | 13 | def main(): 14 | 15 | # loading astronaut image 16 | img = skimage.data.astronaut() 17 | 18 | # perform selective search 19 | img_lbl, regions = selectivesearch.selective_search( 20 | img, scale=500, sigma=0.9, min_size=10) 21 | 22 | candidates = set() 23 | for r in regions: 24 | # excluding same rectangle (with different segments) 25 | if r['rect'] in candidates: 26 | continue 27 | # excluding regions smaller than 2000 pixels 28 | if r['size'] < 2000: 29 | continue 30 | # distorted rects 31 | x, y, w, h = r['rect'] 32 | if w / h > 1.2 or h / w > 1.2: 33 | continue 34 | candidates.add(r['rect']) 35 | 36 | # draw rectangles on the original image 37 | fig, ax = plt.subplots(ncols=1, nrows=1, figsize=(6, 6)) 38 | ax.imshow(img) 39 | for x, y, w, h in candidates: 40 | print(x, y, w, h) 41 | rect = mpatches.Rectangle( 42 | (x, y), w, h, fill=False, edgecolor='red', linewidth=1) 43 | ax.add_patch(rect) 44 | 45 | plt.show() 46 | 47 | if __name__ == "__main__": 48 | main() 49 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Selective Search Implementation for Python 2 | 3 | This is a simple Selective Search Implementation for Python 4 | 5 
| ## Install 6 | 7 | ``` 8 | $ pip install selectivesearch 9 | ``` 10 | 11 | ## Usage 12 | 13 | It is super-simple. 14 | 15 | ```python 16 | import skimage.data 17 | import selectivesearch 18 | 19 | img = skimage.data.astronaut() 20 | img_lbl, regions = selectivesearch.selective_search(img, scale=500, sigma=0.9, min_size=10) 21 | regions[:10] 22 | => 23 | [{'labels': [0.0], 'rect': (0, 0, 15, 24), 'size': 260}, 24 | {'labels': [1.0], 'rect': (13, 0, 1, 12), 'size': 23}, 25 | {'labels': [2.0], 'rect': (0, 15, 15, 11), 'size': 30}, 26 | {'labels': [3.0], 'rect': (15, 14, 0, 0), 'size': 1}, 27 | {'labels': [4.0], 'rect': (0, 0, 61, 153), 'size': 4927}, 28 | {'labels': [5.0], 'rect': (0, 12, 61, 142), 'size': 177}, 29 | {'labels': [6.0], 'rect': (7, 54, 6, 17), 'size': 8}, 30 | {'labels': [7.0], 'rect': (28, 50, 18, 32), 'size': 22}, 31 | {'labels': [8.0], 'rect': (2, 99, 7, 24), 'size': 24}, 32 | {'labels': [9.0], 'rect': (14, 118, 79, 117), 'size': 4008}] 33 | ``` 34 | 35 | See also an example/example.py which generates : 36 | ![alt tag](https://github.com/AlpacaDB/selectivesearch/raw/develop/example/result.png) 37 | 38 | ## Parameters of selective search 39 | 40 | Let's see this paper: http://cs.brown.edu/~pff/papers/seg-ijcv.pdf 41 | 42 | #### sigma 43 | 44 | ``` 45 | In general we use a Gaussian filter to 46 | smooth the image slightly before computing the edge weights, in order to compensate 47 | for digitization artifacts. We always use a Gaussian with σ = 0.8, which does not 48 | produce any visible change to the image but helps remove artifacts. 49 | ``` 50 | 51 | #### min_size 52 | 53 | If the rect size is reached on `min_size`, the calculation is stopped. 54 | 55 | #### scale 56 | 57 | ``` 58 | There is one runtime parameter for the algorithm, which is the value of k that 59 | is used to compute the threshold function τ . Recall we use the function τ (C) = 60 | 14 61 | k/|C| where |C| is the number of elements in C. 
Thus k effectively sets a scale of 62 | observation, in that a larger k causes a preference for larger components. We use 63 | two different parameter settings for the examples in this section (and throughout the 64 | paper), depending on the resolution of the image and the degree to which fine detail 65 | is important in the scene. 66 | ``` 67 | 68 | ## Blog 69 | - EN: http://blog.alpaca.ai/open-source-pure-python-selective-search-and-advanced-object-recognition-with-labellio/ 70 | - JP: http://blog-jp.alpaca.ai/entry/2015/08/05/235408 71 | -------------------------------------------------------------------------------- /selectivesearch/selectivesearch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import division 3 | 4 | import skimage.io 5 | import skimage.feature 6 | import skimage.color 7 | import skimage.transform 8 | import skimage.util 9 | import skimage.segmentation 10 | import numpy 11 | 12 | 13 | # "Selective Search for Object Recognition" by J.R.R. Uijlings et al. 
#
# - Modified version with LBP extractor for texture vectorization


def _generate_segments(im_orig, scale, sigma, min_size):
    """
    Segment the image into its smallest regions with the algorithm of
    Felzenszwalb and Huttenlocher, and return the image with the region
    label map appended as a 4th channel [r, g, b, region].
    """

    # run Felzenszwalb's graph-based segmentation on the float image
    im_mask = skimage.segmentation.felzenszwalb(
        skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma,
        min_size=min_size)

    # merge the mask channel into the image as a 4th channel
    im_orig = numpy.append(
        im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2)
    im_orig[:, :, 3] = im_mask

    return im_orig


def _sim_colour(r1, r2):
    """
    calculate the sum of histogram intersection of colour
    """
    return sum(min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"]))


def _sim_texture(r1, r2):
    """
    calculate the sum of histogram intersection of texture
    """
    return sum(min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"]))


def _sim_size(r1, r2, imsize):
    """
    calculate the size similarity over the image

    Smaller region pairs score higher, encouraging small regions to
    merge early.
    """
    return 1.0 - (r1["size"] + r2["size"]) / imsize


def _sim_fill(r1, r2, imsize):
    """
    calculate the fill similarity over the image

    Measures how well r1 and r2 fit into each other: the less empty
    space in the bounding box around both regions, the higher the score.
    """
    bbsize = (
        (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"]))
        * (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"]))
    )
    return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize


def _calc_sim(r1, r2, imsize):
    """Combined similarity: colour + texture + size + fill, equal weights."""
    return (_sim_colour(r1, r2) + _sim_texture(r1, r2)
            + _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize))


def _calc_colour_hist(img):
    """
    calculate colour histogram for each region

    `img` is an (n_pixels, 3) array of the region's pixels (HSV).
    The output histogram has BINS * COLOUR_CHANNELS(3) entries,
    L1-normalised by the number of pixels.

    number of bins is 25 as same as [uijlings_ijcv2013_draft.pdf]
    """

    BINS = 25
    hist = numpy.array([])

    for colour_channel in (0, 1, 2):

        # extracting one colour channel
        c = img[:, colour_channel]

        # calculate histogram for each colour and join to the result.
        # NOTE(review): the range (0.0, 255.0) looks tuned for 8-bit data,
        # but callers pass HSV values from skimage.color.rgb2hsv, which lie
        # in [0, 1] -- confirm whether the range should be (0.0, 1.0).
        hist = numpy.concatenate(
            [hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]])

    # L1 normalize
    hist = hist / len(img)

    return hist


def _calc_texture_gradient(img):
    """
    calculate texture gradient for entire image

    The original SelectiveSearch algorithm proposed Gaussian derivatives
    for 8 orientations, but we use LBP (local binary patterns) instead.

    output will be [height(*)][width(*)][channel(3)]
    """
    ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2]))

    for colour_channel in (0, 1, 2):
        # 8-neighbour, radius-1 LBP per colour channel
        ret[:, :, colour_channel] = skimage.feature.local_binary_pattern(
            img[:, :, colour_channel], 8, 1.0)

    return ret


def _calc_texture_hist(img):
    """
    calculate texture histogram for each region

    `img` is an (n_pixels, 3) array of LBP values for the region's pixels.
    the size of output histogram will be
    BINS * ORIENTATIONS * COLOUR_CHANNELS(3), L1-normalised.
    """
    BINS = 10

    hist = numpy.array([])

    for colour_channel in (0, 1, 2):

        # mask by the colour channel
        fd = img[:, colour_channel]

        # calculate histogram for each orientation and concatenate them all
        # and join to the result.
        # NOTE(review): 8-point LBP codes range over [0, 255], so the range
        # (0.0, 1.0) captures only the zero code -- confirm whether the
        # range should cover the full LBP code range.
        hist = numpy.concatenate(
            [hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]])

    # L1 Normalize
    hist = hist / len(img)

    return hist


def _extract_regions(img):
    """
    Build the initial region table from the segmented 4-channel image.

    Returns a dict mapping region label -> {min_x/min_y/max_x/max_y
    bounding box, size (pixel count), hist_c, hist_t, labels}.
    """

    R = {}

    # get hsv image
    hsv = skimage.color.rgb2hsv(img[:, :, :3])

    # pass 1: find the bounding box of every region label
    for y, i in enumerate(img):

        for x, (r, g, b, l) in enumerate(i):

            # initialize a new region
            if l not in R:
                R[l] = {
                    "min_x": 0xffff, "min_y": 0xffff,
                    "max_x": 0, "max_y": 0, "labels": [l]}

            # bounding box
            if R[l]["min_x"] > x:
                R[l]["min_x"] = x
            if R[l]["min_y"] > y:
                R[l]["min_y"] = y
            if R[l]["max_x"] < x:
                R[l]["max_x"] = x
            if R[l]["max_y"] < y:
                R[l]["max_y"] = y

    # pass 2: calculate texture gradient
    tex_grad = _calc_texture_gradient(img)

    # pass 3: calculate colour/texture histograms of each region
    for k in R:

        # HSV pixels belonging to region k
        masked_pixels = hsv[:, :, :][img[:, :, 3] == k]
        # was len(masked_pixels / 4): the division changed values, not
        # length, so this is the same result without a throwaway array
        R[k]["size"] = len(masked_pixels)
        R[k]["hist_c"] = _calc_colour_hist(masked_pixels)

        # texture histogram
        R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k])

    return R


def _extract_neighbours(regions):
    """
    Return all pairs of (label, region) items whose bounding boxes overlap.
    """

    def intersect(a, b):
        # boxes overlap iff one of b's corners lies strictly inside a
        if (a["min_x"] < b["min_x"] < a["max_x"]
                and a["min_y"] < b["min_y"] < a["max_y"]) or (
            a["min_x"] < b["max_x"] < a["max_x"]
                and a["min_y"] < b["max_y"] < a["max_y"]) or (
            a["min_x"] < b["min_x"] < a["max_x"]
                and a["min_y"] < b["max_y"] < a["max_y"]) or (
            a["min_x"] < b["max_x"] < a["max_x"]
                and a["min_y"] < b["min_y"] < a["max_y"]):
            return True
        return False

    R = list(regions.items())
    neighbours = []
    # test every unordered pair of regions exactly once
    for cur, a in enumerate(R[:-1]):
        for b in R[cur + 1:]:
            if intersect(a[1], b[1]):
                neighbours.append((a, b))

    return neighbours


def _merge_regions(r1, r2):
    """
    Merge two regions: union bounding box, summed size, size-weighted
    average of the colour/texture histograms, concatenated labels.
    """
    new_size = r1["size"] + r2["size"]
    rt = {
        "min_x": min(r1["min_x"], r2["min_x"]),
        "min_y": min(r1["min_y"], r2["min_y"]),
        "max_x": max(r1["max_x"], r2["max_x"]),
        "max_y": max(r1["max_y"], r2["max_y"]),
        "size": new_size,
        "hist_c": (
            r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size,
        "hist_t": (
            r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size,
        "labels": r1["labels"] + r2["labels"],
    }
    return rt


def selective_search(
        im_orig, scale=1.0, sigma=0.8, min_size=50):
    '''Selective Search

    Parameters
    ----------
    im_orig : ndarray
        Input image
    scale : int
        Free parameter. Higher means larger clusters in felzenszwalb segmentation.
    sigma : float
        Width of Gaussian kernel for felzenszwalb segmentation.
    min_size : int
        Minimum component size for felzenszwalb segmentation.
    Returns
    -------
    img : ndarray
        image with region label
        region label is stored in the 4th value of each pixel [r,g,b,(region)]
    regions : array of dict
        [
            {
                'rect': (left, top, width, height),
                'labels': [...],
                'size': component_size
            },
            ...
        ]
    '''
    assert im_orig.shape[2] == 3, "3ch image is expected"

    # load image and get smallest regions
    # region label is stored in the 4th value of each pixel [r,g,b,(region)]
    img = _generate_segments(im_orig, scale, sigma, min_size)

    if img is None:
        return None, {}

    imsize = img.shape[0] * img.shape[1]
    R = _extract_regions(img)

    # extract neighbouring information
    neighbours = _extract_neighbours(R)

    # calculate initial similarities
    S = {}
    for (ai, ar), (bi, br) in neighbours:
        S[(ai, bi)] = _calc_sim(ar, br, imsize)

    # hierarchical grouping: repeatedly merge the most similar pair
    while S:

        # get highest similarity
        i, j = sorted(S.items(), key=lambda i: i[1])[-1][0]

        # merge corresponding regions under a fresh label
        t = max(R.keys()) + 1.0
        R[t] = _merge_regions(R[i], R[j])

        # mark similarities of the merged regions for removal
        key_to_delete = [k for k in S if (i in k) or (j in k)]

        # remove old similarities of related regions
        for k in key_to_delete:
            del S[k]

        # calculate similarity set with the new region
        for k in [a for a in key_to_delete if a != (i, j)]:
            n = k[1] if k[0] in (i, j) else k[0]
            S[(t, n)] = _calc_sim(R[t], R[n], imsize)

    # flatten the region table into the public output format
    regions = []
    for k, r in list(R.items()):
        regions.append({
            'rect': (
                r['min_x'], r['min_y'],
                r['max_x'] - r['min_x'], r['max_y'] - r['min_y']),
            'size': r['size'],
            'labels': r['labels']
        })

    return img, regions