├── .gitignore
├── LICENSE
├── README.md
├── requirements.txt
├── setup.py
├── spp
    ├── RoiPooling.py
    ├── RoiPoolingConv.py
    ├── SpatialPyramidPooling.py
    └── __init__.py
└── tests
    ├── __init__.py
    ├── test_roi_pooling.py
    ├── test_roi_pooling_conv.py
    └── test_spp.py


/.gitignore:
--------------------------------------------------------------------------------
 1 | # Byte-compiled / optimized / DLL files
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # C extensions
 7 | *.so
 8 | 
 9 | # Distribution / packaging
10 | .Python
11 | env/
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | 
27 | # PyInstaller
28 | #  Usually these files are written by a python script from a template
29 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
30 | *.manifest
31 | *.spec
32 | 
33 | # Installer logs
34 | pip-log.txt
35 | pip-delete-this-directory.txt
36 | 
37 | # Unit test / coverage reports
38 | htmlcov/
39 | .tox/
40 | .coverage
41 | .coverage.*
42 | .cache
43 | nosetests.xml
44 | coverage.xml
45 | *,cover
46 | .hypothesis/
47 | 
48 | # Translations
49 | *.mo
50 | *.pot
51 | 
52 | # Django stuff:
53 | *.log
54 | local_settings.py
55 | 
56 | # Flask stuff:
57 | instance/
58 | .webassets-cache
59 | 
60 | # Scrapy stuff:
61 | .scrapy
62 | 
63 | # Sphinx documentation
64 | docs/_build/
65 | 
66 | # PyBuilder
67 | target/
68 | 
69 | # IPython Notebook
70 | .ipynb_checkpoints
71 | 
72 | # pyenv
73 | .python-version
74 | 
75 | # celery beat schedule file
76 | celerybeat-schedule
77 | 
78 | # dotenv
79 | .env
80 | 
81 | # virtualenv
82 | venv/
83 | ENV/
84 | 
85 | # Spyder project settings
86 | .spyderproject
87 | 
88 | # Rope project settings
89 | .ropeproject
90 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2016 Yann Henon
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # keras-spp
 2 | Spatial pyramid pooling layers for Keras, based on [Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition](https://arxiv.org/abs/1406.4729). This code requires Keras version 2.0 or greater.
 3 | 
 4 | ![spp](http://i.imgur.com/SQWJVoD.png)
 5 | 
 6 | (Image credit: Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, K. He, X. Zhang, S. Ren, J. Sun)
 7 | 
 8 | 
 9 | Three types of pooling layers are currently available:
10 | 
11 | - SpatialPyramidPooling: applies the pooling procedure to the whole of each image in a batch. This is especially useful when the input images
12 | can have varying dimensions but the result must be fed to a fully connected layer, which needs a fixed-length input.
13 | 
14 | For example, this trains a network on images of both 32x32 and 64x64 size:
15 | 
16 | ```
17 | import numpy as np
18 | from keras.models import Sequential
19 | from keras.layers import Convolution2D, Activation, MaxPooling2D, Dense
20 | from spp.SpatialPyramidPooling import SpatialPyramidPooling
21 | 
22 | batch_size = 64
23 | num_channels = 3
24 | num_classes = 10
25 | 
26 | model = Sequential()
27 | 
28 | # uses theano ordering. Note that we leave the image size as None to allow multiple image sizes
29 | model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(3, None, None)))
30 | model.add(Activation('relu'))
31 | model.add(Convolution2D(32, 3, 3))
32 | model.add(Activation('relu'))
33 | model.add(MaxPooling2D(pool_size=(2, 2)))
34 | model.add(Convolution2D(64, 3, 3, border_mode='same'))
35 | model.add(Activation('relu'))
36 | model.add(Convolution2D(64, 3, 3))
37 | model.add(Activation('relu'))
38 | model.add(SpatialPyramidPooling([1, 2, 4]))
39 | model.add(Dense(num_classes))
40 | model.add(Activation('softmax'))
41 | 
42 | model.compile(loss='categorical_crossentropy', optimizer='sgd')
43 | 
44 | # train on 64x64x3 images
45 | model.fit(np.random.rand(batch_size, num_channels, 64, 64), np.zeros((batch_size, num_classes)))
46 | # train on 32x32x3 images
47 | model.fit(np.random.rand(batch_size, num_channels, 32, 32), np.zeros((batch_size, num_classes)))
48 | ```
49 | 
50 | - RoiPooling: extracts multiple regions of interest (ROIs) from a single image. In ROI pooling, spatial pyramid pooling is applied to the specified subregions of the image rather than to the whole image. This is useful for object detection, and is used in Fast R-CNN and Faster R-CNN. Note that the batch size is currently limited to 1.
51 | 
52 | ```
53 | pooling_regions = [1, 2, 4]
54 | num_rois = 2
55 | num_channels = 3
56 | 
57 | if dim_ordering == 'tf':
58 |     in_img = Input(shape=(None, None, num_channels))
59 | elif dim_ordering == 'th':
60 |     in_img = Input(shape=(num_channels, None, None))
61 | 
62 | in_roi = Input(shape=(num_rois, 4))
63 | 
64 | out_roi_pool = RoiPooling(pooling_regions, num_rois)([in_img, in_roi])
65 | 
66 | model = Model([in_img, in_roi], out_roi_pool)
67 | 
68 | if dim_ordering == 'th':
69 |     X_img = np.random.rand(1, num_channels, img_size, img_size)
70 |     row_length = [float(X_img.shape[2]) / i for i in pooling_regions]
71 |     col_length = [float(X_img.shape[3]) / i for i in pooling_regions]
72 | elif dim_ordering == 'tf':
73 |     X_img = np.random.rand(1, img_size, img_size, num_channels)
74 |     row_length = [float(X_img.shape[1]) / i for i in pooling_regions]
75 |     col_length = [float(X_img.shape[2]) / i for i in pooling_regions]
76 | 
77 | X_roi = np.array([[0, 0, img_size / 1, img_size / 1],
78 |                   [0, 0, img_size / 2, img_size / 2]])
79 | 
80 | X_roi = np.reshape(X_roi, (1, num_rois, 4))
81 | 
82 | Y = model.predict([X_img, X_roi])
83 | 
84 | ```
85 | 
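86 | - RoiPoolingConv: like RoiPooling, but maintains spatial information: each ROI is pooled over a single `pool_size` x `pool_size` grid, and the output keeps that grid layout instead of being flattened.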
87 | 
88 | - Thank you to @jlhbaseball15 for his contribution
89 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | keras
2 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | from distutils.core import setup
 2 | 
 3 | setup(
 4 |     name='keras-spp',
 5 |     version='',
 6 |     packages=['spp'],
 7 |     url='',
 8 |     license='',
 9 |     author='',
10 |     author_email='',
11 |     description='',
12 |     requires=[
13 |         'keras'
14 |         ]
15 | )
16 | 


--------------------------------------------------------------------------------
/spp/RoiPooling.py:
--------------------------------------------------------------------------------
  1 | from keras.engine.topology import Layer
  2 | import keras.backend as K
  3 | 
  4 | 
  5 | class RoiPooling(Layer):
  6 |     """ROI pooling layer for 2D inputs.
  7 |     See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
  8 |     K. He, X. Zhang, S. Ren, J. Sun
  9 |     # Arguments
 10 |         pool_list: list of int
 11 |             List of pooling regions to use. The length of the list is the number of pooling regions,
 12 |             each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
 13 |             regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
 14 |         num_rois: number of regions of interest to be used
 15 |     # Input shape
 16 |         list of two 4D tensors [X_img,X_roi] with shape:
 17 |         X_img:
 18 |         `(1, channels, rows, cols)` if dim_ordering='th'
 19 |         or 4D tensor with shape:
 20 |         `(1, rows, cols, channels)` if dim_ordering='tf'.
 21 |         X_roi:
 22 |         `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
 23 |     # Output shape
 24 |         3D tensor with shape:
 25 |         `(1, num_rois, channels * sum([i * i for i in pool_list])`
 26 |     """
 27 | 
 28 |     def __init__(self, pool_list, num_rois, **kwargs):
 29 | 
 30 |         self.dim_ordering = K.image_dim_ordering()
 31 |         assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
 32 | 
 33 |         self.pool_list = pool_list
 34 |         self.num_rois = num_rois
 35 | 
 36 |         self.num_outputs_per_channel = sum([i * i for i in pool_list])
 37 | 
 38 |         super(RoiPooling, self).__init__(**kwargs)
 39 | 
 40 |     def build(self, input_shape):
 41 |         if self.dim_ordering == 'th':
 42 |             self.nb_channels = input_shape[0][1]
 43 |         elif self.dim_ordering == 'tf':
 44 |             self.nb_channels = input_shape[0][3]
 45 | 
 46 |     def compute_output_shape(self, input_shape):
 47 |         return None, self.num_rois, self.nb_channels * self.num_outputs_per_channel
 48 | 
 49 |     def get_config(self):
 50 |         config = {'pool_list': self.pool_list, 'num_rois': self.num_rois}
 51 |         base_config = super(RoiPooling, self).get_config()
 52 |         return dict(list(base_config.items()) + list(config.items()))
 53 | 
 54 |     def call(self, x, mask=None):
 55 | 
 56 |         assert (len(x) == 2)
 57 | 
 58 |         img = x[0]
 59 |         rois = x[1]
 60 | 
 61 |         input_shape = K.shape(img)
 62 | 
 63 |         outputs = []
 64 | 
 65 |         for roi_idx in range(self.num_rois):
 66 | 
 67 |             x = rois[0, roi_idx, 0]
 68 |             y = rois[0, roi_idx, 1]
 69 |             w = rois[0, roi_idx, 2]
 70 |             h = rois[0, roi_idx, 3]
 71 | 
 72 |             row_length = [w / i for i in self.pool_list]
 73 |             col_length = [h / i for i in self.pool_list]
 74 | 
 75 |             if self.dim_ordering == 'th':
 76 |                 for pool_num, num_pool_regions in enumerate(self.pool_list):
 77 |                     for ix in range(num_pool_regions):
 78 |                         for jy in range(num_pool_regions):
 79 |                             x1 = x + ix * col_length[pool_num]
 80 |                             x2 = x1 + col_length[pool_num]
 81 |                             y1 = y + jy * row_length[pool_num]
 82 |                             y2 = y1 + row_length[pool_num]
 83 | 
 84 |                             x1 = K.cast(K.round(x1), 'int32')
 85 |                             x2 = K.cast(K.round(x2), 'int32')
 86 |                             y1 = K.cast(K.round(y1), 'int32')
 87 |                             y2 = K.cast(K.round(y2), 'int32')
 88 | 
 89 |                             new_shape = [input_shape[0], input_shape[1],
 90 |                                          y2 - y1, x2 - x1]
 91 |                             x_crop = img[:, :, y1:y2, x1:x2]
 92 |                             xm = K.reshape(x_crop, new_shape)
 93 |                             pooled_val = K.max(xm, axis=(2, 3))
 94 |                             outputs.append(pooled_val)
 95 | 
 96 |             elif self.dim_ordering == 'tf':
 97 |                 for pool_num, num_pool_regions in enumerate(self.pool_list):
 98 |                     for ix in range(num_pool_regions):
 99 |                         for jy in range(num_pool_regions):
100 |                             x1 = x + ix * col_length[pool_num]
101 |                             x2 = x1 + col_length[pool_num]
102 |                             y1 = y + jy * row_length[pool_num]
103 |                             y2 = y1 + row_length[pool_num]
104 | 
105 |                             x1 = K.cast(K.round(x1), 'int32')
106 |                             x2 = K.cast(K.round(x2), 'int32')
107 |                             y1 = K.cast(K.round(y1), 'int32')
108 |                             y2 = K.cast(K.round(y2), 'int32')
109 | 
110 |                             new_shape = [input_shape[0], y2 - y1,
111 |                                          x2 - x1, input_shape[3]]
112 |                             x_crop = img[:, y1:y2, x1:x2, :]
113 |                             xm = K.reshape(x_crop, new_shape)
114 |                             pooled_val = K.max(xm, axis=(1, 2))
115 |                             outputs.append(pooled_val)
116 | 
117 |         final_output = K.concatenate(outputs, axis=0)
118 |         final_output = K.reshape(final_output, (1, self.num_rois, self.nb_channels * self.num_outputs_per_channel))
119 | 
120 |         return final_output
121 | 


--------------------------------------------------------------------------------
/spp/RoiPoolingConv.py:
--------------------------------------------------------------------------------
  1 | from keras.engine.topology import Layer
  2 | import keras.backend as K
  3 | 
  4 | 
  5 | class RoiPoolingConv(Layer):
  6 |     """ROI pooling layer for 2D inputs.
  7 |     See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
  8 |     K. He, X. Zhang, S. Ren, J. Sun
  9 |     # Arguments
 10 |         pool_size: int
 11 |             Size of pooling region to use. pool_size = 7 will result in a 7x7 region.
 12 |         num_rois: number of regions of interest to be used
 13 |     # Input shape
 14 |         list of two 4D tensors [X_img,X_roi] with shape:
 15 |         X_img:
 16 |         `(1, channels, rows, cols)` if dim_ordering='th'
 17 |         or 4D tensor with shape:
 18 |         `(1, rows, cols, channels)` if dim_ordering='tf'.
 19 |         X_roi:
 20 |         `(1,num_rois,4)` list of rois, with ordering (x,y,w,h)
 21 |     # Output shape
 22 |         3D tensor with shape:
 23 |         `(1, num_rois, channels, pool_size, pool_size)`
 24 |     """
 25 | 
 26 |     def __init__(self, pool_size, num_rois, **kwargs):
 27 | 
 28 |         self.dim_ordering = K.image_dim_ordering()
 29 |         assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
 30 | 
 31 |         self.pool_size = pool_size
 32 |         self.num_rois = num_rois
 33 | 
 34 |         super(RoiPoolingConv, self).__init__(**kwargs)
 35 | 
 36 |     def build(self, input_shape):
 37 |         if self.dim_ordering == 'th':
 38 |             self.nb_channels = input_shape[0][1]
 39 |         elif self.dim_ordering == 'tf':
 40 |             self.nb_channels = input_shape[0][3]
 41 | 
 42 |     def compute_output_shape(self, input_shape):
 43 |         if self.dim_ordering == 'th':
 44 |             return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size
 45 |         else:
 46 |             return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels
 47 | 
 48 |     def call(self, x, mask=None):
 49 | 
 50 |         assert (len(x) == 2)
 51 | 
 52 |         img = x[0]
 53 |         rois = x[1]
 54 | 
 55 |         input_shape = K.shape(img)
 56 | 
 57 |         outputs = []
 58 | 
 59 |         for roi_idx in range(self.num_rois):
 60 | 
 61 |             x = rois[0, roi_idx, 0]
 62 |             y = rois[0, roi_idx, 1]
 63 |             w = rois[0, roi_idx, 2]
 64 |             h = rois[0, roi_idx, 3]
 65 | 
 66 |             row_length = w / float(self.pool_size)
 67 |             col_length = h / float(self.pool_size)
 68 | 
 69 |             num_pool_regions = self.pool_size
 70 | 
 71 |             if self.dim_ordering == 'th':
 72 |                 for jy in range(num_pool_regions):
 73 |                     for ix in range(num_pool_regions):
 74 |                         x1 = x + ix * row_length
 75 |                         x2 = x1 + row_length
 76 |                         y1 = y + jy * col_length
 77 |                         y2 = y1 + col_length
 78 | 
 79 |                         x1 = K.cast(x1, 'int32')
 80 |                         x2 = K.cast(x2, 'int32')
 81 |                         y1 = K.cast(y1, 'int32')
 82 |                         y2 = K.cast(y2, 'int32')
 83 | 
 84 |                         dx = K.maximum(1, x2 - x1)
 85 |                         x2 = x1 + dx
 86 | 
 87 |                         dy = K.maximum(1, y2 - y1)
 88 |                         y2 = y1 + dy
 89 | 
 90 |                         new_shape = [input_shape[0], input_shape[1],
 91 |                                      y2 - y1, x2 - x1]
 92 | 
 93 |                         x_crop = img[:, :, y1:y2, x1:x2]
 94 |                         xm = K.reshape(x_crop, new_shape)
 95 |                         pooled_val = K.max(xm, axis=(2, 3))
 96 |                         outputs.append(pooled_val)
 97 | 
 98 |             elif self.dim_ordering == 'tf':
 99 |                 for jy in range(num_pool_regions):
100 |                     for ix in range(num_pool_regions):
101 |                         x1 = x + ix * row_length
102 |                         x2 = x1 + row_length
103 |                         y1 = y + jy * col_length
104 |                         y2 = y1 + col_length
105 | 
106 |                         x1 = K.cast(x1, 'int32')
107 |                         x2 = K.cast(x2, 'int32')
108 |                         y1 = K.cast(y1, 'int32')
109 |                         y2 = K.cast(y2, 'int32')
110 | 
111 |                         new_shape = [input_shape[0], y2 - y1,
112 |                                      x2 - x1, input_shape[3]]
113 |                         x_crop = img[:, y1:y2, x1:x2, :]
114 |                         xm = K.reshape(x_crop, new_shape)
115 |                         pooled_val = K.max(xm, axis=(1, 2))
116 |                         outputs.append(pooled_val)
117 | 
118 |         final_output = K.concatenate(outputs, axis=0)
119 |         final_output = K.reshape(final_output, (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))
120 | 
121 |         if self.dim_ordering == 'th':
122 |             final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))
123 |         else:
124 |             final_output = K.permute_dimensions(final_output, (0, 1, 2, 3, 4))
125 | 
126 |         return final_output
127 | 


--------------------------------------------------------------------------------
/spp/SpatialPyramidPooling.py:
--------------------------------------------------------------------------------
  1 | from keras.engine.topology import Layer
  2 | import keras.backend as K
  3 | 
  4 | 
  5 | class SpatialPyramidPooling(Layer):
  6 |     """Spatial pyramid pooling layer for 2D inputs.
  7 |     See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition,
  8 |     K. He, X. Zhang, S. Ren, J. Sun
  9 |     # Arguments
 10 |         pool_list: list of int
 11 |             List of pooling regions to use. The length of the list is the number of pooling regions,
 12 |             each int in the list is the number of regions in that pool. For example [1,2,4] would be 3
 13 |             regions with 1, 2x2 and 4x4 max pools, so 21 outputs per feature map
 14 |     # Input shape
 15 |         4D tensor with shape:
 16 |         `(samples, channels, rows, cols)` if dim_ordering='th'
 17 |         or 4D tensor with shape:
 18 |         `(samples, rows, cols, channels)` if dim_ordering='tf'.
 19 |     # Output shape
 20 |         2D tensor with shape:
 21 |         `(samples, channels * sum([i * i for i in pool_list])`
 22 |     """
 23 | 
 24 |     def __init__(self, pool_list, **kwargs):
 25 | 
 26 |         self.dim_ordering = K.image_dim_ordering()
 27 |         assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
 28 | 
 29 |         self.pool_list = pool_list
 30 | 
 31 |         self.num_outputs_per_channel = sum([i * i for i in pool_list])
 32 | 
 33 |         super(SpatialPyramidPooling, self).__init__(**kwargs)
 34 | 
 35 |     def build(self, input_shape):
 36 |         if self.dim_ordering == 'th':
 37 |             self.nb_channels = input_shape[1]
 38 |         elif self.dim_ordering == 'tf':
 39 |             self.nb_channels = input_shape[3]
 40 | 
 41 |     def compute_output_shape(self, input_shape):
 42 |         return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)
 43 | 
 44 |     def get_config(self):
 45 |         config = {'pool_list': self.pool_list}
 46 |         base_config = super(SpatialPyramidPooling, self).get_config()
 47 |         return dict(list(base_config.items()) + list(config.items()))
 48 | 
 49 |     def call(self, x, mask=None):
 50 | 
 51 |         input_shape = K.shape(x)
 52 | 
 53 |         if self.dim_ordering == 'th':
 54 |             num_rows = input_shape[2]
 55 |             num_cols = input_shape[3]
 56 |         elif self.dim_ordering == 'tf':
 57 |             num_rows = input_shape[1]
 58 |             num_cols = input_shape[2]
 59 | 
 60 |         row_length = [K.cast(num_rows, 'float32') / i for i in self.pool_list]
 61 |         col_length = [K.cast(num_cols, 'float32') / i for i in self.pool_list]
 62 | 
 63 |         outputs = []
 64 | 
 65 |         if self.dim_ordering == 'th':
 66 |             for pool_num, num_pool_regions in enumerate(self.pool_list):
 67 |                 for jy in range(num_pool_regions):
 68 |                     for ix in range(num_pool_regions):
 69 |                         x1 = ix * col_length[pool_num]
 70 |                         x2 = ix * col_length[pool_num] + col_length[pool_num]
 71 |                         y1 = jy * row_length[pool_num]
 72 |                         y2 = jy * row_length[pool_num] + row_length[pool_num]
 73 | 
 74 |                         x1 = K.cast(K.round(x1), 'int32')
 75 |                         x2 = K.cast(K.round(x2), 'int32')
 76 |                         y1 = K.cast(K.round(y1), 'int32')
 77 |                         y2 = K.cast(K.round(y2), 'int32')
 78 |                         new_shape = [input_shape[0], input_shape[1],
 79 |                                      y2 - y1, x2 - x1]
 80 |                         x_crop = x[:, :, y1:y2, x1:x2]
 81 |                         xm = K.reshape(x_crop, new_shape)
 82 |                         pooled_val = K.max(xm, axis=(2, 3))
 83 |                         outputs.append(pooled_val)
 84 | 
 85 |         elif self.dim_ordering == 'tf':
 86 |             for pool_num, num_pool_regions in enumerate(self.pool_list):
 87 |                 for jy in range(num_pool_regions):
 88 |                     for ix in range(num_pool_regions):
 89 |                         x1 = ix * col_length[pool_num]
 90 |                         x2 = ix * col_length[pool_num] + col_length[pool_num]
 91 |                         y1 = jy * row_length[pool_num]
 92 |                         y2 = jy * row_length[pool_num] + row_length[pool_num]
 93 | 
 94 |                         x1 = K.cast(K.round(x1), 'int32')
 95 |                         x2 = K.cast(K.round(x2), 'int32')
 96 |                         y1 = K.cast(K.round(y1), 'int32')
 97 |                         y2 = K.cast(K.round(y2), 'int32')
 98 | 
 99 |                         new_shape = [input_shape[0], y2 - y1,
100 |                                      x2 - x1, input_shape[3]]
101 | 
102 |                         x_crop = x[:, y1:y2, x1:x2, :]
103 |                         xm = K.reshape(x_crop, new_shape)
104 |                         pooled_val = K.max(xm, axis=(1, 2))
105 |                         outputs.append(pooled_val)
106 | 
107 |         if self.dim_ordering == 'th':
108 |             outputs = K.concatenate(outputs)
109 |         elif self.dim_ordering == 'tf':
110 |             #outputs = K.concatenate(outputs,axis = 1)
111 |             outputs = K.concatenate(outputs)
112 |             #outputs = K.reshape(outputs,(len(self.pool_list),self.num_outputs_per_channel,input_shape[0],input_shape[1]))
113 |             #outputs = K.permute_dimensions(outputs,(3,1,0,2))
114 |             #outputs = K.reshape(outputs,(input_shape[0], self.num_outputs_per_channel * self.nb_channels))
115 | 
116 |         return outputs
117 | 


--------------------------------------------------------------------------------
/spp/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhenon/keras-spp/229cb5682c1fd9b34671df6a674e25aba32695e0/spp/__init__.py


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/yhenon/keras-spp/229cb5682c1fd9b34671df6a674e25aba32695e0/tests/__init__.py


--------------------------------------------------------------------------------
/tests/test_roi_pooling.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | import numpy as np
 3 | from keras.layers import Input
 4 | from keras.models import Model
 5 | 
 6 | from spp.RoiPooling import RoiPooling
 7 | 
 8 | dim_ordering = K.image_dim_ordering()
 9 | assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
10 | 
11 | pooling_regions = [1, 2, 4]
12 | num_rois = 2
13 | num_channels = 3
14 | 
15 | if dim_ordering == 'tf':
16 |     in_img = Input(shape=(None, None, num_channels))
17 | elif dim_ordering == 'th':
18 |     in_img = Input(shape=(num_channels, None, None))
19 | 
20 | in_roi = Input(shape=(num_rois, 4))
21 | 
22 | out_roi_pool = RoiPooling(pooling_regions, num_rois)([in_img, in_roi])
23 | 
24 | model = Model([in_img, in_roi], out_roi_pool)
25 | model.summary()
26 | 
27 | model.compile(loss='mse', optimizer='sgd')
28 | 
29 | for img_size in [8, 16, 32]:
30 | 
31 |     if dim_ordering == 'th':
32 |         X_img = np.random.rand(1, num_channels, img_size, img_size)
33 |         row_length = [float(X_img.shape[2]) / i for i in pooling_regions]
34 |         col_length = [float(X_img.shape[3]) / i for i in pooling_regions]
35 |     elif dim_ordering == 'tf':
36 |         X_img = np.random.rand(1, img_size, img_size, num_channels)
37 |         row_length = [float(X_img.shape[1]) / i for i in pooling_regions]
38 |         col_length = [float(X_img.shape[2]) / i for i in pooling_regions]
39 | 
40 |     X_roi = np.array([[0, 0, img_size / 1, img_size / 1],
41 |                       [0, 0, img_size / 2, img_size / 2]])
42 | 
43 |     X_roi = np.reshape(X_roi, (1, num_rois, 4))
44 | 
45 |     Y = model.predict([X_img, X_roi])
46 | 
47 |     for roi in range(num_rois):
48 | 
49 |         if dim_ordering == 'th':
50 |             X_curr = X_img[0, :, X_roi[0, roi, 0]:X_roi[0, roi, 2], X_roi[0, roi, 1]:X_roi[0, roi, 3]]
51 |             row_length = [float(X_curr.shape[1]) / i for i in pooling_regions]
52 |             col_length = [float(X_curr.shape[2]) / i for i in pooling_regions]
53 |         elif dim_ordering == 'tf':
54 |             X_curr = X_img[0, X_roi[0, roi, 0]:X_roi[0, roi, 2], X_roi[0, roi, 1]:X_roi[0, roi, 3], :]
55 |             row_length = [float(X_curr.shape[0]) / i for i in pooling_regions]
56 |             col_length = [float(X_curr.shape[1]) / i for i in pooling_regions]
57 | 
58 |         idx = 0
59 | 
60 |         for pool_num, num_pool_regions in enumerate(pooling_regions):
61 |             for ix in range(num_pool_regions):
62 |                 for jy in range(num_pool_regions):
63 |                     for cn in range(num_channels):
64 | 
65 |                         x1 = int(round(ix * col_length[pool_num]))
66 |                         x2 = int(round(ix * col_length[pool_num] + col_length[pool_num]))
67 |                         y1 = int(round(jy * row_length[pool_num]))
68 |                         y2 = int(round(jy * row_length[pool_num] + row_length[pool_num]))
69 | 
70 |                         if dim_ordering == 'th':
71 |                             m_val = np.max(X_curr[cn, y1:y2, x1:x2])
72 |                         elif dim_ordering == 'tf':
73 |                             m_val = np.max(X_curr[y1:y2, x1:x2, cn])
74 | 
75 |                         np.testing.assert_almost_equal(
76 |                             m_val, Y[0, roi, idx], decimal=6)
77 |                         idx += 1
78 |                         
79 | print('Passed roi pooling test')


--------------------------------------------------------------------------------
/tests/test_roi_pooling_conv.py:
--------------------------------------------------------------------------------
 1 | import pdb
 2 | 
 3 | import keras.backend as K
 4 | import numpy as np
 5 | from keras.layers import Input
 6 | from keras.models import Model
 7 | 
 8 | from spp.RoiPoolingConv import RoiPoolingConv
 9 | 
10 | dim_ordering = K.image_dim_ordering()
11 | assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
12 | 
13 | pooling_regions = 2
14 | num_rois = 4
15 | num_channels = 12
16 | 
17 | if dim_ordering == 'tf':
18 |     in_img = Input(shape=(None, None, num_channels))
19 | elif dim_ordering == 'th':
20 |     in_img = Input(shape=(num_channels, None, None))
21 | 
# Wrap RoiPoolingConv in a two-input model (image tensor + ROI tensor) so the
# layer's output can be checked against a NumPy reference computed below.
in_roi = Input(shape=(num_rois, 4))

out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([in_img, in_roi])

model = Model([in_img, in_roi], out_roi_pool)
model.summary()

# predict() is the only model call in this script; loss/optimizer are
# placeholders.
model.compile(loss='mse', optimizer='sgd')

for img_size in [32]:
    # Random single-image batch laid out according to the backend ordering.
    if dim_ordering == 'th':
        X_img = np.random.rand(1, num_channels, img_size, img_size)
    elif dim_ordering == 'tf':
        X_img = np.random.rand(1, img_size, img_size, num_channels)

    # Four ROIs, one per image quadrant. Each row is read below as
    # (x, y, width, height).
    X_roi = np.array([[0, 0, img_size / 2, img_size / 2],
                      [0, img_size / 2, img_size / 2, img_size / 2],
                      [img_size / 2, 0, img_size / 2, img_size / 2],
                      [img_size / 2, img_size / 2, img_size / 2, img_size / 2]])

    X_roi = np.reshape(X_roi, (1, num_rois, 4))

    Y = model.predict([X_img, X_roi])

    for roi in range(num_rois):
        # Under true division `img_size / 2` is a float, and floats are not
        # valid slice bounds, so convert the ROI to plain ints before slicing.
        x, y, w, h = (int(v) for v in X_roi[0, roi])

        # Crop the ROI out of the image and derive the (possibly fractional)
        # size of one pooling cell along each spatial axis.
        if dim_ordering == 'th':
            X_curr = X_img[0, :, y:y + h, x:x + w]
            row_length = float(X_curr.shape[1]) / pooling_regions
            col_length = float(X_curr.shape[2]) / pooling_regions
        elif dim_ordering == 'tf':
            X_curr = X_img[0, y:y + h, x:x + w, :]
            row_length = float(X_curr.shape[0]) / pooling_regions
            col_length = float(X_curr.shape[1]) / pooling_regions

        for ix in range(pooling_regions):
            # Truncate to integer bounds and force every cell to span at
            # least one pixel.
            x1 = int(ix * col_length)
            x2 = max(x1 + 1, int(ix * col_length + col_length))

            for jy in range(pooling_regions):
                y1 = int(jy * row_length)
                y2 = max(y1 + 1, int(jy * row_length + row_length))

                for cn in range(num_channels):
                    if dim_ordering == 'th':
                        expected = np.max(X_curr[cn, y1:y2, x1:x2])
                        actual = Y[0, roi, cn, jy, ix]
                    elif dim_ordering == 'tf':
                        expected = np.max(X_curr[y1:y2, x1:x2, cn])
                        actual = Y[0, roi, jy, ix, cn]

                    # A mismatch raises AssertionError with both values; no
                    # interactive debugger is started, so the script can run
                    # unattended.
                    np.testing.assert_almost_equal(expected, actual, decimal=6)

print('Passed roi pooling test')
94 | 


--------------------------------------------------------------------------------
/tests/test_spp.py:
--------------------------------------------------------------------------------
 1 | import keras.backend as K
 2 | import numpy as np
 3 | from keras.models import Sequential
 4 | 
 5 | from spp.SpatialPyramidPooling import SpatialPyramidPooling
 6 | 
 7 | dim_ordering = K.image_dim_ordering()
 8 | assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'
 9 | 
10 | pooling_regions = [1,2,4]
11 | 
12 | num_channels = 12
13 | batch_size = 16
14 | 
15 | if dim_ordering == 'th':
16 |     input_shape = (num_channels, None, None)
17 | elif dim_ordering == 'tf':
18 |     input_shape = (None, None, num_channels)
19 | 
20 | model = Sequential()
21 | model.add(SpatialPyramidPooling(pooling_regions, input_shape=input_shape))
22 | model.summary()
23 | 
24 | model.compile(loss='mse', optimizer='sgd')
25 | 
26 | for img_size in [8,16]:
27 | 
28 |     if dim_ordering == 'th':
29 |         X = np.random.rand(batch_size, num_channels, img_size, img_size*2)
30 |         row_length = [float(X.shape[2]) / i for i in pooling_regions]
31 |         col_length = [float(X.shape[3]) / i for i in pooling_regions]
32 |     elif dim_ordering == 'tf':
33 |         X = np.random.rand(batch_size, img_size, img_size*2, num_channels)
34 |         row_length = [float(X.shape[1]) / i for i in pooling_regions]
35 |         col_length = [float(X.shape[2]) / i for i in pooling_regions]
36 | 
37 |     Y = model.predict(X)
38 | 
39 |     for batch_num in range(batch_size):
40 |         idx = 0
41 |         for pool_num, num_pool_regions in enumerate(pooling_regions):
42 |             for jy in range(num_pool_regions):
43 |                 for ix in range(num_pool_regions):
44 |                     for cn in range(num_channels):
45 |                         x1 = int(round(ix * col_length[pool_num]))
46 |                         x2 = int(round(ix * col_length[pool_num] + col_length[pool_num]))
47 |                         y1 = int(round(jy * row_length[pool_num]))
48 |                         y2 = int(round(jy * row_length[pool_num] + row_length[pool_num]))
49 | 
50 |                         if dim_ordering == 'th':
51 |                             m_val = np.max(X[batch_num, cn, y1:y2, x1:x2])
52 |                         elif dim_ordering == 'tf':
53 |                             m_val = np.max(X[batch_num, y1:y2, x1:x2, cn])
54 | 
55 |                         np.testing.assert_almost_equal(
56 |                            m_val, Y[batch_num, idx], decimal=6)
57 |                         idx += 1
58 | 
59 | print('Spatial pyramid pooling test passed')
60 | 


--------------------------------------------------------------------------------