├── .gitignore ├── LICENSE ├── README.md ├── requirements.txt ├── setup.py ├── spp ├── RoiPooling.py ├── RoiPoolingConv.py ├── SpatialPyramidPooling.py └── __init__.py └── tests ├── __init__.py ├── test_roi_pooling.py ├── test_roi_pooling_conv.py └── test_spp.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # PyInstaller 28 | # Usually these files are written by a python script from a template 29 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 30 | *.manifest 31 | *.spec 32 | 33 | # Installer logs 34 | pip-log.txt 35 | pip-delete-this-directory.txt 36 | 37 | # Unit test / coverage reports 38 | htmlcov/ 39 | .tox/ 40 | .coverage 41 | .coverage.* 42 | .cache 43 | nosetests.xml 44 | coverage.xml 45 | *,cover 46 | .hypothesis/ 47 | 48 | # Translations 49 | *.mo 50 | *.pot 51 | 52 | # Django stuff: 53 | *.log 54 | local_settings.py 55 | 56 | # Flask stuff: 57 | instance/ 58 | .webassets-cache 59 | 60 | # Scrapy stuff: 61 | .scrapy 62 | 63 | # Sphinx documentation 64 | docs/_build/ 65 | 66 | # PyBuilder 67 | target/ 68 | 69 | # IPython Notebook 70 | .ipynb_checkpoints 71 | 72 | # pyenv 73 | .python-version 74 | 75 | # celery beat schedule file 76 | celerybeat-schedule 77 | 78 | # dotenv 79 | .env 80 | 81 | # virtualenv 82 | venv/ 83 | ENV/ 84 | 85 | # Spyder project settings 86 | .spyderproject 87 | 88 | # Rope project settings 89 | .ropeproject 90 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 
MIT License 2 | 3 | Copyright (c) 2016 Yann Henon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # keras-spp 2 | Spatial pyramid pooling layers for keras, based on https://arxiv.org/abs/1406.4729 . This code requires Keras version 2.0 or greater. 3 | 4 | ![spp](http://i.imgur.com/SQWJVoD.png) 5 | 6 | (Image credit: Spatial Pyramid Pooling in Deep Convolutional Networks for Visual Recognition, K. He, X. Zhang, S. Ren, J. Sun) 7 | 8 | 9 | Three types of pooling layers are currently available: 10 | 11 | - SpatialPyramidPooling: apply the pooling procedure on the entire image, given an image batch. This is especially useful if the image input 12 | can have varying dimensions, but needs to be fed to a fully connected layer. 
13 | 14 | For example, this trains a network on images of both 32x32 and 64x64 size: 15 | 16 | ``` 17 | import numpy as np 18 | from keras.models import Sequential 19 | from keras.layers import Convolution2D, Activation, MaxPooling2D, Dense 20 | from spp.SpatialPyramidPooling import SpatialPyramidPooling 21 | 22 | batch_size = 64 23 | num_channels = 3 24 | num_classes = 10 25 | 26 | model = Sequential() 27 | 28 | # uses theano ordering. Note that we leave the image size as None to allow multiple image sizes 29 | model.add(Convolution2D(32, 3, 3, border_mode='same', input_shape=(3, None, None))) 30 | model.add(Activation('relu')) 31 | model.add(Convolution2D(32, 3, 3)) 32 | model.add(Activation('relu')) 33 | model.add(MaxPooling2D(pool_size=(2, 2))) 34 | model.add(Convolution2D(64, 3, 3, border_mode='same')) 35 | model.add(Activation('relu')) 36 | model.add(Convolution2D(64, 3, 3)) 37 | model.add(Activation('relu')) 38 | model.add(SpatialPyramidPooling([1, 2, 4])) 39 | model.add(Dense(num_classes)) 40 | model.add(Activation('softmax')) 41 | 42 | model.compile(loss='categorical_crossentropy', optimizer='sgd') 43 | 44 | # train on 64x64x3 images 45 | model.fit(np.random.rand(batch_size, num_channels, 64, 64), np.zeros((batch_size, num_classes))) 46 | # train on 32x32x3 images 47 | model.fit(np.random.rand(batch_size, num_channels, 32, 32), np.zeros((batch_size, num_classes))) 48 | ``` 49 | 50 | - RoiPooling: extract multiple rois from a single image. In roi pooling, the spatial pyramid pooling is applied at the specified subregions of the image. This is useful for object detection, and is used in fast-RCNN and faster-RCNN. Note that the batch_size is limited to 1 currently. 
51 | 52 | ``` 53 | pooling_regions = [1, 2, 4] 54 | num_rois = 2 55 | num_channels = 3 56 | 57 | if dim_ordering == 'tf': 58 | in_img = Input(shape=(None, None, num_channels)) 59 | elif dim_ordering == 'th': 60 | in_img = Input(shape=(num_channels, None, None)) 61 | 62 | in_roi = Input(shape=(num_rois, 4)) 63 | 64 | out_roi_pool = RoiPooling(pooling_regions, num_rois)([in_img, in_roi]) 65 | 66 | model = Model([in_img, in_roi], out_roi_pool) 67 | 68 | if dim_ordering == 'th': 69 | X_img = np.random.rand(1, num_channels, img_size, img_size) 70 | row_length = [float(X_img.shape[2]) / i for i in pooling_regions] 71 | col_length = [float(X_img.shape[3]) / i for i in pooling_regions] 72 | elif dim_ordering == 'tf': 73 | X_img = np.random.rand(1, img_size, img_size, num_channels) 74 | row_length = [float(X_img.shape[1]) / i for i in pooling_regions] 75 | col_length = [float(X_img.shape[2]) / i for i in pooling_regions] 76 | 77 | X_roi = np.array([[0, 0, img_size / 1, img_size / 1], 78 | [0, 0, img_size / 2, img_size / 2]]) 79 | 80 | X_roi = np.reshape(X_roi, (1, num_rois, 4)) 81 | 82 | Y = model.predict([X_img, X_roi]) 83 | 84 | ``` 85 | 86 | - RoiPoolingConv: like RoiPooling, but maintains spatial information. 
from keras.engine.topology import Layer
import keras.backend as K


class RoiPooling(Layer):
    """ROI pooling layer for 2D inputs.

    Applies spatial-pyramid max pooling to each region of interest (ROI) of a
    single image and flattens the result per ROI.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition, K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the
            number of pooling regions, each int in the list is the number of
            regions in that pool. For example [1,2,4] would be 3 regions with
            1, 2x2 and 4x4 max pools, so 21 outputs per feature map
        num_rois: number of regions of interest to be used
    # Input shape
        list of two tensors [X_img, X_roi]:
        X_img: 4D tensor with shape
            `(1, channels, rows, cols)` if dim_ordering='th'
            or `(1, rows, cols, channels)` if dim_ordering='tf'.
        X_roi: 3D tensor with shape
            `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)
    # Output shape
        3D tensor with shape:
        `(1, num_rois, channels * sum([i * i for i in pool_list]))`
    """

    def __init__(self, pool_list, num_rois, **kwargs):
        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_list = pool_list
        self.num_rois = num_rois

        # An n x n pyramid level contributes n * n pooled values per channel.
        self.num_outputs_per_channel = sum(i * i for i in pool_list)

        super(RoiPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape is a list [image_shape, roi_shape]; only the channel
        # count of the image is needed later, for the final reshape.
        if self.dim_ordering == 'th':
            self.nb_channels = input_shape[0][1]
        elif self.dim_ordering == 'tf':
            self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        return None, self.num_rois, self.nb_channels * self.num_outputs_per_channel

    def get_config(self):
        config = {'pool_list': self.pool_list, 'num_rois': self.num_rois}
        base_config = super(RoiPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def _max_pool_window(self, img, input_shape, x1, x2, y1, y2):
        """Return the per-channel max of img over rows y1:y2 and cols x1:x2."""
        if self.dim_ordering == 'th':
            new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]
            window = K.reshape(img[:, :, y1:y2, x1:x2], new_shape)
            return K.max(window, axis=(2, 3))

        new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]
        window = K.reshape(img[:, y1:y2, x1:x2, :], new_shape)
        return K.max(window, axis=(1, 2))

    def call(self, x, mask=None):
        """Pool every ROI of the image with each pyramid level.

        # Arguments
            x: list `[img, rois]` as described in the class docstring.
            mask: unused.
        # Returns
            Tensor reshaped to
            `(1, num_rois, nb_channels * num_outputs_per_channel)`.
        """
        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):
            # Only batch element 0 is read: the layer handles one image.
            roi_x = rois[0, roi_idx, 0]
            roi_y = rois[0, roi_idx, 1]
            roi_w = rois[0, roi_idx, 2]
            roi_h = rois[0, roi_idx, 3]

            for num_pool_regions in self.pool_list:
                # Bin width follows the ROI width and bin height follows the
                # ROI height. The previous code had the two swapped, which is
                # only harmless for square ROIs.
                bin_w = roi_w / num_pool_regions
                bin_h = roi_h / num_pool_regions

                # Loop order (ix outer, jy inner) defines the output layout
                # and is kept as before.
                for ix in range(num_pool_regions):
                    for jy in range(num_pool_regions):
                        x1 = roi_x + ix * bin_w
                        x2 = x1 + bin_w
                        y1 = roi_y + jy * bin_h
                        y2 = y1 + bin_h

                        x1 = K.cast(K.round(x1), 'int32')
                        x2 = K.cast(K.round(x2), 'int32')
                        y1 = K.cast(K.round(y1), 'int32')
                        y2 = K.cast(K.round(y2), 'int32')

                        outputs.append(
                            self._max_pool_window(img, input_shape, x1, x2, y1, y2))

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(
            final_output,
            (1, self.num_rois, self.nb_channels * self.num_outputs_per_channel))

        return final_output
from keras.engine.topology import Layer
import keras.backend as K


class RoiPoolingConv(Layer):
    """ROI pooling layer for 2D inputs that keeps the spatial layout.

    Each region of interest (ROI) is divided into a pool_size x pool_size grid
    and max pooled per grid cell.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition, K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_size: int
            Size of pooling region to use. pool_size = 7 will result in a 7x7
            region.
        num_rois: number of regions of interest to be used
    # Input shape
        list of two tensors [X_img, X_roi]:
        X_img: 4D tensor with shape
            `(1, channels, rows, cols)` if dim_ordering='th'
            or `(1, rows, cols, channels)` if dim_ordering='tf'.
        X_roi: 3D tensor with shape
            `(1, num_rois, 4)` list of rois, with ordering (x, y, w, h)
    # Output shape
        5D tensor with shape:
        `(1, num_rois, channels, pool_size, pool_size)` if dim_ordering='th'
        or `(1, num_rois, pool_size, pool_size, channels)` if dim_ordering='tf'.
    """

    def __init__(self, pool_size, num_rois, **kwargs):
        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_size = pool_size
        self.num_rois = num_rois

        super(RoiPoolingConv, self).__init__(**kwargs)

    def build(self, input_shape):
        # input_shape is a list [image_shape, roi_shape]; only the channel
        # count of the image is needed later, for the final reshape.
        if self.dim_ordering == 'th':
            self.nb_channels = input_shape[0][1]
        elif self.dim_ordering == 'tf':
            self.nb_channels = input_shape[0][3]

    def compute_output_shape(self, input_shape):
        if self.dim_ordering == 'th':
            return None, self.num_rois, self.nb_channels, self.pool_size, self.pool_size
        else:
            return None, self.num_rois, self.pool_size, self.pool_size, self.nb_channels

    def get_config(self):
        # Added so the constructor arguments are included in the layer config,
        # mirroring RoiPooling and SpatialPyramidPooling.
        config = {'pool_size': self.pool_size, 'num_rois': self.num_rois}
        base_config = super(RoiPoolingConv, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def _max_pool_window(self, img, input_shape, x1, x2, y1, y2):
        """Return the per-channel max of img over rows y1:y2 and cols x1:x2."""
        if self.dim_ordering == 'th':
            new_shape = [input_shape[0], input_shape[1], y2 - y1, x2 - x1]
            window = K.reshape(img[:, :, y1:y2, x1:x2], new_shape)
            return K.max(window, axis=(2, 3))

        new_shape = [input_shape[0], y2 - y1, x2 - x1, input_shape[3]]
        window = K.reshape(img[:, y1:y2, x1:x2, :], new_shape)
        return K.max(window, axis=(1, 2))

    def call(self, x, mask=None):
        """Max pool every ROI onto a pool_size x pool_size grid.

        # Arguments
            x: list `[img, rois]` as described in the class docstring.
            mask: unused.
        # Returns
            5D tensor laid out as described in the class docstring.
        """
        assert (len(x) == 2)

        img = x[0]
        rois = x[1]

        input_shape = K.shape(img)

        outputs = []

        for roi_idx in range(self.num_rois):
            # Only batch element 0 is read: the layer handles one image.
            roi_x = rois[0, roi_idx, 0]
            roi_y = rois[0, roi_idx, 1]
            roi_w = rois[0, roi_idx, 2]
            roi_h = rois[0, roi_idx, 3]

            bin_w = roi_w / float(self.pool_size)
            bin_h = roi_h / float(self.pool_size)

            # Rows outer, columns inner: this order is what the reshape to
            # (pool_size, pool_size) below relies on.
            for jy in range(self.pool_size):
                for ix in range(self.pool_size):
                    x1 = roi_x + ix * bin_w
                    x2 = x1 + bin_w
                    y1 = roi_y + jy * bin_h
                    y2 = y1 + bin_h

                    # Bounds are truncated (not rounded), as before.
                    x1 = K.cast(x1, 'int32')
                    x2 = K.cast(x2, 'int32')
                    y1 = K.cast(y1, 'int32')
                    y2 = K.cast(y2, 'int32')

                    # Force every window to span at least one pixel. This
                    # guard used to exist only for 'th'; with 'tf' a ROI
                    # smaller than the grid produced empty crops.
                    x2 = x1 + K.maximum(1, x2 - x1)
                    y2 = y1 + K.maximum(1, y2 - y1)

                    outputs.append(
                        self._max_pool_window(img, input_shape, x1, x2, y1, y2))

        final_output = K.concatenate(outputs, axis=0)
        final_output = K.reshape(
            final_output,
            (1, self.num_rois, self.pool_size, self.pool_size, self.nb_channels))

        if self.dim_ordering == 'th':
            # Move channels in front of the spatial grid for channels-first.
            final_output = K.permute_dimensions(final_output, (0, 1, 4, 2, 3))

        return final_output
from keras.engine.topology import Layer
import keras.backend as K


class SpatialPyramidPooling(Layer):
    """Spatial pyramid pooling layer for 2D inputs.

    Max pools the whole feature map over a pyramid of grids and concatenates
    the pooled values into one vector per sample.
    See Spatial Pyramid Pooling in Deep Convolutional Networks for Visual
    Recognition, K. He, X. Zhang, S. Ren, J. Sun

    # Arguments
        pool_list: list of int
            List of pooling regions to use. The length of the list is the
            number of pooling regions, each int in the list is the number of
            regions in that pool. For example [1,2,4] would be 3 regions with
            1, 2x2 and 4x4 max pools, so 21 outputs per feature map
    # Input shape
        4D tensor with shape:
        `(samples, channels, rows, cols)` if dim_ordering='th'
        or 4D tensor with shape:
        `(samples, rows, cols, channels)` if dim_ordering='tf'.
    # Output shape
        2D tensor with shape:
        `(samples, channels * sum([i * i for i in pool_list]))`
    """

    def __init__(self, pool_list, **kwargs):
        self.dim_ordering = K.image_dim_ordering()
        assert self.dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

        self.pool_list = pool_list
        # An n x n pyramid level contributes n * n pooled values per channel.
        self.num_outputs_per_channel = sum([n * n for n in pool_list])

        super(SpatialPyramidPooling, self).__init__(**kwargs)

    def build(self, input_shape):
        if self.dim_ordering == 'th':
            self.nb_channels = input_shape[1]
        elif self.dim_ordering == 'tf':
            self.nb_channels = input_shape[3]

    def compute_output_shape(self, input_shape):
        return (input_shape[0], self.nb_channels * self.num_outputs_per_channel)

    def get_config(self):
        config = {'pool_list': self.pool_list}
        base_config = super(SpatialPyramidPooling, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

    def call(self, x, mask=None):
        """Pool x over every cell of every pyramid level and concatenate."""
        input_shape = K.shape(x)
        channels_first = self.dim_ordering == 'th'

        if channels_first:
            num_rows, num_cols = input_shape[2], input_shape[3]
        else:
            num_rows, num_cols = input_shape[1], input_shape[2]

        rows_f = K.cast(num_rows, 'float32')
        cols_f = K.cast(num_cols, 'float32')

        pooled = []
        for cells in self.pool_list:
            cell_h = rows_f / cells
            cell_w = cols_f / cells

            # Rows outer, columns inner: this fixes the output ordering.
            for jy in range(cells):
                for ix in range(cells):
                    x1 = K.cast(K.round(ix * cell_w), 'int32')
                    x2 = K.cast(K.round(ix * cell_w + cell_w), 'int32')
                    y1 = K.cast(K.round(jy * cell_h), 'int32')
                    y2 = K.cast(K.round(jy * cell_h + cell_h), 'int32')

                    if channels_first:
                        window = K.reshape(
                            x[:, :, y1:y2, x1:x2],
                            [input_shape[0], input_shape[1], y2 - y1, x2 - x1])
                        pooled.append(K.max(window, axis=(2, 3)))
                    else:
                        window = K.reshape(
                            x[:, y1:y2, x1:x2, :],
                            [input_shape[0], y2 - y1, x2 - x1, input_shape[3]])
                        pooled.append(K.max(window, axis=(1, 2)))

        # Both orderings join the pooled values along the last axis.
        return K.concatenate(pooled)
"""Check RoiPooling against a NumPy reference on square ROIs."""
import keras.backend as K
import numpy as np
from keras.layers import Input
from keras.models import Model

from spp.RoiPooling import RoiPooling

dim_ordering = K.image_dim_ordering()
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

pooling_regions = [1, 2, 4]
num_rois = 2
num_channels = 3

if dim_ordering == 'tf':
    in_img = Input(shape=(None, None, num_channels))
elif dim_ordering == 'th':
    in_img = Input(shape=(num_channels, None, None))

in_roi = Input(shape=(num_rois, 4))

out_roi_pool = RoiPooling(pooling_regions, num_rois)([in_img, in_roi])

model = Model([in_img, in_roi], out_roi_pool)
model.summary()

model.compile(loss='mse', optimizer='sgd')

for img_size in [8, 16, 32]:

    if dim_ordering == 'th':
        X_img = np.random.rand(1, num_channels, img_size, img_size)
    elif dim_ordering == 'tf':
        X_img = np.random.rand(1, img_size, img_size, num_channels)

    # ROIs are (x, y, w, h). Floor division keeps them integral so they can
    # also be used as NumPy slice bounds; `/` yields floats on Python 3.
    X_roi = np.array([[0, 0, img_size, img_size],
                      [0, 0, img_size // 2, img_size // 2]])

    X_roi = np.reshape(X_roi, (1, num_rois, 4))

    Y = model.predict([X_img, X_roi.astype('float32')])

    for roi in range(num_rois):
        x, y, w, h = (int(v) for v in X_roi[0, roi])

        # Rows are indexed by y/h and columns by x/w.
        if dim_ordering == 'th':
            X_curr = X_img[0, :, y:y + h, x:x + w]
            row_length = [float(X_curr.shape[1]) / i for i in pooling_regions]
            col_length = [float(X_curr.shape[2]) / i for i in pooling_regions]
        elif dim_ordering == 'tf':
            X_curr = X_img[0, y:y + h, x:x + w, :]
            row_length = [float(X_curr.shape[0]) / i for i in pooling_regions]
            col_length = [float(X_curr.shape[1]) / i for i in pooling_regions]

        idx = 0

        # Same traversal order as the layer: level, ix, jy, then channel.
        for pool_num, num_pool_regions in enumerate(pooling_regions):
            for ix in range(num_pool_regions):
                for jy in range(num_pool_regions):
                    x1 = int(round(ix * col_length[pool_num]))
                    x2 = int(round(ix * col_length[pool_num] + col_length[pool_num]))
                    y1 = int(round(jy * row_length[pool_num]))
                    y2 = int(round(jy * row_length[pool_num] + row_length[pool_num]))

                    for cn in range(num_channels):
                        if dim_ordering == 'th':
                            m_val = np.max(X_curr[cn, y1:y2, x1:x2])
                        elif dim_ordering == 'tf':
                            m_val = np.max(X_curr[y1:y2, x1:x2, cn])

                        np.testing.assert_almost_equal(
                            m_val, Y[0, roi, idx], decimal=6)
                        idx += 1

print('Passed roi pooling test')
"""Check RoiPoolingConv against a NumPy reference on four image quadrants."""
import keras.backend as K
import numpy as np
from keras.layers import Input
from keras.models import Model

from spp.RoiPoolingConv import RoiPoolingConv

dim_ordering = K.image_dim_ordering()
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

pooling_regions = 2
num_rois = 4
num_channels = 12

if dim_ordering == 'tf':
    in_img = Input(shape=(None, None, num_channels))
elif dim_ordering == 'th':
    in_img = Input(shape=(num_channels, None, None))

in_roi = Input(shape=(num_rois, 4))

out_roi_pool = RoiPoolingConv(pooling_regions, num_rois)([in_img, in_roi])

model = Model([in_img, in_roi], out_roi_pool)
model.summary()

model.compile(loss='mse', optimizer='sgd')

for img_size in [32]:
    if dim_ordering == 'th':
        X_img = np.random.rand(1, num_channels, img_size, img_size)
    elif dim_ordering == 'tf':
        X_img = np.random.rand(1, img_size, img_size, num_channels)

    # One (x, y, w, h) ROI per image quadrant. Floor division keeps the
    # values integral so they can be used as slice bounds on Python 3.
    half = img_size // 2
    X_roi = np.array([[0, 0, half, half],
                      [0, half, half, half],
                      [half, 0, half, half],
                      [half, half, half, half]])

    X_roi = np.reshape(X_roi, (1, num_rois, 4))

    Y = model.predict([X_img, X_roi.astype('float32')])

    for roi in range(num_rois):
        x, y, w, h = (int(v) for v in X_roi[0, roi])

        if dim_ordering == 'th':
            X_curr = X_img[0, :, y:y + h, x:x + w]
            row_length = float(X_curr.shape[1]) / pooling_regions
            col_length = float(X_curr.shape[2]) / pooling_regions
        elif dim_ordering == 'tf':
            X_curr = X_img[0, y:y + h, x:x + w, :]
            row_length = float(X_curr.shape[0]) / pooling_regions
            col_length = float(X_curr.shape[1]) / pooling_regions

        for ix in range(pooling_regions):
            for jy in range(pooling_regions):
                # Truncate the bounds and force at least one pixel, mirroring
                # the layer's own window computation.
                x1 = int(ix * col_length)
                x2 = int(ix * col_length + col_length)
                y1 = int(jy * row_length)
                y2 = int(jy * row_length + row_length)
                x2 = x1 + max(1, x2 - x1)
                y2 = y1 + max(1, y2 - y1)

                for cn in range(num_channels):
                    # A mismatch now fails the assertion directly; the old
                    # pdb.set_trace() calls would hang an unattended run.
                    if dim_ordering == 'th':
                        m_val = np.max(X_curr[cn, y1:y2, x1:x2])
                        np.testing.assert_almost_equal(
                            m_val, Y[0, roi, cn, jy, ix], decimal=6)
                    elif dim_ordering == 'tf':
                        m_val = np.max(X_curr[y1:y2, x1:x2, cn])
                        np.testing.assert_almost_equal(
                            m_val, Y[0, roi, jy, ix, cn], decimal=6)

print('Passed roi pooling test')
"""Check SpatialPyramidPooling against a NumPy reference implementation."""
import keras.backend as K
import numpy as np
from keras.models import Sequential

from spp.SpatialPyramidPooling import SpatialPyramidPooling

dim_ordering = K.image_dim_ordering()
assert dim_ordering in {'tf', 'th'}, 'dim_ordering must be in {tf, th}'

pooling_regions = [1, 2, 4]

num_channels = 12
batch_size = 16

# Spatial dimensions stay None so one model accepts several image sizes.
if dim_ordering == 'th':
    input_shape = (num_channels, None, None)
elif dim_ordering == 'tf':
    input_shape = (None, None, num_channels)

model = Sequential()
model.add(SpatialPyramidPooling(pooling_regions, input_shape=input_shape))
model.summary()

model.compile(loss='mse', optimizer='sgd')

for img_size in [8, 16]:

    # Non-square inputs (cols = 2 * rows) exercise the row/col handling.
    if dim_ordering == 'th':
        X = np.random.rand(batch_size, num_channels, img_size, img_size * 2)
        n_rows, n_cols = X.shape[2], X.shape[3]
    elif dim_ordering == 'tf':
        X = np.random.rand(batch_size, img_size, img_size * 2, num_channels)
        n_rows, n_cols = X.shape[1], X.shape[2]

    row_length = [float(n_rows) / i for i in pooling_regions]
    col_length = [float(n_cols) / i for i in pooling_regions]

    Y = model.predict(X)

    for batch_num in range(batch_size):
        idx = 0
        for pool_num, num_pool_regions in enumerate(pooling_regions):
            cell_h = row_length[pool_num]
            cell_w = col_length[pool_num]
            for jy in range(num_pool_regions):
                for ix in range(num_pool_regions):
                    x1 = int(round(ix * cell_w))
                    x2 = int(round(ix * cell_w + cell_w))
                    y1 = int(round(jy * cell_h))
                    y2 = int(round(jy * cell_h + cell_h))

                    for cn in range(num_channels):
                        if dim_ordering == 'th':
                            m_val = np.max(X[batch_num, cn, y1:y2, x1:x2])
                        elif dim_ordering == 'tf':
                            m_val = np.max(X[batch_num, y1:y2, x1:x2, cn])

                        np.testing.assert_almost_equal(
                            m_val, Y[batch_num, idx], decimal=6)
                        idx += 1

print('Spatial pyramid pooling test passed')