├── .deployment ├── .gitattributes ├── .gitignore ├── CNTKModels └── download_model.py ├── LICENSE ├── README.md ├── Temp └── placeholder.txt ├── app.py ├── cntk_helpers.py ├── config.py ├── deploy.cmd ├── doc ├── Postman_2017-09-26_22-50-06.jpg ├── cmd_2017-09-26_22-15-45.jpg ├── cmd_2017-09-26_22-18-52.jpg ├── cmd_2017-09-26_22-20-23.jpg ├── iexplore_2017-09-26_22-17-20.jpg ├── iexplore_2017-09-26_22-22-19.jpg ├── iexplore_2017-09-26_22-23-19.jpg ├── iexplore_2017-09-26_22-23-59.jpg ├── iexplore_2017-09-26_22-25-04.jpg └── iexplore_2017-09-26_23-09-42.jpg ├── evaluate.py ├── logs └── placeholder.txt ├── plot_helpers.py ├── requirements.txt ├── utils ├── Readme.md ├── annotations │ └── annotations_helper.py ├── caffe_layers │ ├── anchor_target_layer.py │ ├── bbox_transform.py │ ├── proposal_layer.py │ └── proposal_target_layer.py ├── cython_modules │ ├── cpu_nms.cp35-win_amd64.pyd │ ├── cpu_nms.cpython-34m.so │ ├── cython_bbox.cp35-win_amd64.pyd │ └── cython_bbox.cpython-34m.so ├── default_config.py ├── map │ └── map_helpers.py ├── nms │ └── nms_wrapper.py ├── pytest.ini ├── rpn │ ├── anchor_target_layer.py │ ├── bbox_transform.py │ ├── cntk_smoothL1_loss.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ └── rpn_helpers.py └── unit_tests.py └── web.config /.deployment: -------------------------------------------------------------------------------- 1 | [config] 2 | command = deploy.cmd -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | # *.py[cod] 4 | *$py.class 5 | 6 | # IDE 7 | 8 | .vs/ 9 | .vscode/ 10 | 11 | # C extensions 12 | #*.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | # directories 108 | .idea/ 109 | 110 | # temporary ignore 111 | __pycache__ 112 | __init__.py 113 | Temp/*.jpg 114 | CNTKModels/*.txt 115 | 116 | # models 117 | *.model -------------------------------------------------------------------------------- /CNTKModels/download_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | try: 4 | from urllib.request import urlretrieve 5 | except ImportError: 6 | from urllib import urlretrieve 7 | 8 | def download_file(filename, file_url): 9 | file_dir = os.path.dirname(os.path.abspath(__file__)) 10 | file_path = os.path.join(file_dir, filename) 11 | if not os.path.exists(file_path): 12 | print('Downloading file from ' + file_url + ', may take a while...') 13 | urlretrieve(file_url,file_path) 14 | print('Saved file as ' + file_path) 15 | else: 16 | print('File already available at ' + file_path) 17 | 18 | if __name__ == '__main__': 19 | download_file('HotailorPOC2.model','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2.model') 20 | download_file('HotailorPOC2_class_map.txt','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2_class_map.txt') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Karol Żak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Table of contents
- [Project description](#project-description)
- [Results](#results)
- [Deployment steps](#deployment-steps)
  - [Setup](#setup)
  - [Deploy demo](#deploy-demo)
- [Code highlights](#code-highlights)

# Project description
[[back to the top]](#table-of-contents)

This sample project shows how to prepare a simple Python web service with an image classification model produced in [CNTK (Cognitive Toolkit)](https://github.com/Microsoft/CNTK) using [FasterRCNN](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FasterRCNN), and how to deploy it to [Azure Web Apps](https://azure.microsoft.com/en-gb/services/app-service/web/)

- [x] Web service written in Python using the Flask module
- [x] Python scripts that let you evaluate images using CNTK and a pretrained model
- [x] Custom deployment scripts for Azure Web Apps
- [x] Automatic setup of a custom Python 3.5.x environment with all the required dependencies
- [ ] HTML UI for testing image classification
- [ ] Virtual Python environments for each application deployed to Azure Web Apps

# Results
[[back to the top]](#table-of-contents)

[Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
![Demo](doc/iexplore_2017-09-26_23-09-42.jpg)

Sample request and response in Postman:
![Demo](doc/Postman_2017-09-26_22-50-06.jpg)

# Deployment steps

## Setup
[[back to the top]](#table-of-contents)


1. **Download the content of this repo**

    You can either clone this repo or just download it and unzip it to some folder

2. **Set up the Python environment**

    For the scripts to work you need a proper Python environment. If you don't have one set up already, you should follow one of the online tutorials. To set up the Python environment and all the dependencies required by CNTK on my local Windows machine, I used [this tutorial](https://docs.microsoft.com/en-us/cognitive-toolkit/setup-windows-binary-script)

3. **Download the CNTK model and class map file**

    Go to the `/CNTKModels` folder in the location where you unzipped this repo and run `download_model.py`. It will automatically download the pretrained model and the class map file required for our evaluation to run properly.

4. **Install the Azure CLI tools**

    If you don't have them yet, you can easily install them by opening the Windows Command Prompt and running this command:
    ```Batchfile
    pip install azure-cli
    ```

5. **Get an Azure subscription**

    If you don't own an Azure subscription, you can always create a [new free trial](https://azure.microsoft.com/en-us/free/) with $200 of credit to spend

## Deploy demo
[[back to the top]](#table-of-contents)
1. **Set variables**

    Open the Command Prompt in the location where you unzipped the contents of this repository (for example: `cd C:\Poligon\WebService`) and type in the following, making sure to replace the `[]` placeholders with proper values:

    ```
    set uname=[username]
    set pass=[password]
    set appn=[web_app_name]
    set rgname=[resource_group_name]
    ```

2. **Log in to Azure**

    In the same CMD window type in:

    ```
    az login
    ```
    You should see something like this:

    ![AZ login](doc/cmd_2017-09-26_22-15-45.jpg)

    Now go to the https://aka.ms/devicelogin website and type in the code:

    ![Device login website](doc/iexplore_2017-09-26_22-17-20.jpg)

    You will then be asked to log in with an email connected to your Azure subscription

    If everything goes OK, you should see a verification message on the website, and in the console you should see a list of your Azure subscriptions


3. **Set up deployment credentials**

    We're setting these up so that we can later deploy code remotely to our Azure Web App
    ```
    az webapp deployment user set --user-name %uname% --password %pass%
    ```

4. **Create a resource group**

    Resource groups help you better manage the resources in your subscription, and they are the basic unit of deploying services to Azure. [Read more here](https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview)

    ```
    az group create --location westeurope --name %rgname%
    ```

5. **Create a new [Azure App Service](https://azure.microsoft.com/en-us/services/app-service/) plan and a new [Azure Web App](https://azure.microsoft.com/en-us/services/app-service/web/)**

    ```
    az appservice plan create --name %appn% --resource-group %rgname% --sku S1
    az webapp create --name %appn% --resource-group %rgname% --plan %appn%
    ```

6. **Configure the Azure Web App and add the Python extension**

    Azure Web Apps by default support only Python 2.7 and 3.4. Because I used Python 3.5, I had to use a special [extension](https://azure.microsoft.com/en-us/blog/azure-web-sites-extensions/) to set up the environment

    First you need to change some [Application Settings](https://docs.microsoft.com/en-us/azure/app-service/web-sites-configure) on your Web App (the pink ones):
    ![Web App preferences](doc/iexplore_2017-09-26_22-22-19.jpg)
    Changing `Platform` is required; changing `Always On` is optional, but I recommend enabling it so that our web service stays awake even when not used.

    After we properly save the Application Settings, we can add the Python 3.5.x extension. In order to do this, just type `extensions` into the search box
    ![Extensions](doc/iexplore_2017-09-26_22-23-19.jpg)

    And then simply add the new extension
    ![Extensions](doc/iexplore_2017-09-26_22-23-59.jpg)

    It should take around a minute or two to properly install the extension
    ![Extensions](doc/iexplore_2017-09-26_22-25-04.jpg)
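    Once the extension is installed, you can sanity-check the interpreter the web app will use. A minimal check, assuming the extension installed into `D:\home\python354x64` (the path used throughout this repo); run it with that interpreter from the Kudu console:

    ```Python
    import sys

    print(sys.version)     # expect 3.5.x
    print(sys.executable)  # expect D:\home\python354x64\python.exe
    ```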
7. **Set up the deployment source for the newly created Azure Web App**

    This command will not only set up the deployment source for your app, but will also retrieve the URL you will need in the next steps
    ```
    az webapp deployment source config-local-git --name %appn% --resource-group %rgname% --query url --output tsv
    ```
    It should return something like this:
    ![Remote repo 1](doc/cmd_2017-09-26_22-18-52.jpg)


8. **Initialize git and add the remote repository**

    Make sure to replace `[remote_repo_address]` with the URL returned in step 7.
    ```
    git init
    git remote add azure [remote_repo_address]
    ```
    The command with the URL should look like this:
    ![Remote repo 2](doc/cmd_2017-09-26_22-20-23.jpg)

9. **Push the application to the Azure Web App remote repository**

    The last step is to simply push our application's code to the Azure Web App
    ```
    git add -A
    git commit -m "init"
    git push azure master
    ```

    This will trigger our [custom deployment script](deploy.cmd), copy all the files, set up the Python environment and install all the required dependencies from the [requirements.txt](requirements.txt) file

10. **Test the application**

    If everything went smoothly, you should now have a running Python application and you should be able to test it. I used Postman to test HTTP requests and responses

    [Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
    ![Demo](doc/iexplore_2017-09-26_23-09-42.jpg)

    Sample request and response in Postman:
    ![Demo](doc/Postman_2017-09-26_22-50-06.jpg)

# Code highlights
[[back to the top]](#table-of-contents)


- [config.py](config.py) - the most important variables for the scripts are set in this file

    Variables used by the web service to point to the directories for temp images and CNTK models:

    ```Python
    # directories for web service:
    __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change
    __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files
    ```

    Variables for choosing the specific model:

    ```Python
    __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name
    __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name
    ```

    Variables used by `evaluate.py` to properly preprocess images and use the CNTK eval function:

    ```Python
    __C.CNTK.IMAGE_WIDTH = 1000
    __C.CNTK.IMAGE_HEIGHT = 1000
    __C.CNTK.NUM_CHANNELS = 3
    ```

- [app.py](app.py) - the main application - the startup file for Flask

    There is one very important line for running CNTK:
    ```Python
    [..]
    import os
    os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
    [..]
    ```
    It adds the location of the CNTK libraries to the PATH variable. This is very important because our code strongly relies on that PATH. For now I'm doing this in code, but in the future I want to move it to the deployment script

    I am using the Flask module to run my web service. In order to make it work, I first needed to create an instance of the Flask app and then run it on a proper port:

    ```Python
    [..]
    app = Flask(__name__)
    [..]
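    # SERVER_HOST / SERVER_PORT are read from the environment so the same
    # code runs both locally (defaulting to localhost:5555) and on Azure: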
    if __name__ == '__main__':
        HOST = os.environ.get('SERVER_HOST', 'localhost')
        try:
            PORT = int(os.environ.get('SERVER_PORT', '5555'))
        except ValueError:
            PORT = 5555
        app.run(HOST, PORT)
    ```
    I also used routes to set up specific methods for our RESTful web service. Currently I expose two routes in my API: one returns a collection of classified tags, and the second returns an image with the plotted results of the evaluation. The `'/'` route simply sets the default landing page. A quick way to call these endpoints from Python is shown right after the route list below.
    ```Python
    [..]
    @app.route('/')
    [..]
    @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST'])
    [..]
    @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST'])
    [..]
    ```
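    Besides Postman, you can exercise these endpoints with a few lines of Python. A minimal client sketch using the `requests` package (the package, the `[web_app_name]` placeholder and the `test.jpg` file name are assumptions, not part of this repo):

    ```Python
    import requests  # assumed: pip install requests

    # hypothetical values - replace [web_app_name] and the image path with your own
    url = "http://[web_app_name].azurewebsites.net/hotelidentifier/api/v1.0/evaluate/returntags"

    with open("test.jpg", "rb") as image_file:
        # the service reads the upload from request.files['file'],
        # so the multipart field must be named 'file'
        response = requests.post(url, files={"file": image_file})

    print(response.json()["tags"])  # list of {'label', 'score', 'bbox'} objects
    ```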
- [evaluate.py](evaluate.py) - the main script for image classification with the CNTK model

    This script strongly depends on [config.py](config.py), and it also uses [cntk_helpers.py](cntk_helpers.py), [plot_helpers.py](plot_helpers.py) and a bunch of scripts from the [utils](utils) folder. Most of those scripts were copied from the original [CNTK source on GitHub](https://github.com/Microsoft/CNTK), some of them with slight changes

- [plot_helpers.py](plot_helpers.py) - helper script for dealing with image plotting

    When working in a headless (non-GUI) server environment such as Azure Web Apps, you need to change the default backend of the `matplotlib` module so that it does not rely on a GUI
    ```Python
    [..]
    # this is important when deploying to headless server environment (non-GUI)
    ###################################################
    import matplotlib
    # force headless backend, or set 'backend' to 'Agg'
    # in your ~/.matplotlib/matplotlibrc
    matplotlib.use('Agg')

    import matplotlib.pyplot
    # force non-interactive mode, or set 'interactive' to False
    # in your ~/.matplotlib/matplotlibrc
    from matplotlib.pyplot import imsave
    matplotlib.pyplot.ioff()
    ###################################################
    [..]
    ```

- [requirements.txt](requirements.txt)

    It holds all the dependencies required by my application and the CNTK libraries.
    ```
    easydict==1.6
    pytest==3.0.3
    opencv-python
    https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
    https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
    Flask==0.12.2
    numpy==1.11.2
    matplotlib==1.5.3
    ipython==6.2.0
    Pillow==4.1.1
    PyYAML==3.12
    ```
    As you can see, in most cases we pin specific versions of modules, and sometimes we even explicitly point to the correct .whl file to use for the installation

- [.deployment](.deployment)

    If this file is present, Kudu will use the custom `deploy.cmd` file instead of the default one. We use a custom deployment script to choose Python 3.5 and install all the necessary dependencies. To learn more about Kudu and deploying to Azure Web Apps, [go here](https://azure.microsoft.com/en-gb/resources/videos/what-is-kudu-with-david-ebbo/)
    ```
    [config]
    command = deploy.cmd
    ```

- [deploy.cmd](deploy.cmd)

    A custom script for our deployment with Kudu. The main difference from the default script is that it sets Python 3.5 (installed from the extension) as the main environment

    ```
    [..]
    SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
    SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
    [..]
    ```

    I'm also using `deploy.cmd` to install all the required dependencies:
    ```CMD
    [..]
    :: 4. Install packages
    echo Pip install requirements.
    echo "Installing requirements"
    %PYTHON_EXE% -m pip install -r requirements.txt
    [..]
    ```

    **TODO:**
    I was told that it is better to have a separate virtual Python environment for each app hosted on Azure Web Apps, so that there is no chance of conflicts between module versions used by different apps. That is what I need to fix in the future.


- [web.config](web.config)

    I used `web.config` to point to the directory of my custom Python 3.5 installation and to successfully run my Flask-based Python web service. I based my `web.config` on the Azure Web Apps [documentation](https://docs.microsoft.com/en-us/azure/app-service/web-sites-python-configure). A minimal sketch following that documentation's pattern (see the actual [web.config](web.config) in this repo for the exact file):

    ```xml
    <?xml version="1.0" encoding="utf-8"?>
    <!-- sketch based on the Azure docs pattern; consult web.config in this repo for the exact file -->
    <configuration>
      <appSettings>
        <add key="WSGI_HANDLER" value="app.app"/>
        <add key="PYTHONPATH" value="D:\home\site\wwwroot"/>
      </appSettings>
      <system.webServer>
        <handlers>
          <add name="PythonHandler" path="*" verb="*" modules="FastCgiModule"
               scriptProcessor="D:\home\python354x64\python.exe|D:\home\python354x64\wfastcgi.py"
               resource="Unspecified" requireAccess="Script"/>
        </handlers>
      </system.webServer>
    </configuration>
    ```


--------------------------------------------------------------------------------
/Temp/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/Temp/placeholder.txt
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
#!flask/bin/python
from flask import Flask, jsonify, request, make_response, send_file
import os
os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
import uuid
from config import cfg
from cntk import load_model
app = Flask(__name__)


model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)
print("Loading existing model from %s" % model_path)
loadedModel = load_model(model_path)


@app.errorhandler(404)
def not_found(error):
    return make_response(jsonify({'error': 'Not found'}), 404)

@app.route('/')
def index():
    return "" \
           "" \
           "Hello, World!
" \ 25 | "This is a sample web service written in Python using Flask module.
" \ 26 | "Use one of the following urls to evaluate images:
" \ 27 | "/hotelidentifier/api/v1.0/evaluate/returntags - takes image as parameter and returns cloud of tags
" \ 28 | "/hotelidentifier/api/v1.0/evaluate/returnimage - takes image as parameter and returns tagged image
" \ 29 | "" \ 30 | "" 31 | 32 | 33 | @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST']) 34 | def return_tags(): 35 | file_upload = request.files['file'] 36 | if file_upload: 37 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg') 38 | file_upload.save(temp_file_path) 39 | app.logger.debug('File is saved as %s', temp_file_path) 40 | from evaluate import evaluateimage 41 | return jsonify(tags=[e.serialize() for e in evaluateimage(temp_file_path,"returntags",eval_model=loadedModel)]) 42 | 43 | @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST']) 44 | def return_image(): 45 | file_upload = request.files['file'] 46 | if file_upload: 47 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg') 48 | file_upload.save(temp_file_path) 49 | app.logger.debug('File is saved as %s', temp_file_path) 50 | from evaluate import evaluateimage 51 | return send_file(evaluateimage(temp_file_path,"returnimage",eval_model=loadedModel), mimetype='image/jpg') 52 | #return send_file(os.path.join('./Temp', temp_filename), mimetype='image/jpg') 53 | 54 | 55 | 56 | if __name__ == '__main__': 57 | HOST = os.environ.get('SERVER_HOST', 'localhost') 58 | try: 59 | PORT = int(os.environ.get('SERVER_PORT', '5555')) 60 | except ValueError: 61 | PORT = 5555 62 | app.run(HOST, PORT) 63 | 64 | 65 | """ add UI later 66 | @app.route("/") 67 | def index(): 68 | return render_template('index.html') 69 | 70 | @app.route("/api/uploader", methods=['POST']) 71 | @cross_origin() 72 | def api_upload_file(): 73 | img = Image.open(BytesIO(request.files['imagefile'].read())).convert('RGB') 74 | img = ImageOps.fit(img, (224, 224), Image.ANTIALIAS) 75 | return json.dumps(run_some_deep_learning_cntk(img)) 76 | 77 | 78 | def run_some_deep_learning_cntk(rgb_pil_image): 79 | # Convert to BGR 80 | rgb_image = np.array(rgb_pil_image, dtype=np.float32) 81 | bgr_image = rgb_image[..., [2, 1, 0]] 82 | img = np.ascontiguousarray(np.rollaxis(bgr_image, 2)) 83 | 84 | # Use last layer to make prediction 85 | z_out = combine([MODEL.outputs[3].owner]) 86 | result = np.squeeze(z_out.eval({z_out.arguments[0]: [img]})) 87 | 88 | # Sort probabilities 89 | a = np.argsort(result)[-1] 90 | predicted_category = " ".join(LABELS[a].split(" ")[1:]) 91 | 92 | return predicted_category.split(",")[0] 93 | """ -------------------------------------------------------------------------------- /cntk_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | from utils.rpn.bbox_transform import bbox_transform_inv 9 | 10 | def regress_rois(roi_proposals, roi_regression_factors, labels, dims_input): 11 | for i in range(len(labels)): 12 | label = labels[i] 13 | if label > 0: 14 | deltas = roi_regression_factors[i:i+1,label*4:(label+1)*4] 15 | roi_coords = roi_proposals[i:i+1,:] 16 | regressed_rois = bbox_transform_inv(roi_coords, deltas) 17 | roi_proposals[i,:] = regressed_rois 18 | 19 | if dims_input is not None: 20 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 21 | pad_width, pad_height, scaled_image_width, scaled_image_height, _, _ = dims_input 22 | left = (pad_width - scaled_image_width) / 2 23 | right = pad_width - left - 1 24 | top = (pad_height - scaled_image_height) / 2 25 | bottom = pad_height - top - 1 26 | 27 | roi_proposals[:,0] = roi_proposals[:,0].clip(left, right) 28 | roi_proposals[:,1] = roi_proposals[:,1].clip(top, bottom) 29 | roi_proposals[:,2] = roi_proposals[:,2].clip(left, right) 30 | roi_proposals[:,3] = roi_proposals[:,3].clip(top, bottom) 31 | 32 | return roi_proposals 33 | 34 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | # `pip install easydict` if you don't have it 11 | from easydict import EasyDict as edict 12 | 13 | __C = edict() 14 | cfg = __C 15 | 16 | # 17 | # CNTK parameters 18 | # 19 | 20 | __C.CNTK = edict() 21 | 22 | # directories for web service: 23 | __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change 24 | __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files 25 | 26 | ################# 27 | # Model & Class Map Files names 28 | ################# 29 | __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name 30 | __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name 31 | 32 | ################# 33 | 34 | __C.CNTK.BASE_MODEL = "AlexNet" # "VGG16" or "AlexNet" 35 | 36 | __C.CNTK.CONV_BIAS_INIT = 0.0 37 | __C.CNTK.SIGMA_RPN_L1 = 3.0 38 | 39 | # change below settings to match variables used to train YOUR model 40 | __C.CNTK.IMAGE_WIDTH = 1000 41 | __C.CNTK.IMAGE_HEIGHT = 1000 42 | __C.CNTK.NUM_CHANNELS = 3 43 | 44 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3 45 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0 46 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1 47 | 48 | __C.CNTK.DRAW_NEGATIVE_ROIS = False 49 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False 50 | 51 | 52 | # 53 | # Base models 54 | # 55 | 56 | if __C.CNTK.BASE_MODEL == "AlexNet": 57 | __C.CNTK.BASE_MODEL_FILE = "AlexNet.model" 58 | __C.CNTK.FEATURE_NODE_NAME = "features" 59 | __C.CNTK.LAST_CONV_NODE_NAME = "conv5.y" 60 | __C.CNTK.START_TRAIN_CONV_NODE_NAME = __C.CNTK.FEATURE_NODE_NAME 61 | __C.CNTK.POOL_NODE_NAME = "pool3" 62 | __C.CNTK.LAST_HIDDEN_NODE_NAME = "h2_d" 63 | __C.CNTK.RPN_NUM_CHANNELS = 256 64 | __C.CNTK.ROI_DIM = 6 65 | __C.CNTK.E2E_LR_FACTOR = 1.0 66 | __C.CNTK.RPN_LR_FACTOR = 1.0 67 | 
    __C.CNTK.FRCN_LR_FACTOR = 1.0

if __C.CNTK.BASE_MODEL == "VGG16":
    __C.CNTK.BASE_MODEL_FILE = "VGG16_ImageNet_Caffe.model"
    __C.CNTK.FEATURE_NODE_NAME = "data"
    __C.CNTK.LAST_CONV_NODE_NAME = "relu5_3"
    __C.CNTK.START_TRAIN_CONV_NODE_NAME = "pool2" # __C.CNTK.FEATURE_NODE_NAME
    __C.CNTK.POOL_NODE_NAME = "pool5"
    __C.CNTK.LAST_HIDDEN_NODE_NAME = "drop7"
    __C.CNTK.RPN_NUM_CHANNELS = 512
    __C.CNTK.ROI_DIM = 7
    __C.CNTK.E2E_LR_FACTOR = 1.0
    __C.CNTK.RPN_LR_FACTOR = 1.0
    __C.CNTK.FRCN_LR_FACTOR = 1.0

#
# Training options
#

__C.TRAIN = edict()

# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# Normalize the targets (subtract empirical mean, divide by empirical stddev)
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
# Deprecated (inside weights)
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Normalize the targets using "precomputed" (or made up) means and stdevs
# (BBOX_NORMALIZE_TARGETS must also be True)
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)

# Train using these proposals
__C.TRAIN.PROPOSAL_METHOD = 'selective_search'

# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor is satisfied by both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 16
# Deprecated (outside weights)
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0


#
# Testing options
#

__C.TEST = edict()
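# NOTE: the TEST RPN settings below mirror the TRAIN RPN settings above, but
# with smaller proposal counts (6000/300 vs. 12000/2000), since evaluation
# only needs the top-scoring proposals.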
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Test using bounding-box regressors
__C.TEST.BBOX_REG = True

# Propose boxes
__C.TEST.HAS_RPN = False

# Test using these proposals
__C.TEST.PROPOSAL_METHOD = 'selective_search'

## NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 16


#
# MISC
#

# The mapping from image coordinates to feature map coordinates might cause
# some boxes that are distinct in image space to become identical in feature
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
# for identifying duplicate boxes.
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
__C.DEDUP_BOXES = 1./16.

# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

# For reproducibility
__C.RNG_SEED = 3

# A small number that's used many times
__C.EPS = 1e-14

# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True

# Default GPU device id
__C.GPU_ID = 0


def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if type(a) is not edict:
        return

    for k, v in a.items():  # dict.iteritems() does not exist in Python 3
        # a must specify keys that are in b
        if k not in b:  # dict.has_key() does not exist in Python 3
            raise KeyError('{} is not a valid config key'.format(k))

        # the types must match, too
        old_type = type(b[k])
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            else:
                raise ValueError(('Type mismatch ({} vs. {}) '
                                  'for config key: {}').format(type(b[k]),
                                                               type(v), k))

        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v

def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_a_into_b(yaml_cfg, __C)

def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() does not exist in Python 3
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d
        try:
            value = literal_eval(v)
        except:
            # handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(
                type(value), type(d[subkey]))
        d[subkey] = value
--------------------------------------------------------------------------------
/deploy.cmd:
--------------------------------------------------------------------------------
@if "%SCM_TRACE_LEVEL%" NEQ "4" @echo off
:: ----------------------
:: KUDU Deployment Script
:: Version: 2.0.0
:: ----------------------
:: Prerequisites
:: -------------
:: VARIABLES
echo "ATTENTION"
echo "USER MUST CHECK/SET THESE VARIABLES:"
SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
::SET CNTK_BIN=https://azurewebappcntk.blob.core.windows.net/cntkrc/cntk.zip
echo "Python extension installed here:"
echo %PYTHON_EXE%
::echo "CNTK Binaries and version located here:"
::echo %CNTK_BIN%
:: Verify node.js installed
where node 2>nul >nul
IF %ERRORLEVEL% NEQ 0 (
  echo Missing node.js executable, please install node.js. If it is already installed, make sure it can be reached from the current environment.
  goto error
)
:: Setup
:: -----
setlocal enabledelayedexpansion
SET ARTIFACTS=%~dp0%..\artifacts
IF NOT DEFINED DEPLOYMENT_SOURCE (
  SET DEPLOYMENT_SOURCE=%~dp0%.
)
IF NOT DEFINED DEPLOYMENT_TARGET (
  SET DEPLOYMENT_TARGET=%ARTIFACTS%\wwwroot
)
IF NOT DEFINED NEXT_MANIFEST_PATH (
  SET NEXT_MANIFEST_PATH=%ARTIFACTS%\manifest
  IF NOT DEFINED PREVIOUS_MANIFEST_PATH (
    SET PREVIOUS_MANIFEST_PATH=%ARTIFACTS%\manifest
  )
)
IF NOT DEFINED KUDU_SYNC_CMD (
  :: Install kudu sync
  echo Installing Kudu Sync
  call npm install kudusync -g --silent
  IF !ERRORLEVEL! NEQ 0 goto error
  :: Locally just running "kuduSync" would also work
  SET KUDU_SYNC_CMD=%appdata%\npm\kuduSync.cmd
)
goto Deployment



::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
:Deployment
echo Handling python deployment.
:: 1.
KuduSync 56 | IF /I "%IN_PLACE_DEPLOYMENT%" NEQ "1" ( 57 | call :ExecuteCmd "%KUDU_SYNC_CMD%" -v 50 -f "%DEPLOYMENT_SOURCE%" -t "%DEPLOYMENT_TARGET%" -n "%NEXT_MANIFEST_PATH%" -p "%PREVIOUS_MANIFEST_PATH%" -i ".git;.hg;.deployment;deploy.cmd" 58 | IF !ERRORLEVEL! NEQ 0 goto error 59 | ) 60 | IF EXIST "%DEPLOYMENT_TARGET%\.skipPythonDeployment" goto postPython 61 | echo Detected requirements.txt. You can skip Python specific steps with a .skipPythonDeployment file. 62 | echo Custom Script 63 | pushd "%DEPLOYMENT_TARGET%" 64 | :: 3. Setup python 65 | ::echo "Configuring pip" 66 | ::curl https://bootstrap.pypa.io/get-pip.py | %PYTHON_EXE% 67 | :: 4. Install packages 68 | echo Pip install requirements. 69 | echo "Installing requirements" 70 | %PYTHON_EXE% -m pip install -r requirements.txt 71 | :: This PATH should direct to CNTK directory 72 | ::set PATH=%PYTHON_DIR%;%PATH% 73 | ::echo PATH set to %PYTHON_DIR% 74 | IF !ERRORLEVEL! NEQ 0 goto error 75 | 76 | :postPython 77 | :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 78 | goto end 79 | 80 | 81 | :: Execute command routine that will echo out when error 82 | :ExecuteCmd 83 | setlocal 84 | set _CMD_=%* 85 | call %_CMD_% 86 | if "%ERRORLEVEL%" NEQ "0" echo Failed exitCode=%ERRORLEVEL%, command=%_CMD_% 87 | exit /b %ERRORLEVEL% 88 | :error 89 | endlocal 90 | echo An error has occurred during web site deployment. 91 | call :exitSetErrorLevel 92 | call :exitFromFunction 2>nul 93 | :exitSetErrorLevel 94 | exit /b 1 95 | :exitFromFunction 96 | () 97 | :end 98 | endlocal 99 | echo Finished successfully. -------------------------------------------------------------------------------- /doc/Postman_2017-09-26_22-50-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/Postman_2017-09-26_22-50-06.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-15-45.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-15-45.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-18-52.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-18-52.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-20-23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-20-23.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-17-20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-17-20.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-22-19.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-22-19.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-23-19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-19.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-23-59.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-59.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-25-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-25-04.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_23-09-42.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_23-09-42.jpg -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | import os, sys 5 | import cv2 6 | from cntk import load_model, Axis, input_variable 7 | from cntk.core import Value 8 | from cntk.io import MinibatchData 9 | from cntk.layers import Constant 10 | 11 | from utils.annotations.annotations_helper import parse_class_map_file 12 | from config import cfg 13 | from plot_helpers import visualizeResultsFaster, imsave, apply_nms_to_single_image_results 14 | from cntk_helpers import regress_rois 15 | 16 | ############################################################### 17 | # Variables 18 | ############################################################### 19 | 20 | image_width = cfg["CNTK"].IMAGE_WIDTH 21 | image_height = cfg["CNTK"].IMAGE_HEIGHT 22 | num_channels = cfg["CNTK"].NUM_CHANNELS 23 | 24 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 25 | dims_input_const = MinibatchData(Value(batch=np.asarray( 26 | [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False) 27 | 28 | # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170]) 29 | img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114] 30 | normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]]) 31 | 32 | 33 | globalvars = {} 34 | 35 | map_file_path = cfg["CNTK"].MODEL_DIRECTORY 36 | globalvars['class_map_file'] = os.path.join(map_file_path, cfg["CNTK"].CLASS_MAP_FILE) 37 | globalvars['classes'] = parse_class_map_file(globalvars['class_map_file']) 38 | 
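# class index 0 is the background class; detections labeled 0 are filtered
# out later in eval_faster_rcnn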
globalvars['num_classes'] = len(globalvars['classes'])
globalvars['temppath'] = cfg["CNTK"].TEMP_PATH
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)

# helper function
def load_resize_and_pad(image_path, width, height, pad_value=114):
    if "@" in image_path:
        print("WARNING: zipped image archives are not supported for visualizing results.")
        exit(0)

    img = cv2.imread(image_path)
    img_width = len(img[0])
    img_height = len(img)
    scale_w = img_width > img_height
    target_w = width
    target_h = height

    if scale_w:
        target_h = int(np.round(img_height * float(width) / float(img_width)))
    else:
        target_w = int(np.round(img_width * float(height) / float(img_height)))

    resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST)

    top = int(max(0, np.round((height - target_h) / 2)))
    left = int(max(0, np.round((width - target_w) / 2)))
    bottom = height - top - target_h
    right = width - left - target_w
    resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                          cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value])

    # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK accepts
    model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))

    dims = (width, height, target_w, target_h, img_width, img_height)
    return resized_with_pad, model_arg_rep, dims


# mode="returnimage" or "returntags"
def eval_faster_rcnn(eval_model, imgPath, img_shape,
                     results_base_path, feature_node_name, classes, mode,
                     drawUnregressedRois=False, drawNegativeRois=False,
                     nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8):

    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    frcn_eval = eval_model(image_input, dims_input)

    #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
    print("Plotting results from Faster R-CNN model for image.")
    # evaluate single image

    _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

    dims_input = np.array(dims, dtype=np.float32)
    dims_input.shape = (1,) + dims_input.shape
    output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1).tolist()

    if mode=="returntags":
        class Tag(object):
            def __init__(self, label, score, bbox):
                self.label = label
                self.score = score
                self.bbox = bbox

            def serialize(self):
                return {
                    'label': self.label,
                    'score': self.score,
                    'bbox': self.bbox,
                }

        results = []
        for i in range(len(out_rpn_rois)):
            if labels[i] != 0:  # skip background detections
                x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i]))
                results.append(x)

        return results


    elif mode=="returnimage":
        evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
                                              classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, imgDebug)
        else:
            # apply regression and nms to bbox coordinates
            regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)

            nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                               nms_threshold=nmsThreshold,
                                                               conf_threshold=nmsConfThreshold)

            img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
                                         classes, nmsKeepIndices=nmsKeepIndices,
                                         boDrawNegativeRois=drawNegativeRois,
                                         decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, img)

        return evaluated_image_path
    else:
        raise ValueError("Unsupported value found in 'mode' parameter")




# mode="returnimage" or "returntags"
def evaluateimage(file_path, mode, eval_model=None):

    #from plot_helpers import eval_and_plot_faster_rcnn
    if eval_model is None:
        print("Loading existing model from %s" % model_path)
        eval_model = load_model(model_path)
    img_shape = (num_channels, image_height, image_width)
    results_folder = globalvars['temppath']
    results = eval_faster_rcnn(eval_model, file_path, img_shape,
                               results_folder, feature_node_name, globalvars['classes'], mode,
                               drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
                               drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
                               nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                               nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
                               bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)
    return results

--------------------------------------------------------------------------------
/logs/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/logs/placeholder.txt
--------------------------------------------------------------------------------
/plot_helpers.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | from builtins import str 9 | import sys, os, time 10 | import numpy as np 11 | from easydict import EasyDict 12 | from builtins import range 13 | import copy, textwrap 14 | from PIL import Image, ImageFont, ImageDraw 15 | from PIL.ExifTags import TAGS 16 | 17 | # this is important when deploying to headless server environment (non-GUI) 18 | ################################################### 19 | import matplotlib 20 | # force headless backend, or set 'backend' to 'Agg' 21 | # in your ~/.matplotlib/matplotlibrc 22 | matplotlib.use('Agg') 23 | 24 | import matplotlib.pyplot 25 | # force non-interactive mode, or set 'interactive' to False 26 | # in your ~/.matplotlib/matplotlibrc 27 | from matplotlib.pyplot import imsave 28 | matplotlib.pyplot.ioff() 29 | ################################################### 30 | 31 | import cntk 32 | from cntk import input_variable, Axis 33 | from utils.nms.nms_wrapper import apply_nms_to_single_image_results 34 | from cntk_helpers import regress_rois 35 | import cv2 # pip install opencv-python 36 | 37 | available_font = "arial.ttf" 38 | try: 39 | dummy = ImageFont.truetype(available_font, 16) 40 | except: 41 | available_font = "FreeMono.ttf" 42 | 43 | 44 | #################################### 45 | # Visualize results 46 | #################################### 47 | def visualizeResultsFaster(imgPath, roiLabels, roiScores, roiRelCoords, padWidth, padHeight, classes, 48 | nmsKeepIndices = None, boDrawNegativeRois = True, decisionThreshold = 0.0): 49 | # read and resize image 50 | imgWidth, imgHeight = imWidthHeight(imgPath) 51 | scale = 800.0 / max(imgWidth, imgHeight) 52 | imgHeight = int(imgHeight * scale) 53 | imgWidth = int(imgWidth * scale) 54 | if imgWidth > imgHeight: 55 | h_border = 0 56 | v_border = int((imgWidth - imgHeight)/2) 57 | else: 58 | h_border = int((imgHeight - imgWidth)/2) 59 | v_border = 0 60 | 61 | PAD_COLOR = [103, 116, 123] # [114, 114, 114] 62 | cv_img = cv2.imread(imgPath) 63 | rgb_img = cv2.cvtColor(cv_img,cv2.COLOR_BGR2RGB) 64 | resized = cv2.resize(rgb_img, (imgWidth, imgHeight), interpolation=cv2.INTER_NEAREST) 65 | imgDebug = cv2.copyMakeBorder(resized,v_border,v_border,h_border,h_border,cv2.BORDER_CONSTANT,value=PAD_COLOR) 66 | rect_scale = 800 / padWidth 67 | 68 | assert(len(roiLabels) == len(roiRelCoords)) 69 | if roiScores: 70 | assert(len(roiLabels) == len(roiScores)) 71 | minScore = min(roiScores) 72 | print("roiScores min: {}, max: {}, threshold: {}".format(minScore, max(roiScores), decisionThreshold)) 73 | if minScore > decisionThreshold: 74 | decisionThreshold = minScore * 0.5 75 | print("reset decision threshold to: {}".format(decisionThreshold)) 76 | 77 | # draw multiple times to avoid occlusions 78 | for iter in range(0,3): 79 | for roiIndex in range(len(roiRelCoords)): 80 | label = roiLabels[roiIndex] 81 | if roiScores: 82 | score = roiScores[roiIndex] 83 | if decisionThreshold and score < decisionThreshold: 84 | label = 0 85 | 86 | # init drawing parameters 87 | thickness = 1 88 | if label == 0: 89 | color = (255, 0, 0) 90 | else: 91 | color = getColorsPalette()[label] 92 | 93 | rect = [(rect_scale * i) for i in roiRelCoords[roiIndex]] 94 | rect[0] = int(max(0, min(padWidth, rect[0]))) 95 | rect[1] = int(max(0, min(padHeight, rect[1]))) 96 | rect[2] = int(max(0, min(padWidth, rect[2]))) 97 | rect[3] = int(max(0, min(padHeight, rect[3]))) 98 | 99 | # draw in higher iterations only the 
detections 100 | if iter == 0 and boDrawNegativeRois: 101 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness) 102 | elif iter==1 and label > 0: 103 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices): 104 | thickness = 4 105 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness) 106 | elif iter == 2 and label > 0: 107 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices): 108 | font = ImageFont.truetype(available_font, 18) 109 | text = classes[label] 110 | if roiScores: 111 | text += "(" + str(round(score, 2)) + ")" 112 | imgDebug = drawText(imgDebug, (rect[0],rect[1]), text, color = (255,255,255), font = font, colorBackground=color) 113 | return imgDebug 114 | 115 | def load_resize_and_pad(image_path, width, height, pad_value=114): 116 | if "@" in image_path: 117 | print("WARNING: zipped image archives are not supported for visualizing results.") 118 | exit(0) 119 | 120 | img = cv2.imread(image_path) 121 | img_width = len(img[0]) 122 | img_height = len(img) 123 | scale_w = img_width > img_height 124 | target_w = width 125 | target_h = height 126 | 127 | if scale_w: 128 | target_h = int(np.round(img_height * float(width) / float(img_width))) 129 | else: 130 | target_w = int(np.round(img_width * float(height) / float(img_height))) 131 | 132 | resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST) 133 | 134 | top = int(max(0, np.round((height - target_h) / 2))) 135 | left = int(max(0, np.round((width - target_w) / 2))) 136 | bottom = height - top - target_h 137 | right = width - left - target_w 138 | resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right, 139 | cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value]) 140 | 141 | # transpose(2,0,1) converts the image to the HWC format which CNTK accepts 142 | model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1)) 143 | 144 | dims = (width, height, target_w, target_h, img_width, img_height) 145 | return resized_with_pad, model_arg_rep, dims 146 | 147 | # Tests a Faster R-CNN model and plots images with detected boxes 148 | def eval_and_plot_faster_rcnn(eval_model, num_images_to_plot, test_map_file, img_shape, 149 | results_base_path, feature_node_name, classes, 150 | drawUnregressedRois=False, drawNegativeRois=False, 151 | nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8): 152 | # get image paths 153 | with open(test_map_file) as f: 154 | content = f.readlines() 155 | img_base_path = os.path.dirname(os.path.abspath(test_map_file)) 156 | img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content] 157 | 158 | # prepare model 159 | image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) 160 | dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') 161 | frcn_eval = eval_model(image_input, dims_input) 162 | 163 | #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6)) 164 | print("Plotting results from Faster R-CNN model for %s images." 
165 |     for i in range(0, num_images_to_plot):
166 |         imgPath = img_file_names[i]
167 | 
168 |         # evaluate single image
169 |         _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])
170 | 
171 |         dims_input = np.array(dims, dtype=np.float32)
172 |         dims_input.shape = (1,) + dims_input.shape
173 |         output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})
174 | 
175 |         out_dict = dict([(k.name, k) for k in output])
176 |         out_cls_pred = output[out_dict['cls_pred']][0]
177 |         out_rpn_rois = output[out_dict['rpn_rois']][0]
178 |         out_bbox_regr = output[out_dict['bbox_regr']][0]
179 | 
180 |         labels = out_cls_pred.argmax(axis=1)
181 |         scores = out_cls_pred.max(axis=1).tolist()
182 | 
183 |         if drawUnregressedRois:
184 |             # plot results without final regression
185 |             imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
186 |                                               classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
187 |                                               decisionThreshold=bgrPlotThreshold)
188 |             imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(imgPath)), imgDebug)
189 | 
190 |         # apply regression and nms to bbox coordinates
191 |         regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
192 | 
193 |         nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
194 |                                                            nms_threshold=nmsThreshold,
195 |                                                            conf_threshold=nmsConfThreshold)
196 | 
197 |         img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
198 |                                      classes, nmsKeepIndices=nmsKeepIndices,
199 |                                      boDrawNegativeRois=drawNegativeRois,
200 |                                      decisionThreshold=bgrPlotThreshold)
201 |         imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(imgPath)), img)
202 | 
203 | 
204 | ####################################
205 | # helper library
206 | ####################################
207 | 
208 | def imread(imgPath, boThrowErrorIfExifRotationTagSet = True):
209 |     if not os.path.exists(imgPath):
210 |         # fail fast instead of letting cv2.imread silently return None
211 |         raise Exception("ERROR: image path does not exist: " + imgPath)
212 | 
213 |     rotation = rotationFromExifTag(imgPath)
214 |     if boThrowErrorIfExifRotationTagSet and rotation != 0:
215 |         raise Exception("Error: exif rotation tag set, image needs to be rotated by %d degrees." % rotation)
216 |     img = cv2.imread(imgPath)
217 |     if img is None:
218 |         # cv2.imread returns None instead of raising when a file cannot be decoded
219 |         raise Exception("ERROR: cannot load image " + imgPath)
220 |     if rotation != 0:
221 |         img = np.rot90(img, rotation // 90).copy() # .copy() avoids occasional "TypeError: Layout of the output array img is incompatible with cv::Mat"
222 |     return img
223 | 
224 | def rotationFromExifTag(imgPath):
225 |     TAGSinverted = {v: k for k, v in TAGS.items()}
226 |     orientationExifId = TAGSinverted['Orientation']
227 |     try:
228 |         imageExifTags = Image.open(imgPath)._getexif()
229 |     except Exception:
230 |         imageExifTags = None
231 | 
232 |     # rotate the image if orientation exif tag is present
233 |     rotation = 0
234 |     if imageExifTags is not None and orientationExifId is not None and orientationExifId in imageExifTags:
235 |         orientation = imageExifTags[orientationExifId]
236 |         # print ("orientation = " + str(imageExifTags[orientationExifId]))
237 |         if orientation == 1 or orientation == 0:
238 |             rotation = 0 # no need to do anything
239 |         elif orientation == 6:
240 |             rotation = -90
241 |         elif orientation == 8:
242 |             rotation = 90
243 |         else:
244 |             # orientations other than 0/1/6/8 (e.g. mirrored variants) are not handled
245 |             raise Exception("ERROR: orientation = " + str(orientation) + " not supported!")
246 |     return rotation
247 | 
248 | def imwrite(img, imgPath):
249 |     cv2.imwrite(imgPath, img)
250 | 
251 | def imresize(img, scale, interpolation = cv2.INTER_LINEAR):
252 |     return cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=interpolation)
253 | 
254 | def imresizeMaxDim(img, maxDim, boUpscale = False, interpolation = cv2.INTER_LINEAR):
255 |     scale = 1.0 * maxDim / max(img.shape[:2])
256 |     if scale < 1 or boUpscale:
257 |         img = imresize(img, scale, interpolation)
258 |     else:
259 |         scale = 1.0
260 |     return img, scale
261 | 
262 | def imWidth(input):
263 |     return imWidthHeight(input)[0]
264 | 
265 | def imHeight(input):
266 |     return imWidthHeight(input)[1]
267 | 
268 | def imWidthHeight(input):
269 |     width, height = Image.open(input).size # this does not load the full image
270 |     return width, height
271 | 
272 | def imArrayWidth(input):
273 |     return imArrayWidthHeight(input)[0]
274 | 
275 | def imArrayHeight(input):
276 |     return imArrayWidthHeight(input)[1]
277 | 
278 | def imArrayWidthHeight(input):
279 |     width = input.shape[1]
280 |     height = input.shape[0]
281 |     return width, height
282 | 
283 | def imshow(img, waitDuration=0, maxDim = None, windowName = 'img'):
284 |     if isinstance(img, str): # test if 'img' is a string
285 |         img = cv2.imread(img)
286 |     if maxDim is not None:
287 |         scaleVal = 1.0 * maxDim / max(img.shape[:2])
288 |         if scaleVal < 1:
289 |             img = imresize(img, scaleVal)
290 |     cv2.imshow(windowName, img)
291 |     cv2.waitKey(waitDuration)
292 | 
293 | def drawRectangles(img, rects, color = (0, 255, 0), thickness = 2):
294 |     for rect in rects:
295 |         pt1 = tuple(ToIntegers(rect[0:2]))
296 |         pt2 = tuple(ToIntegers(rect[2:]))
297 |         try:
298 |             cv2.rectangle(img, pt1, pt2, color, thickness)
299 |         except:
300 |             print("Unexpected error:", sys.exc_info()[0])
301 |             raise
302 | 
303 | def drawCrossbar(img, pt):
304 |     (x,y) = pt
305 |     cv2.rectangle(img, (0, y), (x, y), (255, 255, 0), 1)
306 |     cv2.rectangle(img, (x, 0), (x, y), (255, 255, 0), 1)
307 |     cv2.rectangle(img, (img.shape[1],y), (x, y), (255, 255, 0), 1)
308 |     cv2.rectangle(img, (x, img.shape[0]), (x, y), (255, 255, 0), 1)
309 | 
310 | def ptClip(pt, maxWidth, maxHeight):
311 |     pt = list(pt)
312 |     pt[0] = max(pt[0], 0)
313 |     pt[1] = max(pt[1], 0)
314 |     pt[0] = min(pt[0], maxWidth)
315 |     pt[1] = min(pt[1], maxHeight)
316 |     return pt
317 | 
318 | def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
319 |     pilImg = imconvertCv2Pil(img)
320 |     pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
321 |     return imconvertPil2Cv(pilImg)
322 | 
323 | def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
324 |     textY = pt[1]
325 |     draw = ImageDraw.Draw(pilImg)
326 |     if textWidth is None:
327 |         lines = [text]
328 |     else:
329 |         lines = textwrap.wrap(text, width=textWidth)
330 |     for line in lines:
331 |         width, height = font.getsize(line)
332 |         if colorBackground is not None:
333 |             draw.rectangle((pt[0], textY, pt[0] + width, textY + height), fill=tuple(colorBackground[::-1]))
334 |         draw.text((pt[0], textY), line, fill = tuple(color), font = font)
335 |         textY += height # advance so that wrapped lines do not overlap
336 |     return pilImg
337 | 
338 | def getColorsPalette():
339 |     colors = [[255,0,0], [0,255,0], [0,0,255], [255,255,0], [255,0,255]]
340 |     for i in range(5):
341 |         for dim in range(0,3):
342 |             for s in (0.25, 0.5, 0.75):
343 |                 if colors[i][dim] != 0:
344 |                     newColor = copy.deepcopy(colors[i])
345 |                     newColor[dim] = int(round(newColor[dim] * s))
346 |                     colors.append(newColor)
347 |     return colors
348 | 
349 | def imconvertPil2Cv(pilImg):
350 |     rgb = pilImg.convert('RGB')
351 |     return np.array(rgb).copy()[:, :, ::-1]
352 | 
353 | def imconvertCv2Pil(img):
354 |     cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
355 |     return Image.fromarray(cv2_im)
356 | 
357 | def ToIntegers(list1D):
358 |     return [int(float(x)) for x in list1D]
359 | 
360 | def getDictionary(keys, values, boConvertValueToInt = True):
361 |     dictionary = {}
362 |     for key, value in zip(keys, values):
363 |         if (boConvertValueToInt):
364 |             value = int(value)
365 |         dictionary[key] = value
366 |     return dictionary
367 | 
368 | class Bbox:
369 |     MAX_VALID_DIM = 100000
370 |     left = top = right = bottom = None
371 | 
372 |     def __init__(self, left, top, right, bottom):
373 |         self.left = int(round(float(left)))
374 |         self.top = int(round(float(top)))
375 |         self.right = int(round(float(right)))
376 |         self.bottom = int(round(float(bottom)))
377 |         self.standardize()
378 | 
379 |     def __str__(self):
380 |         return ("Bbox object: left = {0}, top = {1}, right = {2}, bottom = {3}".format(self.left, self.top, self.right, self.bottom))
381 | 
382 |     def __repr__(self):
383 |         return str(self)
384 | 
385 |     def rect(self):
386 |         return [self.left, self.top, self.right, self.bottom]
387 | 
388 |     def max(self):
389 |         return max([self.left, self.top, self.right, self.bottom])
390 | 
391 |     def min(self):
392 |         return min([self.left, self.top, self.right, self.bottom])
393 | 
394 |     def width(self):
395 |         width = self.right - self.left + 1
396 |         assert(width >= 0)
397 |         return width
398 | 
399 |     def height(self):
400 |         height = self.bottom - self.top + 1
401 |         assert(height >= 0)
402 |         return height
403 | 
404 |     def surfaceArea(self):
405 |         return self.width() * self.height()
406 | 
407 |     def standardize(self): # ensure left <= right and top <= bottom (called by __init__)
408 |         leftNew = min(self.left, self.right)
409 |         topNew = min(self.top, self.bottom)
410 |         self.right = max(self.left, self.right)
411 |         self.bottom = max(self.top, self.bottom)
412 |         self.left = leftNew
413 |         self.top = topNew
414 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | easydict==1.6
2 | pytest==3.0.3
3 | opencv-python
4 | https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
5 | https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
6 | Flask==0.12.2
7 | numpy==1.11.2
8 | matplotlib==1.5.3
9 | ipython==6.2.0
10 | Pillow==4.1.1
11 | PyYAML==3.12
12 | 
--------------------------------------------------------------------------------
/utils/Readme.md:
--------------------------------------------------------------------------------
1 | ## Detection utils
2 | 
3 | This folder contains Python utility modules for object detection networks.
4 | 
5 | ### Cython modules
6 | 
7 | To use the rpn component you need precompiled Cython modules for nms (at least cpu_nms.cpXX-win_amd64.pyd for Windows or cpu_nms.cpython-XXm.so for Linux) and bbox (cython_bbox.cpXX-win_amd64.pyd for Windows or cython_bbox.cpython-XXm.so for Linux).
8 | To compile the Cython modules for Windows see https://github.com/MrGF/py-faster-rcnn-windows:
9 | ```
10 | git clone https://github.com/MrGF/py-faster-rcnn-windows
11 | cd $FRCN_ROOT/lib
12 | python setup.py build_ext --inplace
13 | ```
14 | For Linux see https://github.com/rbgirshick/py-faster-rcnn:
15 | ```
16 | git clone https://github.com/rbgirshick/py-faster-rcnn
17 | cd $FRCN_ROOT/lib
18 | python setup.py build_ext --inplace
19 | ```
20 | Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.
21 | 
22 | ##### `default_config.py`
23 | 
24 | Contains all required parameters for using a region proposal network in training or evaluation. You can override these parameters by specifying a `config.py` file of the same format inside your working directory.
25 | 
26 | ### `rpn` module overview
27 | 
28 | The rpn module contains helper methods and the required layers to generate region proposal networks for object detection.
29 | 
30 | ##### `rpn_helpers.py`
31 | 
32 | Contains helper methods to create a region proposal network (rpn) and a proposal target layer for training the rpn.
33 | 
34 | ##### `generate_anchors.py`
35 | 
36 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
37 | 
38 | ##### `proposal_layer.py`
39 | 
40 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
41 | 
42 | ##### `anchor_target_layer.py`
43 | 
44 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
45 | Bbox regression targets are specified when the classification label is > 0.
46 | 
47 | ##### `proposal_target_layer.py`
48 | 
49 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ..., K)
50 | and bbox regression targets in the case that the label is > 0.
51 | 
52 | ##### `generate.py`
53 | 
54 | Generates object detection proposals from an imdb using an RPN.
55 | 
--------------------------------------------------------------------------------
/utils/annotations/annotations_helper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import os 9 | 10 | def parse_class_map_file(class_map_file): 11 | with open(class_map_file, "r") as f: 12 | lines = f.readlines() 13 | class_list = [None]*len(lines) 14 | for line in lines: 15 | tab_pos = line.find('\t') 16 | class_name = line[:tab_pos] 17 | class_id = int(line[tab_pos+1:-1]) 18 | class_list[class_id] = class_name 19 | 20 | return class_list -------------------------------------------------------------------------------- /utils/caffe_layers/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | #import caffe 10 | import yaml 11 | import numpy as np 12 | import numpy.random as npr 13 | from utils.default_config import cfg 14 | from utils.rpn.generate_anchors import generate_anchors 15 | from utils.rpn.bbox_transform import bbox_transform 16 | from utils.cython_modules.cython_bbox import bbox_overlaps 17 | 18 | DEBUG = False 19 | 20 | class AnchorTargetLayer: #(caffe.Layer): 21 | """ 22 | Assign anchors to ground-truth targets. Produces anchor classification 23 | labels and bounding-box regression targets. 24 | """ 25 | 26 | def set_param_str(self, param_str): 27 | self.param_str_ = param_str 28 | 29 | def set_deterministic_mode(self, mode = True): 30 | self._determininistic_mode = mode 31 | 32 | def setup(self, bottom, top): 33 | layer_params = yaml.load(self.param_str_) 34 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 35 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 36 | self._num_anchors = self._anchors.shape[0] 37 | self._feat_stride = layer_params['feat_stride'] 38 | 39 | if DEBUG: 40 | print('anchors:') 41 | print(self._anchors) 42 | print('anchor shapes:') 43 | print(np.hstack(( 44 | self._anchors[:, 2::4] - self._anchors[:, 0::4], 45 | self._anchors[:, 3::4] - self._anchors[:, 1::4], 46 | ))) 47 | self._counts = cfg.EPS 48 | self._sums = np.zeros((1, 4)) 49 | self._squared_sums = np.zeros((1, 4)) 50 | self._fg_sum = 0 51 | self._bg_sum = 0 52 | self._count = 0 53 | 54 | # allow boxes to sit over the edge by a small amount 55 | self._allowed_border = layer_params.get('allowed_border', 0) 56 | 57 | height, width = bottom[0].data.shape[-2:] 58 | if DEBUG: 59 | print('AnchorTargetLayer: height', height, 'width', width) 60 | 61 | #A = self._num_anchors 62 | # labels 63 | #top[0].reshape(1, 1, A * height, width) 64 | # bbox_targets 65 | #top[1].reshape(1, A * 4, height, width) 66 | # bbox_inside_weights 67 | #top[2].reshape(1, A * 4, height, width) 68 | # bbox_outside_weights 69 | #top[3].reshape(1, A * 4, height, width) 70 | 71 | def forward(self, bottom, top): 72 | # Algorithm: 73 | # 74 | # for each (H, W) location i 75 | # generate 9 anchor boxes centered on cell i 76 | # apply predicted bbox deltas at cell i to each of the 9 anchors 77 | # filter out-of-image anchors 78 | # measure GT overlap 79 | 80 | assert bottom[0].data.shape[0] == 1, \ 81 | 'Only single item batches are supported' 82 | 83 | # map of shape (..., H, W) 84 | height, width = bottom[0].data.shape[-2:] 85 | # GT boxes (x1, y1, x2, y2, label) 86 | gt_boxes = bottom[1]#.data 87 | # im_info 88 | im_info = 
bottom[2]#.data[0, :] 89 | 90 | if DEBUG: 91 | print('') 92 | print('im_size: ({}, {})'.format(im_info[0], im_info[1])) 93 | print('scale: {}'.format(im_info[2])) 94 | print('height, width: ({}, {})'.format(height, width)) 95 | print('rpn: gt_boxes.shape', gt_boxes.shape) 96 | print('rpn: gt_boxes', gt_boxes) 97 | 98 | # 1. Generate proposals from bbox deltas and shifted anchors 99 | shift_x = np.arange(0, width) * self._feat_stride 100 | shift_y = np.arange(0, height) * self._feat_stride 101 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 102 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 103 | shift_x.ravel(), shift_y.ravel())).transpose() 104 | # add A anchors (1, A, 4) to 105 | # cell K shifts (K, 1, 4) to get 106 | # shift anchors (K, A, 4) 107 | # reshape to (K*A, 4) shifted anchors 108 | A = self._num_anchors 109 | K = shifts.shape[0] 110 | all_anchors = (self._anchors.reshape((1, A, 4)) + 111 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 112 | all_anchors = all_anchors.reshape((K * A, 4)) 113 | total_anchors = int(K * A) 114 | 115 | # only keep anchors inside the image 116 | inds_inside = np.where( 117 | (all_anchors[:, 0] >= -self._allowed_border) & 118 | (all_anchors[:, 1] >= -self._allowed_border) & 119 | (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width 120 | (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height 121 | )[0] 122 | 123 | if DEBUG: 124 | print('total_anchors', total_anchors) 125 | print('inds_inside', len(inds_inside)) 126 | 127 | # keep only inside anchors 128 | anchors = all_anchors[inds_inside, :] 129 | if DEBUG: 130 | print('anchors.shape', anchors.shape) 131 | 132 | # label: 1 is positive, 0 is negative, -1 is dont care 133 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 134 | labels.fill(-1) 135 | 136 | # overlaps between the anchors and the gt boxes 137 | # overlaps (ex, gt) 138 | overlaps = bbox_overlaps( 139 | np.ascontiguousarray(anchors, dtype=np.float), 140 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 141 | argmax_overlaps = overlaps.argmax(axis=1) 142 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 143 | gt_argmax_overlaps = overlaps.argmax(axis=0) 144 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 145 | np.arange(overlaps.shape[1])] 146 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 147 | 148 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 149 | # assign bg labels first so that positive labels can clobber them 150 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 151 | 152 | # fg label: for each gt, anchor with highest overlap 153 | labels[gt_argmax_overlaps] = 1 154 | 155 | # fg label: above threshold IOU 156 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 157 | 158 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 159 | # assign bg labels last so that negative labels can clobber positives 160 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 161 | 162 | # subsample positive labels if we have too many 163 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 164 | fg_inds = np.where(labels == 1)[0] 165 | if len(fg_inds) > num_fg: 166 | if self._determininistic_mode: 167 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)] 168 | else: 169 | disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 170 | labels[disable_inds] = -1 171 | 172 | # subsample negative labels if we have too many 173 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 174 | bg_inds = np.where(labels == 0)[0] 175 | if 
len(bg_inds) > num_bg: 176 | if self._determininistic_mode: 177 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)] 178 | else: 179 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 180 | labels[disable_inds] = -1 181 | #print "was %s inds, disabling %s, now %s inds" % ( 182 | #len(bg_inds), len(disable_inds), np.sum(labels == 0)) 183 | 184 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 185 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 186 | 187 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 188 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 189 | 190 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 191 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 192 | # uniform weighting of examples (given non-uniform sampling) 193 | num_examples = np.sum(labels >= 0) 194 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 195 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 196 | else: 197 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 198 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 199 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 200 | np.sum(labels == 1)) 201 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 202 | np.sum(labels == 0)) 203 | bbox_outside_weights[labels == 1, :] = positive_weights 204 | bbox_outside_weights[labels == 0, :] = negative_weights 205 | 206 | if DEBUG: 207 | self._sums += bbox_targets[labels == 1, :].sum(axis=0) 208 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 209 | self._counts += np.sum(labels == 1) 210 | means = self._sums / self._counts 211 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2) 212 | print('means:') 213 | print(means) 214 | print('stdevs:') 215 | print(stds) 216 | 217 | # map up to original set of anchors 218 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 219 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 220 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 221 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 222 | 223 | if DEBUG: 224 | print('rpn: max max_overlap', np.max(max_overlaps)) 225 | print('rpn: num_positive', np.sum(labels == 1)) 226 | print('rpn: num_negative', np.sum(labels == 0)) 227 | self._fg_sum += np.sum(labels == 1) 228 | self._bg_sum += np.sum(labels == 0) 229 | self._count += 1 230 | print('rpn: num_positive avg', self._fg_sum / self._count) 231 | print('rpn: num_negative avg', self._bg_sum / self._count) 232 | 233 | # labels 234 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 235 | #labels = labels.reshape((1, 1, A * height, width)) 236 | #top[0].reshape(*labels.shape) 237 | #top[0].data[...] = labels 238 | 239 | # bbox_targets 240 | bbox_targets = bbox_targets \ 241 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 242 | #top[1].reshape(*bbox_targets.shape) 243 | #top[1].data[...] = bbox_targets 244 | 245 | # bbox_inside_weights 246 | bbox_inside_weights = bbox_inside_weights \ 247 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 248 | #assert bbox_inside_weights.shape[2] == height 249 | #assert bbox_inside_weights.shape[3] == width 250 | #top[2].reshape(*bbox_inside_weights.shape) 251 | #top[2].data[...] 
= bbox_inside_weights 252 | 253 | # bbox_outside_weights 254 | #bbox_outside_weights = bbox_outside_weights \ 255 | # .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 256 | #assert bbox_outside_weights.shape[2] == height 257 | #assert bbox_outside_weights.shape[3] == width 258 | #top[3].reshape(*bbox_outside_weights.shape) 259 | #top[3].data[...] = bbox_outside_weights 260 | 261 | return labels, bbox_targets, bbox_inside_weights 262 | 263 | def backward(self, top, propagate_down, bottom): 264 | """This layer does not propagate gradients.""" 265 | pass 266 | 267 | def reshape(self, bottom, top): 268 | """Reshaping happens during the call to forward.""" 269 | pass 270 | 271 | 272 | def _unmap(data, count, inds, fill=0): 273 | """ Unmap a subset of item (data) back to the original set of items (of 274 | size count) """ 275 | if len(data.shape) == 1: 276 | ret = np.empty((count, ), dtype=np.float32) 277 | ret.fill(fill) 278 | ret[inds] = data 279 | else: 280 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 281 | ret.fill(fill) 282 | ret[inds, :] = data 283 | return ret 284 | 285 | 286 | def _compute_targets(ex_rois, gt_rois): 287 | """Compute bounding-box regression targets for an image.""" 288 | 289 | assert ex_rois.shape[0] == gt_rois.shape[0] 290 | assert ex_rois.shape[1] == 4 291 | assert gt_rois.shape[1] == 5 292 | 293 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 294 | -------------------------------------------------------------------------------- /utils/caffe_layers/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | 
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /utils/caffe_layers/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import numpy as np 10 | import yaml 11 | from utils.default_config import cfg 12 | from utils.rpn.generate_anchors import generate_anchors 13 | from utils.caffe_layers.bbox_transform import bbox_transform_inv, clip_boxes 14 | from utils.nms.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer: #(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
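    At test time, with the defaults in utils/default_config.py, the top 6000
    scoring proposals are kept before NMS (TEST.RPN_PRE_NMS_TOP_N), NMS is
    applied with an IoU threshold of 0.7 (TEST.RPN_NMS_THRESH), and at most
    300 proposals survive (TEST.RPN_POST_NMS_TOP_N).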
22 | """ 23 | 24 | def set_param_str(self, param_str): 25 | self.param_str_ = param_str 26 | 27 | def setup(self, bottom, top): 28 | # parse the layer parameter string, which must be valid YAML 29 | layer_params = yaml.load(self.param_str_) 30 | 31 | self._feat_stride = layer_params['feat_stride'] 32 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 33 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 34 | self._num_anchors = self._anchors.shape[0] 35 | self.phase = "TEST" 36 | 37 | #if DEBUG: 38 | #print 'feat_stride: {}'.format(self._feat_stride) 39 | #print 'anchors:' 40 | #print self._anchors 41 | 42 | # rois blob: holds R regions of interest, each is a 5-tuple 43 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 44 | # rectangle (x1, y1, x2, y2) 45 | #top[0].reshape(1, 5) 46 | 47 | # scores blob: holds scores for R regions of interest 48 | #if len(top) > 1: 49 | # top[1].reshape(1, 1, 1, 1) 50 | 51 | def forward(self, bottom, top): 52 | # Algorithm: 53 | # 54 | # for each (H, W) location i 55 | # generate A anchor boxes centered on cell i 56 | # apply predicted bbox deltas at cell i to each of the A anchors 57 | # clip predicted boxes to image 58 | # remove predicted boxes with either height or width < threshold 59 | # sort all (proposal, score) pairs by score from highest to lowest 60 | # take top pre_nms_topN proposals before NMS 61 | # apply NMS with threshold 0.7 to remaining proposals 62 | # take after_nms_topN proposals after NMS 63 | # return the top proposals (-> RoIs top, scores top) 64 | 65 | assert bottom[0].shape[0] == 1, \ 66 | 'Only single item batches are supported' 67 | 68 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 69 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 70 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 71 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 72 | min_size = cfg[cfg_key].RPN_MIN_SIZE 73 | 74 | # the first set of _num_anchors channels are bg probs 75 | # the second set are the fg probs, which we want 76 | scores = bottom[0][:, self._num_anchors:, :, :] 77 | bbox_deltas = bottom[1] 78 | im_info = bottom[2][0, :] 79 | 80 | #if DEBUG: 81 | # print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 82 | # print 'scale: {}'.format(im_info[2]) 83 | 84 | # 1. 
Generate proposals from bbox deltas and shifted anchors 85 | height, width = scores.shape[-2:] 86 | 87 | #if DEBUG: 88 | # print 'score map size: {}'.format(scores.shape) 89 | 90 | # Enumerate all shifts 91 | shift_x = np.arange(0, width) * self._feat_stride 92 | shift_y = np.arange(0, height) * self._feat_stride 93 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 94 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 95 | shift_x.ravel(), shift_y.ravel())).transpose() 96 | 97 | # Enumerate all shifted anchors: 98 | # 99 | # add A anchors (1, A, 4) to 100 | # cell K shifts (K, 1, 4) to get 101 | # shift anchors (K, A, 4) 102 | # reshape to (K*A, 4) shifted anchors 103 | A = self._num_anchors 104 | K = shifts.shape[0] 105 | anchors = self._anchors.reshape((1, A, 4)) + \ 106 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 107 | anchors = anchors.reshape((K * A, 4)) 108 | 109 | # Transpose and reshape predicted bbox transformations to get them 110 | # into the same order as the anchors: 111 | # 112 | # bbox deltas will be (1, 4 * A, H, W) format 113 | # transpose to (1, H, W, 4 * A) 114 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 115 | # in slowest to fastest order 116 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 117 | 118 | # Same story for the scores: 119 | # 120 | # scores are (1, A, H, W) format 121 | # transpose to (1, H, W, A) 122 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 123 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 124 | 125 | # Convert anchors into proposals via bbox transformations 126 | proposals = bbox_transform_inv(anchors, bbox_deltas) 127 | 128 | # 2. clip predicted boxes to image 129 | proposals = clip_boxes(proposals, im_info[:2]) 130 | 131 | # 3. remove predicted boxes with either height or width < threshold 132 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 133 | keep = _filter_boxes(proposals, min_size * im_info[2]) 134 | proposals = proposals[keep, :] 135 | scores = scores[keep] 136 | 137 | # 4. sort all (proposal, score) pairs by score from highest to lowest 138 | # 5. take top pre_nms_topN (e.g. 6000) 139 | order = scores.ravel().argsort()[::-1] 140 | if pre_nms_topN > 0: 141 | order = order[:pre_nms_topN] 142 | proposals = proposals[order, :] 143 | scores = scores[order] 144 | 145 | # 6. apply nms (e.g. threshold = 0.7) 146 | # 7. take after_nms_topN (e.g. 300) 147 | # 8. return the top proposals (-> RoIs top) 148 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 149 | if post_nms_topN > 0: 150 | keep = keep[:post_nms_topN] 151 | proposals = proposals[keep, :] 152 | scores = scores[keep] 153 | 154 | # Output rois blob 155 | # Our RPN implementation only supports a single input image, so all 156 | # batch inds are 0 157 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 158 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 159 | 160 | return blob 161 | #top[0].reshape(*(blob.shape)) 162 | #top[0].data[...] = blob 163 | 164 | # [Optional] output scores blob 165 | #if len(top) > 1: 166 | # top[1].reshape(*(scores.shape)) 167 | # top[1].data[...] 
= scores 168 | 169 | def backward(self, top, propagate_down, bottom): 170 | """This layer does not propagate gradients.""" 171 | pass 172 | 173 | def reshape(self, bottom, top): 174 | """Reshaping happens during the call to forward.""" 175 | pass 176 | 177 | def _filter_boxes(boxes, min_size): 178 | """Remove all boxes with any side smaller than min_size.""" 179 | ws = boxes[:, 2] - boxes[:, 0] + 1 180 | hs = boxes[:, 3] - boxes[:, 1] + 1 181 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 182 | return keep 183 | -------------------------------------------------------------------------------- /utils/caffe_layers/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from utils.default_config import cfg 13 | from utils.rpn.bbox_transform import bbox_transform 14 | from utils.cython_modules.cython_bbox import bbox_overlaps 15 | 16 | DEBUG = False 17 | 18 | class ProposalTargetLayer(): #caffe.Layer): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | 24 | def set_param_str(self, param_str): 25 | self.param_str_ = param_str 26 | 27 | def set_deterministic_mode(self, mode = True): 28 | self._determininistic_mode = mode 29 | 30 | def setup(self, bottom, top): 31 | layer_params = yaml.load(self.param_str_) 32 | self._num_classes = layer_params['num_classes'] 33 | self._determininistic_mode = False 34 | 35 | # sampled rois (0, x1, y1, x2, y2) 36 | #top[0].reshape(1, 5) 37 | # labels 38 | #top[1].reshape(1, 1) 39 | # bbox_targets 40 | #top[2].reshape(1, self._num_classes * 4) 41 | # bbox_inside_weights 42 | #top[3].reshape(1, self._num_classes * 4) 43 | # bbox_outside_weights 44 | #top[4].reshape(1, self._num_classes * 4) 45 | 46 | def forward(self, bottom, top): 47 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 48 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 49 | all_rois = bottom[0] #.data 50 | # GT boxes (x1, y1, x2, y2, label) 51 | # TODO(rbg): it's annoying that sometimes I have extra info before 52 | # and other times after box coordinates -- normalize to one format 53 | gt_boxes = bottom[1] #.data 54 | 55 | # Include ground-truth boxes in the set of candidate rois 56 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 57 | all_rois = np.vstack( 58 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 59 | ) 60 | 61 | # Sanity check: single batch only 62 | assert np.all(all_rois[:, 0] == 0), \ 63 | 'Only single item batches are supported' 64 | 65 | #num_images = 1 66 | #rois_per_image = int(cfg.TRAIN.BATCH_SIZE / num_images) 67 | rois_per_image = cfg.TRAIN.BATCH_SIZE 68 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int) 69 | 70 | # Sample rois with classification labels and bounding box regression 71 | # targets 72 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 73 | all_rois, gt_boxes, fg_rois_per_image, 74 | rois_per_image, self._num_classes, 75 | deterministic=self._determininistic_mode) 76 | 77 | if DEBUG: 78 | print('num fg: {}'.format((labels > 
0).sum())) 79 | print('num bg: {}'.format((labels == 0).sum())) 80 | self._count += 1 81 | self._fg_num += (labels > 0).sum() 82 | self._bg_num += (labels == 0).sum() 83 | print('num fg avg: {}'.format(self._fg_num / self._count)) 84 | print('num bg avg: {}'.format(self._bg_num / self._count)) 85 | print('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))) 86 | 87 | return rois, labels, bbox_targets, bbox_inside_weights 88 | 89 | # sampled rois 90 | #top[0].reshape(*rois.shape) 91 | #top[0].data[...] = rois 92 | 93 | # classification labels 94 | #top[1].reshape(*labels.shape) 95 | #top[1].data[...] = labels 96 | 97 | # bbox_targets 98 | #top[2].reshape(*bbox_targets.shape) 99 | #top[2].data[...] = bbox_targets 100 | 101 | # bbox_inside_weights 102 | #top[3].reshape(*bbox_inside_weights.shape) 103 | #top[3].data[...] = bbox_inside_weights 104 | 105 | # bbox_outside_weights 106 | #top[4].reshape(*bbox_inside_weights.shape) 107 | #top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32) 108 | 109 | def backward(self, top, propagate_down, bottom): 110 | """This layer does not propagate gradients.""" 111 | pass 112 | 113 | def reshape(self, bottom, top): 114 | """Reshaping happens during the call to forward.""" 115 | pass 116 | 117 | 118 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 119 | """Bounding-box regression targets (bbox_target_data) are stored in a 120 | compact form N x (class, tx, ty, tw, th) 121 | 122 | This function expands those targets into the 4-of-4*K representation used 123 | by the network (i.e. only one class has non-zero targets). 124 | 125 | Returns: 126 | bbox_target (ndarray): N x 4K blob of regression targets 127 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 128 | """ 129 | 130 | clss = bbox_target_data[:, 0].astype(int) 131 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 132 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 133 | inds = np.where(clss > 0)[0] 134 | for ind in inds: 135 | cls = clss[ind] 136 | start = 4 * cls 137 | end = start + 4 138 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 139 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 140 | return bbox_targets, bbox_inside_weights 141 | 142 | 143 | def _compute_targets(ex_rois, gt_rois, labels): 144 | """Compute bounding-box regression targets for an image.""" 145 | 146 | assert ex_rois.shape[0] == gt_rois.shape[0] 147 | assert ex_rois.shape[1] == 4 148 | assert gt_rois.shape[1] == 4 149 | 150 | targets = bbox_transform(ex_rois, gt_rois) 151 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 152 | # Optionally normalize targets by a precomputed mean and stdev 153 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 154 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 155 | return np.hstack( 156 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 157 | 158 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False): 159 | """Generate a random sample of RoIs comprising foreground and background 160 | examples. 
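    For example, with the defaults in utils/default_config.py
    (TRAIN.BATCH_SIZE = 128, TRAIN.FG_FRACTION = 0.25), at most
    round(0.25 * 128) = 32 of the sampled RoIs per image are foreground and
    the remaining slots (96 or more) are filled with background RoIs, when
    enough candidates of each kind are available.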
161 | """ 162 | # overlaps: (rois x gt_boxes) 163 | overlaps = bbox_overlaps( 164 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), 165 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 166 | gt_assignment = overlaps.argmax(axis=1) 167 | max_overlaps = overlaps.max(axis=1) 168 | labels = gt_boxes[gt_assignment, 4] 169 | 170 | # Select foreground RoIs as those with >= FG_THRESH overlap 171 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] 172 | # Guard against the case when an image has fewer than fg_rois_per_image 173 | # foreground RoIs 174 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 175 | 176 | # Sample foreground regions without replacement 177 | if fg_inds.size > 0: 178 | if deterministic: 179 | fg_inds = fg_inds[:fg_rois_per_this_image] 180 | else: 181 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 182 | 183 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 184 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & 185 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 186 | # Compute number of background RoIs to take from this image (guarding 187 | # against there being fewer than desired) 188 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 189 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 190 | # Sample background regions without replacement 191 | if bg_inds.size > 0: 192 | if deterministic: 193 | bg_inds = bg_inds[:bg_rois_per_this_image] 194 | else: 195 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 196 | 197 | # The indices that we're selecting (both fg and bg) 198 | keep_inds = np.append(fg_inds, bg_inds) 199 | # Select sampled values from various arrays: 200 | labels = labels[keep_inds] 201 | # Clamp labels for the background RoIs to 0 202 | labels[fg_rois_per_this_image:] = 0 203 | rois = all_rois[keep_inds] 204 | 205 | bbox_target_data = _compute_targets( 206 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) 207 | 208 | bbox_targets, bbox_inside_weights = \ 209 | _get_bbox_regression_labels(bbox_target_data, num_classes) 210 | 211 | return labels, rois, bbox_targets, bbox_inside_weights 212 | -------------------------------------------------------------------------------- /utils/cython_modules/cpu_nms.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /utils/cython_modules/cpu_nms.cpython-34m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cpython-34m.so -------------------------------------------------------------------------------- /utils/cython_modules/cython_bbox.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /utils/cython_modules/cython_bbox.cpython-34m.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cpython-34m.so -------------------------------------------------------------------------------- /utils/default_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | # `pip install easydict` if you don't have it 11 | from easydict import EasyDict as edict 12 | 13 | __C = edict() 14 | cfg = __C 15 | 16 | # 17 | # CNTK parameters 18 | # 19 | 20 | __C.CNTK = edict() 21 | 22 | 23 | __C.CNTK.CONV_BIAS_INIT = 0.0 24 | __C.CNTK.SIGMA_RPN_L1 = 3.0 25 | 26 | __C.CNTK.IMAGE_WIDTH = 850 27 | __C.CNTK.IMAGE_HEIGHT = 850 28 | 29 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3 30 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0 31 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1 32 | 33 | __C.CNTK.DRAW_NEGATIVE_ROIS = False 34 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False 35 | 36 | # 37 | # Training options 38 | # 39 | 40 | __C.TRAIN = edict() 41 | 42 | # Minibatch size (number of regions of interest [ROIs]) 43 | __C.TRAIN.BATCH_SIZE = 128 44 | 45 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 46 | __C.TRAIN.FG_FRACTION = 0.25 47 | 48 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 49 | __C.TRAIN.FG_THRESH = 0.5 50 | 51 | # Overlap threshold for a ROI to be considered background (class = 0 if 52 | # overlap in [LO, HI)) 53 | __C.TRAIN.BG_THRESH_HI = 0.5 54 | __C.TRAIN.BG_THRESH_LO = 0.0 55 | 56 | # Use horizontally-flipped images during training? 
57 | __C.TRAIN.USE_FLIPPED = True
58 | 
59 | # Train bounding-box regressors
60 | __C.TRAIN.BBOX_REG = True
61 | 
62 | # Overlap required between a ROI and ground-truth box in order for that ROI to
63 | # be used as a bounding-box regression training example
64 | __C.TRAIN.BBOX_THRESH = 0.5
65 | 
66 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
67 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
68 | # Deprecated (inside weights)
69 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
70 | # Normalize the targets using "precomputed" (or made up) means and stdevs
71 | # (BBOX_NORMALIZE_TARGETS must also be True)
72 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
73 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
74 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
75 | 
76 | # Train using these proposals
77 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
78 | 
79 | # IOU >= thresh: positive example
80 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
81 | # IOU < thresh: negative example
82 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
83 | # If an anchor satisfies both the positive and the negative conditions, set it to negative
84 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
85 | # Max number of foreground examples
86 | __C.TRAIN.RPN_FG_FRACTION = 0.5
87 | # Total number of examples
88 | __C.TRAIN.RPN_BATCHSIZE = 256
89 | # NMS threshold used on RPN proposals
90 | __C.TRAIN.RPN_NMS_THRESH = 0.7
91 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
92 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
93 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
94 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
95 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
96 | __C.TRAIN.RPN_MIN_SIZE = 16
97 | # Deprecated (inside weights)
98 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
99 | # Give the positive RPN examples weight of p * 1 / {num positives}
100 | # and give negatives a weight of (1 - p)
101 | # Set to -1.0 to use uniform example weighting
102 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
103 | 
104 | 
105 | #
106 | # Testing options
107 | #
108 | 
109 | __C.TEST = edict()
110 | 
111 | # Overlap threshold used for non-maximum suppression (suppress boxes with
112 | # IoU >= this threshold)
113 | __C.TEST.NMS = 0.3
114 | 
115 | # Test using bounding-box regressors
116 | __C.TEST.BBOX_REG = True
117 | 
118 | # Propose boxes
119 | __C.TEST.HAS_RPN = False
120 | 
121 | # Test using these proposals
122 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
123 | 
124 | # NMS threshold used on RPN proposals
125 | __C.TEST.RPN_NMS_THRESH = 0.7
126 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
127 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
128 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
129 | __C.TEST.RPN_POST_NMS_TOP_N = 300
130 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
131 | __C.TEST.RPN_MIN_SIZE = 16
132 | 
133 | 
134 | #
135 | # MISC
136 | #
137 | 
138 | # The mapping from image coordinates to feature map coordinates might cause
139 | # some boxes that are distinct in image space to become identical in feature
140 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
141 | # for identifying duplicate boxes.
142 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
143 | __C.DEDUP_BOXES = 1./16.
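# NOTE: as described in utils/Readme.md, the defaults in this file can be
# overridden by placing a config.py of the same format in the working
# directory: modules such as utils/nms/nms_wrapper.py first try
# `from config import cfg` and fall back to this file only on ImportError.
# A minimal sketch of such an override file (illustrative values only; a
# complete override should define every key its importers read):
#
#     from easydict import EasyDict as edict
#     __C = edict()
#     cfg = __C
#     __C.USE_GPU_NMS = False  # e.g. force CPU NMS on a machine without a GPU
#     __C.GPU_ID = 0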
144 | 
145 | # Pixel mean values (BGR order) as a (1, 1, 3) array
146 | # We use the same pixel mean for all networks even though it's not exactly what
147 | # they were trained with
148 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
149 | 
150 | # For reproducibility
151 | __C.RNG_SEED = 3
152 | 
153 | # A small number that's used many times
154 | __C.EPS = 1e-14
155 | 
156 | # Use GPU implementation of non-maximum suppression
157 | __C.USE_GPU_NMS = True
158 | 
159 | # Default GPU device id
160 | __C.GPU_ID = 0
161 | 
162 | 
163 | def _merge_a_into_b(a, b):
164 |     """Merge config dictionary a into config dictionary b, clobbering the
165 |     options in b whenever they are also specified in a.
166 |     """
167 |     if type(a) is not edict:
168 |         return
169 | 
170 |     for k, v in a.items():
171 |         # a must specify keys that are in b
172 |         if k not in b:
173 |             raise KeyError('{} is not a valid config key'.format(k))
174 | 
175 |         # the types must match, too
176 |         old_type = type(b[k])
177 |         if old_type is not type(v):
178 |             if isinstance(b[k], np.ndarray):
179 |                 v = np.array(v, dtype=b[k].dtype)
180 |             else:
181 |                 raise ValueError(('Type mismatch ({} vs. {}) '
182 |                                   'for config key: {}').format(type(b[k]),
183 |                                                                type(v), k))
184 | 
185 |         # recursively merge dicts
186 |         if type(v) is edict:
187 |             try:
188 |                 _merge_a_into_b(a[k], b[k])
189 |             except:
190 |                 print('Error under config key: {}'.format(k))
191 |                 raise
192 |         else:
193 |             b[k] = v
194 | 
195 | def cfg_from_file(filename):
196 |     """Load a config file and merge it into the default options."""
197 |     import yaml
198 |     with open(filename, 'r') as f:
199 |         yaml_cfg = edict(yaml.load(f))
200 | 
201 |     _merge_a_into_b(yaml_cfg, __C)
202 | 
203 | def cfg_from_list(cfg_list):
204 |     """Set config keys via list (e.g., from command line)."""
205 |     from ast import literal_eval
206 |     assert len(cfg_list) % 2 == 0
207 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
208 |         key_list = k.split('.')
209 |         d = __C
210 |         for subkey in key_list[:-1]:
211 |             assert subkey in d
212 |             d = d[subkey]
213 |         subkey = key_list[-1]
214 |         assert subkey in d
215 |         try:
216 |             value = literal_eval(v)
217 |         except:
218 |             # handle the case when v is a string literal
219 |             value = v
220 |         assert type(value) == type(d[subkey]), \
221 |             'type {} does not match original type {}'.format(
222 |                 type(value), type(d[subkey]))
223 |         d[subkey] = value
224 | 
--------------------------------------------------------------------------------
/utils/map/map_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 | 
7 | import numpy as np
8 | from utils.nms.nms_wrapper import apply_nms_to_test_set_results
9 | 
10 | def evaluate_detections(all_boxes, all_gt_infos, classes, use_07_metric=False, apply_mms=True, nms_threshold=0.5, conf_threshold=0.0):
11 |     '''
12 |     Computes per-class average precision.
13 | 
14 |     Args:
15 |         all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
16 |         all_gt_infos: a dictionary that contains all ground truth annotations in the following form:
17 |             {'class_A': [{'bbox': array([[ 376., 210., 456., 288., 10.]], dtype=float32), 'det': [False], 'difficult': [False]}, ... ]}
18 |             'class_B': [ ], }
19 |         classes: a list of class names, e.g. ['__background__', 'avocado', 'orange', 'butter']
20 |         use_07_metric: whether to use VOC07's 11 point AP computation (default False)
21 |         apply_mms: whether to apply non-maximum suppression before computing average precision values
22 |         nms_threshold: the threshold for discarding overlapping ROIs in nms
23 |         conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded
24 | 
25 |     Returns:
26 |         aps - average precision value per class in a dictionary {classname: ap}
27 |     '''
28 | 
29 |     if apply_mms:
30 |         print("Number of rois before non-maximum suppression: %d" % sum([len(all_boxes[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
31 |         nms_dets,_ = apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold)
32 |         print("Number of rois after non-maximum suppression: %d" % sum([len(nms_dets[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
33 |     else:
34 |         print("Skipping non-maximum suppression")
35 |         nms_dets = all_boxes
36 | 
37 |     aps = {}
38 |     for classIndex, className in enumerate(classes):
39 |         if className != '__background__':
40 |             rec, prec, ap = _evaluate_detections(classIndex, nms_dets, all_gt_infos[className], use_07_metric=use_07_metric)
41 |             aps[className] = ap
42 | 
43 |     return aps
44 | 
45 | def _evaluate_detections(classIndex, all_boxes, gtInfos, overlapThreshold=0.5, use_07_metric=False):
46 |     '''
47 |     Top level function that does the PASCAL VOC evaluation.
48 |     '''
49 | 
50 |     # parse detections for this class
51 |     # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
52 |     num_images = len(all_boxes[0])
53 |     detBboxes = []
54 |     detImgIndices = []
55 |     detConfidences = []
56 |     for imgIndex in range(num_images):
57 |         dets = all_boxes[classIndex][imgIndex]
58 |         if len(dets) > 0:
59 |             for k in range(dets.shape[0]):
60 |                 detImgIndices.append(imgIndex)
61 |                 detConfidences.append(dets[k, -1])
62 |                 # the VOCdevkit expects 1-based indices
63 |                 detBboxes.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
64 |     detBboxes = np.array(detBboxes)
65 |     detConfidences = np.array(detConfidences)
66 | 
67 |     # compute precision / recall / ap
68 |     rec, prec, ap = _voc_computePrecisionRecallAp(
69 |         class_recs=gtInfos,
70 |         confidence=detConfidences,
71 |         image_ids=detImgIndices,
72 |         BB=detBboxes,
73 |         ovthresh=overlapThreshold,
74 |         use_07_metric=use_07_metric)
75 |     return rec, prec, ap
76 | 
77 | def computeAveragePrecision(recalls, precisions, use_07_metric=False):
78 |     '''
79 |     Computes VOC AP given precision and recall.
80 |     '''
81 |     if use_07_metric:
82 |         # 11 point metric
83 |         ap = 0.
84 |         for t in np.arange(0., 1.1, 0.1):
85 |             if np.sum(recalls >= t) == 0:
86 |                 p = 0
87 |             else:
88 |                 p = np.max(precisions[recalls >= t])
89 |             ap = ap + p / 11.
90 |     else:
91 |         # correct AP calculation
92 |         # first append sentinel values at the end
93 |         mrecalls = np.concatenate(([0.], recalls, [1.]))
94 |         mprecisions = np.concatenate(([0.], precisions, [0.]))
95 | 
96 |         # compute the precision envelope
97 |         for i in range(mprecisions.size - 1, 0, -1):
98 |             mprecisions[i - 1] = np.maximum(mprecisions[i - 1], mprecisions[i])
99 | 
100 |         # to calculate area under PR curve, look for points
101 |         # where X axis (recall) changes value
102 |         i = np.where(mrecalls[1:] != mrecalls[:-1])[0]
103 | 
104 |         # and sum (\Delta recall) * prec
105 |         ap = np.sum((mrecalls[i + 1] - mrecalls[i]) * mprecisions[i + 1])
106 |     return ap
107 | 
108 | def _voc_computePrecisionRecallAp(class_recs, confidence, image_ids, BB, ovthresh=0.5, use_07_metric=False):
109 |     '''
110 |     Computes precision, recall, and average precision.
111 |     '''
112 |     if len(BB) == 0:
113 |         return 0.0, 0.0, 0.0
114 | 
115 |     # sort by confidence
116 |     sorted_ind = np.argsort(-confidence)
117 | 
118 |     BB = BB[sorted_ind, :]
119 |     image_ids = [image_ids[x] for x in sorted_ind]
120 | 
121 |     # go down dets and mark TPs and FPs
122 |     nd = len(image_ids)
123 |     tp = np.zeros(nd)
124 |     fp = np.zeros(nd)
125 |     for d in range(nd):
126 |         R = class_recs[image_ids[d]]
127 |         bb = BB[d, :].astype(float)
128 |         ovmax = -np.inf
129 |         BBGT = R['bbox'].astype(float)
130 | 
131 |         if BBGT.size > 0:
132 |             # compute overlaps
133 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
134 |             iymin = np.maximum(BBGT[:, 1], bb[1])
135 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
136 |             iymax = np.minimum(BBGT[:, 3], bb[3])
137 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
138 |             ih = np.maximum(iymax - iymin + 1., 0.)
139 |             inters = iw * ih
140 | 
141 |             # union
142 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
143 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
144 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
145 | 
146 |             overlaps = inters / uni
147 |             ovmax = np.max(overlaps)
148 |             jmax = np.argmax(overlaps)
149 | 
150 |         if ovmax > ovthresh:
151 |             if not R['difficult'][jmax]:
152 |                 if not R['det'][jmax]:
153 |                     tp[d] = 1.
154 |                     R['det'][jmax] = 1
155 |                 else:
156 |                     fp[d] = 1.
157 |         else:
158 |             fp[d] = 1.
159 | 
160 |     # compute precision recall
161 |     npos = sum([len(cr['bbox']) for cr in class_recs])
162 |     fp = np.cumsum(fp)
163 |     tp = np.cumsum(tp)
164 |     rec = tp / float(npos)
165 |     # avoid divide by zero in case the first detection matches a difficult ground truth
166 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
167 |     ap = computeAveragePrecision(rec, prec, use_07_metric)
168 |     return rec, prec, ap
169 | 
--------------------------------------------------------------------------------
/utils/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | from utils.cython_modules.cpu_nms import cpu_nms 9 | try: 10 | from utils.cython_modules.gpu_nms import gpu_nms 11 | gpu_nms_available = True 12 | except ImportError: 13 | gpu_nms_available = False 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | 21 | 22 | def nms(dets, thresh, force_cpu=False): 23 | ''' 24 | Dispatches the call to either CPU or GPU NMS implementations 25 | ''' 26 | if dets.shape[0] == 0: 27 | return [] 28 | if gpu_nms_available and cfg.USE_GPU_NMS and not force_cpu: 29 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 30 | else: 31 | return cpu_nms(dets, thresh) 32 | 33 | def apply_nms_to_single_image_results(coords, labels, scores, nms_threshold=0.5, conf_threshold=0.0): 34 | ''' 35 | Applies nms to the results for a single image. 36 | 37 | Args: 38 | coords: (x_min, y_min, x_max, y_max) coordinates for n rois. shape = (n, 4) 39 | labels: the predicted label per roi. shape = (n, 1) 40 | scores: the predicted score per roi. shape = (n, 1) 41 | nms_threshold: the threshold for discarding overlapping ROIs in nms 42 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded 43 | 44 | Returns: 45 | nmsKeepIndices - the indices of the ROIs to keep after nms 46 | ''' 47 | 48 | # generate input for nms 49 | allIndices = [] 50 | nmsRects = [[[]] for _ in range(max(labels) + 1)] 51 | coordsWithScores = np.hstack((coords, np.array([scores]).T)) 52 | for i in range(max(labels) + 1): 53 | indices = np.where(np.array(labels) == i)[0] 54 | nmsRects[i][0] = coordsWithScores[indices,:] 55 | allIndices.append(indices) 56 | 57 | # call nms 58 | _, nmsKeepIndicesList = apply_nms_to_test_set_results(nmsRects, nms_threshold, conf_threshold) 59 | 60 | # map back to original roi indices 61 | nmsKeepIndices = [] 62 | for i in range(max(labels) + 1): 63 | for keepIndex in nmsKeepIndicesList[i][0]: 64 | nmsKeepIndices.append(allIndices[i][keepIndex]) 65 | assert (len(nmsKeepIndices) == len(set(nmsKeepIndices))) # check that no roi index was added more than once 66 | return nmsKeepIndices 67 | 68 | def apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold): 69 | ''' 70 | Applies nms to the results of multiple images. 71 | 72 | Args: 73 | all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score 74 | nms_threshold: the threshold for discarding overlapping ROIs in nms 75 | conf_threshold: a minimum value for the score of an ROI.
ROIs with lower score will be discarded 76 | 77 | Returns: 78 | nms_boxes - the reduced set of rois after nms 79 | nmsKeepIndices - the indices of the ROIs to keep after nms 80 | ''' 81 | 82 | num_classes = len(all_boxes) 83 | num_images = len(all_boxes[0]) 84 | nms_boxes = [[[] for _ in range(num_images)] 85 | for _ in range(num_classes)] 86 | nms_keepIndices = [[[] for _ in range(num_images)] 87 | for _ in range(num_classes)] 88 | for cls_ind in range(num_classes): 89 | for im_ind in range(num_images): 90 | dets = all_boxes[cls_ind][im_ind] 91 | if len(dets) == 0: 92 | continue 93 | keep = nms(dets.astype(np.float32), nms_threshold) 94 | 95 | # also filter out low confidences 96 | if conf_threshold > 0: 97 | 98 | keep_conf_idx = np.where(dets[:, -1] > conf_threshold) 99 | keep = list(set(keep_conf_idx[0]).intersection(keep)) 100 | 101 | if len(keep) == 0: 102 | continue 103 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 104 | nms_keepIndices[cls_ind][im_ind] = keep 105 | return nms_boxes, nms_keepIndices 106 | 107 | -------------------------------------------------------------------------------- /utils/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = *_tests.py 3 | -------------------------------------------------------------------------------- /utils/rpn/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | from cntk import output_variable 9 | from cntk.ops.functions import UserFunction 10 | import yaml 11 | import numpy as np 12 | import numpy.random as npr 13 | from utils.rpn.generate_anchors import generate_anchors 14 | from utils.rpn.bbox_transform import bbox_transform 15 | from utils.cython_modules.cython_bbox import bbox_overlaps 16 | 17 | try: 18 | from config import cfg 19 | except ImportError: 20 | from utils.default_config import cfg 21 | 22 | DEBUG = False 23 | 24 | class AnchorTargetLayer(UserFunction): 25 | ''' 26 | Assign anchors to ground-truth targets. Produces anchor classification 27 | labels and bounding-box regression targets.
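Each anchor is labeled 1 (object), 0 (background) or -1 (ignored) based on its IoU overlap with the ground-truth boxes; see forward() below.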
28 | ''' 29 | 30 | def __init__(self, arg1, arg2, arg3, name='AnchorTargetLayer', param_str=None, cfm_shape=None, deterministic=False): 31 | super(AnchorTargetLayer, self).__init__([arg1, arg2, arg3], name=name) 32 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32" 33 | 34 | # parse the layer parameter string, which must be valid YAML 35 | layer_params = yaml.safe_load(self.param_str_) 36 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 37 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 38 | self._num_anchors = self._anchors.shape[0] 39 | self._feat_stride = layer_params['feat_stride'] 40 | self._cfm_shape = cfm_shape 41 | self._deterministic_mode = deterministic 42 | 43 | if DEBUG: 44 | print ('anchors:') 45 | print (self._anchors) 46 | print ('anchor shapes:') 47 | print (np.hstack(( 48 | self._anchors[:, 2::4] - self._anchors[:, 0::4], 49 | self._anchors[:, 3::4] - self._anchors[:, 1::4], 50 | ))) 51 | self._counts = cfg.EPS 52 | self._sums = np.zeros((1, 4)) 53 | self._squared_sums = np.zeros((1, 4)) 54 | self._fg_sum = 0 55 | self._bg_sum = 0 56 | self._count = 0 57 | 58 | # allow boxes to sit over the edge by a small amount 59 | self._allowed_border = 0 # layer_params.get('allowed_border', 0) 60 | 61 | def infer_outputs(self): 62 | # This is a necessary workaround: after cloning, the cloned inputs are just placeholders without the proper shape 63 | if self._cfm_shape is None: 64 | self._cfm_shape = self.inputs[0].shape 65 | height, width = self._cfm_shape[-2:] 66 | 67 | if DEBUG: 68 | print('AnchorTargetLayer: height', height, 'width', width) 69 | 70 | A = self._num_anchors 71 | # labels 72 | labelShape = (1, A, height, width) 73 | # Comment: this layer uses encoded labels, while in CNTK we mostly use one-hot labels 74 | # bbox_targets 75 | bbox_target_shape = (1, A * 4, height, width) 76 | # bbox_inside_weights 77 | bbox_inside_weights_shape = (1, A * 4, height, width) 78 | 79 | return [output_variable(labelShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 80 | name="objectness_target", needs_gradient=False), 81 | output_variable(bbox_target_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 82 | name="rpn_bbox_target", needs_gradient=False), 83 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 84 | name="rpn_bbox_inside_w", needs_gradient=False),] 85 | 86 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None): 87 | # Algorithm: 88 | # 89 | # for each (H, W) location i 90 | # generate 9 anchor boxes centered on cell i 91 | # apply predicted bbox deltas at cell i to each of the 9 anchors 92 | # filter out-of-image anchors 93 | # measure GT overlap 94 | 95 | bottom = arguments 96 | 97 | # map of shape (..., H, W) 98 | height, width = bottom[0].shape[-2:] 99 | # GT boxes (x1, y1, x2, y2, label) 100 | gt_boxes = bottom[1][0,:] 101 | # im_info 102 | im_info = bottom[2][0] 103 | 104 | # remove zero-padded ground truth boxes 105 | keep = np.where( 106 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) & 107 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0) 108 | ) 109 | gt_boxes = gt_boxes[keep] 110 | 111 | if DEBUG: 112 | print ('') 113 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 114 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 115 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1])) 116 |
print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3])) 117 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5])) 118 | print ('height, width: ({}, {})'.format(height, width)) 119 | print ('rpn: gt_boxes.shape', gt_boxes.shape) 120 | #print ('rpn: gt_boxes', gt_boxes) 121 | 122 | # 1. Generate proposals from bbox deltas and shifted anchors 123 | shift_x = np.arange(0, width) * self._feat_stride 124 | shift_y = np.arange(0, height) * self._feat_stride 125 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 126 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 127 | shift_x.ravel(), shift_y.ravel())).transpose() 128 | # add A anchors (1, A, 4) to 129 | # cell K shifts (K, 1, 4) to get 130 | # shift anchors (K, A, 4) 131 | # reshape to (K*A, 4) shifted anchors 132 | A = self._num_anchors 133 | K = shifts.shape[0] 134 | all_anchors = (self._anchors.reshape((1, A, 4)) + 135 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 136 | all_anchors = all_anchors.reshape((K * A, 4)) 137 | total_anchors = int(K * A) 138 | 139 | # only keep anchors inside the image 140 | padded_wh = im_info[0:2] 141 | scaled_wh = im_info[2:4] 142 | xy_offset = (padded_wh - scaled_wh) / 2 143 | xy_min = xy_offset 144 | xy_max = xy_offset + scaled_wh 145 | 146 | inds_inside = np.where( 147 | (all_anchors[:, 0] >= xy_min[0] - self._allowed_border) & 148 | (all_anchors[:, 1] >= xy_min[1] - self._allowed_border) & 149 | (all_anchors[:, 2] < xy_max[0] + self._allowed_border) & # width 150 | (all_anchors[:, 3] < xy_max[1] + self._allowed_border) # height 151 | )[0] 152 | 153 | if DEBUG: 154 | print ('total_anchors', total_anchors) 155 | print ('inds_inside', len(inds_inside)) 156 | 157 | # keep only inside anchors 158 | anchors = all_anchors[inds_inside, :] 159 | if DEBUG: 160 | print ('anchors.shape', anchors.shape) 161 | print('gt_boxes.shape', gt_boxes.shape) 162 | 163 | # label: 1 is positive, 0 is negative, -1 is don't care 164 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 165 | labels.fill(-1) 166 | 167 | # overlaps between the anchors and the gt boxes 168 | # overlaps (ex, gt) 169 | overlaps = bbox_overlaps( 170 | np.ascontiguousarray(anchors, dtype=float), 171 | np.ascontiguousarray(gt_boxes, dtype=float)) 172 | argmax_overlaps = overlaps.argmax(axis=1) 173 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 174 | gt_argmax_overlaps = overlaps.argmax(axis=0) 175 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 176 | np.arange(overlaps.shape[1])] 177 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 178 | 179 | if not cfg["TRAIN"].RPN_CLOBBER_POSITIVES: 180 | # assign bg labels first so that positive labels can clobber them 181 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 182 | 183 | # fg label: for each gt, anchor with highest overlap 184 | labels[gt_argmax_overlaps] = 1 185 | 186 | # fg label: above threshold IOU 187 | labels[max_overlaps >= cfg["TRAIN"].RPN_POSITIVE_OVERLAP] = 1 188 | 189 | if cfg["TRAIN"].RPN_CLOBBER_POSITIVES: 190 | # assign bg labels last so that negative labels can clobber positives 191 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 192 | 193 | # subsample positive labels if we have too many 194 | num_fg = int(cfg["TRAIN"].RPN_FG_FRACTION * cfg["TRAIN"].RPN_BATCHSIZE) 195 | fg_inds = np.where(labels == 1)[0] 196 | if len(fg_inds) > num_fg: 197 | if self._deterministic_mode: 198 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)] 199 | else: 200 | disable_inds = npr.choice(fg_inds,
size=(len(fg_inds) - num_fg), replace=False) 201 | labels[disable_inds] = -1 202 | 203 | # subsample negative labels if we have too many 204 | num_bg = cfg["TRAIN"].RPN_BATCHSIZE - np.sum(labels == 1) 205 | bg_inds = np.where(labels == 0)[0] 206 | if len(bg_inds) > num_bg: 207 | if self._deterministic_mode: 208 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)] 209 | else: 210 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 211 | labels[disable_inds] = -1 212 | 213 | 214 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 215 | 216 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 217 | bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) 218 | 219 | if DEBUG: 220 | self._sums += bbox_targets[labels == 1, :].sum(axis=0) 221 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 222 | self._counts += np.sum(labels == 1) 223 | means = self._sums / self._counts 224 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2) 225 | print ('means:') 226 | print (means) 227 | print ('stdevs:') 228 | print (stds) 229 | 230 | # map up to original set of anchors 231 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 232 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 233 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 234 | 235 | if DEBUG: 236 | print ('rpn: max max_overlap', np.max(max_overlaps)) 237 | print ('rpn: num_positive', np.sum(labels == 1)) 238 | print ('rpn: num_negative', np.sum(labels == 0)) 239 | self._fg_sum += np.sum(labels == 1) 240 | self._bg_sum += np.sum(labels == 0) 241 | self._count += 1 242 | print ('rpn: num_positive avg', self._fg_sum / self._count) 243 | print ('rpn: num_negative avg', self._bg_sum / self._count) 244 | 245 | # labels 246 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 247 | outputs[self.outputs[0]] = np.ascontiguousarray(labels) 248 | 249 | # bbox_targets 250 | bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 251 | outputs[self.outputs[1]] = np.ascontiguousarray(bbox_targets) 252 | 253 | # bbox_inside_weights 254 | bbox_inside_weights = bbox_inside_weights \ 255 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 256 | assert bbox_inside_weights.shape[2] == height 257 | assert bbox_inside_weights.shape[3] == width 258 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_inside_weights) 259 | 260 | # No state needs to be passed to backward() so we just pass None 261 | return None 262 | 263 | def backward(self, state, root_gradients, variables): 264 | """This layer does not propagate gradients.""" 265 | pass 266 | 267 | def clone(self, cloned_inputs): 268 | return AnchorTargetLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_, cfm_shape=self._cfm_shape) 269 | 270 | def serialize(self): 271 | internal_state = {} 272 | internal_state['param_str'] = self.param_str_ 273 | return internal_state 274 | 275 | @staticmethod 276 | def deserialize(inputs, name, state): 277 | param_str = state['param_str'] 278 | return AnchorTargetLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str) 279 | 280 | 281 | def _unmap(data, count, inds, fill=0): 282 | """ Unmap a subset of items (data) back to the original set of items (of size count) """ 283 | if len(data.shape) == 1: 284 | ret = np.empty((count, ),
dtype=np.float32) 285 | ret.fill(fill) 286 | ret[inds] = data 287 | else: 288 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 289 | ret.fill(fill) 290 | ret[inds, :] = data 291 | return ret 292 | 293 | 294 | def _compute_targets(ex_rois, gt_rois): 295 | """Compute bounding-box regression targets for an image.""" 296 | 297 | assert ex_rois.shape[0] == gt_rois.shape[0] 298 | assert ex_rois.shape[1] == 4 299 | assert gt_rois.shape[1] == 5 300 | 301 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 302 | -------------------------------------------------------------------------------- /utils/rpn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | # compute example and ground-truth centers, widths and heights, 10 | # and return the optimal target deltas 11 | def bbox_transform(ex_rois, gt_rois): 12 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 13 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 14 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 15 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 16 | 17 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 18 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 19 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 20 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 21 | 22 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 23 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 24 | targets_dw = np.log(gt_widths / ex_widths) 25 | targets_dh = np.log(gt_heights / ex_heights) 26 | 27 | targets = np.vstack( 28 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 29 | return targets 30 | 31 | # gets 32 | # - boxes (n, 4) as [x_low, y_low, x_high, y_high] 33 | # - deltas (n, 4) as [dx, dy, dw, dh] 34 | # returns 35 | # - pred_boxes (n, 4) as [x_low, y_low, x_high, y_high] 36 | # where 37 | # pred_ctr_x = dx * widths + ctr_x 38 | # --> pred_x_low = pred_ctr_x - 0.5 * pred_w 39 | # and 40 | # pred_w = np.exp(dw) * widths 41 | def bbox_transform_inv(boxes, deltas): 42 | if boxes.shape[0] == 0: 43 | 44 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 45 | 46 | boxes = boxes.astype(deltas.dtype, copy=False) 47 | 48 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 49 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 50 | ctr_x = boxes[:, 0] + 0.5 * widths 51 | ctr_y = boxes[:, 1] + 0.5 * heights 52 | 53 | # avoid overflow in exp 54 | dx = np.clip(deltas[:, 0::4], None, 10) 55 | dy = np.clip(deltas[:, 1::4], None, 10) 56 | dw = np.clip(deltas[:, 2::4], None, 10) 57 | dh = np.clip(deltas[:, 3::4], None, 10) 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 65 | # x1 66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 67 | # y1 68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 69 | # x2 70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 71 | # y2 72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 73 | 74 | return pred_boxes 75 | 76 | def clip_boxes(boxes, im_info): 77 | ''' 78 | Clip boxes to image boundaries.
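Boxes are clipped to the scaled image region inside the padded canvas (from xy_offset to xy_offset + scaled_wh), not to the full padded extent.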
79 | :param boxes: boxes 80 | :param im_info: (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 81 | e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 82 | ''' 83 | 84 | im_info.shape = (6) 85 | padded_wh = im_info[0:2] 86 | scaled_wh = im_info[2:4] 87 | xy_offset = (padded_wh - scaled_wh) / 2 88 | xy_min = xy_offset 89 | xy_max = xy_offset + scaled_wh 90 | 91 | # x_min <= x1 <= x_max 92 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], xy_max[0] - 1), xy_min[0]) 93 | # y_min <= y1 <= y_max 94 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], xy_max[1] - 1), xy_min[1]) 95 | # x_min <= x2 <= x_max 96 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], xy_max[0] - 1), xy_min[0]) 97 | # y_min <= y2 <= y_max 98 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], xy_max[1] - 1), xy_min[1]) 99 | return boxes 100 | -------------------------------------------------------------------------------- /utils/rpn/cntk_smoothL1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import cntk as C 9 | 10 | def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights): 11 | """ 12 | From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py 13 | 14 | ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets)) 15 | SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 16 | |x| - 0.5 / sigma^2, otherwise 17 | """ 18 | sigma2 = sigma * sigma 19 | 20 | inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets))) 21 | 22 | smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2) 23 | smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2) 24 | smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2) 25 | smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign), 26 | C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign))) 27 | 28 | return C.element_times(bbox_outside_weights, smooth_l1_result) 29 | -------------------------------------------------------------------------------- /utils/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 10 | scales=2**np.arange(3, 6)): 11 | """ 12 | Generate anchor (reference) windows by enumerating aspect ratios X 13 | scales wrt a reference (0, 0, 15, 15) window. 14 | """ 15 | 16 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 17 | ratio_anchors = _ratio_enum(base_anchor, ratios) 18 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 19 | for i in range(ratio_anchors.shape[0])]) # was xrange 20 | return anchors 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 
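For example, the base anchor (0, 0, 15, 15) yields (w, h, x_ctr, y_ctr) = (16, 16, 7.5, 7.5).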
25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | def _mkanchors(ws, hs, x_ctr, y_ctr): 34 | """ 35 | Given a vector of widths (ws) and heights (hs) around a center 36 | (x_ctr, y_ctr), output a set of anchors (windows). 37 | """ 38 | 39 | ws = ws[:, np.newaxis] 40 | hs = hs[:, np.newaxis] 41 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 42 | y_ctr - 0.5 * (hs - 1), 43 | x_ctr + 0.5 * (ws - 1), 44 | y_ctr + 0.5 * (hs - 1))) 45 | return anchors 46 | 47 | def _ratio_enum(anchor, ratios): 48 | """ 49 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 50 | """ 51 | 52 | w, h, x_ctr, y_ctr = _whctrs(anchor) 53 | size = w * h 54 | size_ratios = size / ratios 55 | ws = np.round(np.sqrt(size_ratios)) 56 | hs = np.round(ws * ratios) 57 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 58 | return anchors 59 | 60 | def _scale_enum(anchor, scales): 61 | """ 62 | Enumerate a set of anchors for each scale wrt an anchor. 63 | """ 64 | 65 | w, h, x_ctr, y_ctr = _whctrs(anchor) 66 | ws = w * scales 67 | hs = h * scales 68 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 69 | return anchors 70 | 71 | if __name__ == '__main__': 72 | import time 73 | t = time.time() 74 | a = generate_anchors() 75 | print (time.time() - t) 76 | print (a) 77 | from IPython import embed; embed() 78 | -------------------------------------------------------------------------------- /utils/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from cntk import output_variable, FreeDimension 8 | from cntk.ops.functions import UserFunction 9 | import numpy as np 10 | import yaml 11 | from utils.rpn.generate_anchors import generate_anchors 12 | from utils.rpn.bbox_transform import bbox_transform_inv, clip_boxes 13 | from utils.nms.nms_wrapper import nms 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | DEBUG = False 21 | 22 | class ProposalLayer(UserFunction): 23 | ''' 24 | Outputs object detection proposals by applying estimated bounding-box 25 | transformations to a set of regular boxes (called "anchors"). 
26 | ''' 27 | 28 | def __init__(self, arg1, arg2, arg3, name='ProposalLayer', param_str=None): 29 | super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name) 30 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32" 31 | 32 | # parse the layer parameter string, which must be valid YAML 33 | layer_params = yaml.safe_load(self.param_str_) 34 | self._feat_stride = layer_params['feat_stride'] 35 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 36 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 37 | self._num_anchors = self._anchors.shape[0] 38 | 39 | if DEBUG: 40 | print ('feat_stride: {}'.format(self._feat_stride)) 41 | print ('anchors:') 42 | print (self._anchors) 43 | 44 | def infer_outputs(self): 45 | # rois blob: holds R regions of interest, each is a 5-tuple 46 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 47 | # rectangle (x1, y1, x2, y2) 48 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python 49 | proposalShape = (FreeDimension, 4) 50 | 51 | return [output_variable(proposalShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 52 | name="rpn_rois_raw", needs_gradient=False)] 53 | 54 | def forward(self, arguments, device=None, outputs_to_retain=None): 55 | # Algorithm: 56 | # 57 | # for each (H, W) location i 58 | # generate A anchor boxes centered on cell i 59 | # apply predicted bbox deltas at cell i to each of the A anchors 60 | # clip predicted boxes to image 61 | # remove predicted boxes with either height or width < threshold 62 | # sort all (proposal, score) pairs by score from highest to lowest 63 | # take top pre_nms_topN proposals before NMS 64 | # apply NMS with threshold 0.7 to remaining proposals 65 | # take after_nms_topN proposals after NMS 66 | # return the top proposals (-> RoIs top, scores top) 67 | 68 | # use potentially different number of proposals for training vs evaluation 69 | if len(outputs_to_retain) == 0: 70 | # evaluation 71 | pre_nms_topN = cfg["TEST"].RPN_PRE_NMS_TOP_N 72 | post_nms_topN = cfg["TEST"].RPN_POST_NMS_TOP_N 73 | nms_thresh = cfg["TEST"].RPN_NMS_THRESH 74 | min_size = cfg["TEST"].RPN_MIN_SIZE 75 | else: 76 | pre_nms_topN = cfg["TRAIN"].RPN_PRE_NMS_TOP_N 77 | post_nms_topN = cfg["TRAIN"].RPN_POST_NMS_TOP_N 78 | nms_thresh = cfg["TRAIN"].RPN_NMS_THRESH 79 | min_size = cfg["TRAIN"].RPN_MIN_SIZE 80 | 81 | bottom = arguments 82 | assert bottom[0].shape[0] == 1, \ 83 | 'Only single item batches are supported' 84 | 85 | # the first set of _num_anchors channels are bg probs 86 | # the second set are the fg probs, which we want 87 | scores = bottom[0][:, self._num_anchors:, :, :] 88 | bbox_deltas = bottom[1] 89 | im_info = bottom[2][0] 90 | 91 | if DEBUG: 92 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 93 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 94 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1])) 95 | print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3])) 96 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5])) 97 | 98 | # 1.
Generate proposals from bbox deltas and shifted anchors 99 | height, width = scores.shape[-2:] 100 | 101 | if DEBUG: 102 | print ('score map size: {}'.format(scores.shape)) 103 | 104 | # Enumerate all shifts 105 | shift_x = np.arange(0, width) * self._feat_stride 106 | shift_y = np.arange(0, height) * self._feat_stride 107 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 108 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 109 | shift_x.ravel(), shift_y.ravel())).transpose() 110 | 111 | # Enumerate all shifted anchors: 112 | # 113 | # add A anchors (1, A, 4) to 114 | # cell K shifts (K, 1, 4) to get 115 | # shift anchors (K, A, 4) 116 | # reshape to (K*A, 4) shifted anchors 117 | A = self._num_anchors 118 | K = shifts.shape[0] 119 | anchors = self._anchors.reshape((1, A, 4)) + \ 120 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 121 | anchors = anchors.reshape((K * A, 4)) 122 | 123 | # Transpose and reshape predicted bbox transformations to get them 124 | # into the same order as the anchors: 125 | # 126 | # bbox deltas will be (1, 4 * A, H, W) format 127 | # transpose to (1, H, W, 4 * A) 128 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 129 | # in slowest to fastest order 130 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 131 | 132 | # Same story for the scores: 133 | # 134 | # scores are (1, A, H, W) format 135 | # transpose to (1, H, W, A) 136 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 137 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 138 | 139 | # Convert anchors into proposals via bbox transformations 140 | proposals = bbox_transform_inv(anchors, bbox_deltas) 141 | 142 | # 2. clip predicted boxes to image 143 | proposals = clip_boxes(proposals, im_info) 144 | 145 | # 3. remove predicted boxes with either height or width < threshold 146 | # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4]) 147 | cntk_image_scale = im_info[2] / im_info[4] 148 | keep = _filter_boxes(proposals, min_size * cntk_image_scale) 149 | proposals = proposals[keep, :] 150 | scores = scores[keep] 151 | 152 | # 4. sort all (proposal, score) pairs by score from highest to lowest 153 | # 5. take top pre_nms_topN (e.g. 6000) 154 | order = scores.ravel().argsort()[::-1] 155 | if pre_nms_topN > 0: 156 | order = order[:pre_nms_topN] 157 | proposals = proposals[order, :] 158 | scores = scores[order] 159 | 160 | # 6. apply nms (e.g. threshold = 0.7) 161 | # 7. take after_nms_topN (e.g. 300) 162 | # 8. 
return the top proposals (-> RoIs top) 163 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 164 | if post_nms_topN > 0: 165 | keep = keep[:post_nms_topN] 166 | proposals = proposals[keep, :] 167 | scores = scores[keep] 168 | 169 | # pad with zeros if too few rois were found 170 | num_found_proposals = proposals.shape[0] 171 | if num_found_proposals < post_nms_topN: 172 | if DEBUG: 173 | print("Only {} proposals generated in ProposalLayer".format(num_found_proposals)) 174 | proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32) 175 | proposals_padded[:num_found_proposals, :] = proposals 176 | proposals = proposals_padded 177 | 178 | # Output rois blob 179 | # Our RPN implementation only supports a single input image, so all 180 | # batch inds are 0 181 | # for CNTK: add batch axis to output shape 182 | proposals.shape = (1,) + proposals.shape 183 | 184 | return None, proposals 185 | 186 | def backward(self, state, root_gradients, variables): 187 | """This layer does not propagate gradients.""" 188 | pass 189 | 190 | def clone(self, cloned_inputs): 191 | return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_) 192 | 193 | def serialize(self): 194 | internal_state = {} 195 | internal_state['param_str'] = self.param_str_ 196 | 197 | return internal_state 198 | 199 | @staticmethod 200 | def deserialize(inputs, name, state): 201 | param_str = state['param_str'] 202 | 203 | return ProposalLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str) 204 | 205 | 206 | def _filter_boxes(boxes, min_size): 207 | """Remove all boxes with any side smaller than min_size.""" 208 | ws = boxes[:, 2] - boxes[:, 0] + 1 209 | hs = boxes[:, 3] - boxes[:, 1] + 1 210 | if np.isnan(ws[0]): 211 | print('Warning: NaN encountered in box widths in _filter_boxes') 212 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 213 | return keep 214 | -------------------------------------------------------------------------------- /utils/rpn/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from cntk import output_variable, FreeDimension 8 | from cntk.ops.functions import UserFunction 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from utils.rpn.bbox_transform import bbox_transform 13 | from utils.cython_modules.cython_bbox import bbox_overlaps 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | DEBUG = False 21 | 22 | class ProposalTargetLayer(UserFunction): 23 | ''' 24 | Assign object detection proposals to ground-truth targets. Produces proposal 25 | classification labels and bounding-box regression targets.
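The ground-truth boxes are added to the candidate ROIs, and a fixed-size batch of foreground and background samples is then drawn from the combined set (see _sample_rois below).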
26 | ''' 27 | 28 | def __init__(self, arg1, arg2, name='ProposalTargetLayer', param_str=None, deterministic=False): 29 | super(ProposalTargetLayer, self).__init__([arg1, arg2], name=name) 30 | self.param_str_ = param_str if param_str is not None else "'num_classes': 2" 31 | 32 | # parse the layer parameter string, which must be valid YAML 33 | layer_params = yaml.safe_load(self.param_str_) 34 | self._num_classes = layer_params['num_classes'] 35 | self._deterministic_mode = deterministic 36 | 37 | self._count = 0 38 | self._fg_num = 0 39 | self._bg_num = 0 40 | 41 | def infer_outputs(self): 42 | # sampled rois (0, x1, y1, x2, y2) 43 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python 44 | rois_shape = (FreeDimension, 4) 45 | labels_shape = (FreeDimension, self._num_classes) 46 | bbox_targets_shape = (FreeDimension, self._num_classes * 4) 47 | bbox_inside_weights_shape = (FreeDimension, self._num_classes * 4) 48 | 49 | return [output_variable(rois_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 50 | name="rpn_target_rois_raw", needs_gradient=False), 51 | output_variable(labels_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 52 | name="label_targets_raw", needs_gradient=False), 53 | output_variable(bbox_targets_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 54 | name="bbox_targets_raw", needs_gradient=False), 55 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 56 | name="bbox_inside_w_raw", needs_gradient=False)] 57 | 58 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None): 59 | bottom = arguments 60 | 61 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 62 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 63 | all_rois = bottom[0][0,:] 64 | # remove zero-padded proposals 65 | keep0 = np.where( 66 | ((all_rois[:, 2] - all_rois[:, 0]) > 0) & 67 | ((all_rois[:, 3] - all_rois[:, 1]) > 0) 68 | ) 69 | all_rois = all_rois[keep0] 70 | 71 | # GT boxes (x1, y1, x2, y2, label) 72 | # TODO(rbg): it's annoying that sometimes I have extra info before 73 | # and other times after box coordinates -- normalize to one format 74 | gt_boxes = bottom[1][0,:] 75 | # remove zero-padded ground truth boxes 76 | keep1 = np.where( 77 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) & 78 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0) 79 | ) 80 | gt_boxes = gt_boxes[keep1] 81 | 82 | assert gt_boxes.shape[0] > 0, \ 83 | "No ground truth boxes provided" 84 | 85 | # Include ground-truth boxes in the set of candidate rois 86 | # for CNTK: add batch index axis with all zeros to both inputs 87 | all_rois = np.vstack((all_rois, gt_boxes[:, :-1])) 88 | zeros = np.zeros((all_rois.shape[0], 1), dtype=all_rois.dtype) 89 | all_rois = np.hstack((zeros, all_rois)) 90 | 91 | # Sanity check: single batch only 92 | assert np.all(all_rois[:, 0] == 0), \ 93 | 'Only single item batches are supported' 94 | 95 | rois_per_image = cfg.TRAIN.BATCH_SIZE 96 | fg_rois_per_image = np.round(cfg["TRAIN"].FG_FRACTION * rois_per_image).astype(int) 97 | 98 | # Sample rois with classification labels and bounding box regression 99 | # targets 100 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 101 | all_rois, gt_boxes, fg_rois_per_image, 102 | rois_per_image, self._num_classes, 103 | deterministic=self._deterministic_mode) 104 | 105 | if DEBUG: 106 | print ('num rois: {}'.format(rois_per_image)) 107 | print ('num fg: {}'.format((labels > 0).sum())) 108 | print ('num bg: {}'.format((labels == 0).sum())) 109 |
self._count += 1 110 | self._fg_num += (labels > 0).sum() 111 | self._bg_num += (labels == 0).sum() 112 | print ('num fg avg: {}'.format(self._fg_num / self._count)) 113 | print ('num bg avg: {}'.format(self._bg_num / self._count)) 114 | print ('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))) 115 | 116 | # pad with zeros if too few rois were found 117 | num_found_rois = rois.shape[0] 118 | if num_found_rois < rois_per_image: 119 | rois_padded = np.zeros((rois_per_image, rois.shape[1]), dtype=np.float32) 120 | rois_padded[:num_found_rois, :] = rois 121 | rois = rois_padded 122 | 123 | labels_padded = np.zeros((rois_per_image), dtype=np.float32) 124 | labels_padded[:num_found_rois] = labels 125 | labels = labels_padded 126 | 127 | bbox_targets_padded = np.zeros((rois_per_image, bbox_targets.shape[1]), dtype=np.float32) 128 | bbox_targets_padded[:num_found_rois, :] = bbox_targets 129 | bbox_targets = bbox_targets_padded 130 | 131 | bbox_inside_weights_padded = np.zeros((rois_per_image, bbox_inside_weights.shape[1]), dtype=np.float32) 132 | bbox_inside_weights_padded[:num_found_rois, :] = bbox_inside_weights 133 | bbox_inside_weights = bbox_inside_weights_padded 134 | 135 | # for CNTK: get rid of batch ind zeros and add batch axis 136 | rois = rois[:,1:] 137 | 138 | # sampled rois 139 | rois.shape = (1,) + rois.shape 140 | outputs[self.outputs[0]] = np.ascontiguousarray(rois) 141 | 142 | # classification labels 143 | labels_as_int = [i.item() for i in labels.astype(int)] 144 | labels_dense = np.eye(self._num_classes, dtype=np.float32)[labels_as_int] 145 | labels_dense.shape = (1,) + labels_dense.shape # batch axis 146 | outputs[self.outputs[1]] = labels_dense 147 | 148 | # bbox_targets 149 | bbox_targets.shape = (1,) + bbox_targets.shape # batch axis 150 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_targets) 151 | 152 | # bbox_inside_weights 153 | bbox_inside_weights.shape = (1,) + bbox_inside_weights.shape # batch axis 154 | outputs[self.outputs[3]] = np.ascontiguousarray(bbox_inside_weights) 155 | 156 | def backward(self, state, root_gradients, variables): 157 | """This layer does not propagate gradients.""" 158 | pass 159 | 160 | def clone(self, cloned_inputs): 161 | return ProposalTargetLayer(cloned_inputs[0], cloned_inputs[1], param_str=self.param_str_) 162 | 163 | def serialize(self): 164 | internal_state = {} 165 | internal_state['param_str'] = self.param_str_ 166 | return internal_state 167 | 168 | @staticmethod 169 | def deserialize(inputs, name, state): 170 | param_str = state['param_str'] 171 | return ProposalTargetLayer(inputs[0], inputs[1], name=name, param_str=param_str) 172 | 173 | 174 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 175 | """Bounding-box regression targets (bbox_target_data) are stored in a 176 | compact form N x (class, tx, ty, tw, th) 177 | 178 | This function expands those targets into the 4-of-4*K representation used 179 | by the network (i.e. only one class has non-zero targets). 
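For example, with num_classes = 4, a target row of class 2 gets its (tx, ty, tw, th) written to columns 8:12 of bbox_targets, and the same columns of bbox_inside_weights are set to 1.0.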
180 | 181 | Returns: 182 | bbox_target (ndarray): N x 4K blob of regression targets 183 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 184 | """ 185 | 186 | clss = bbox_target_data[:, 0].astype(int) 187 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 188 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 189 | inds = np.where(clss > 0)[0] 190 | for ind in inds: 191 | cls = clss[ind] 192 | start = 4 * cls 193 | end = start + 4 194 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 195 | bbox_inside_weights[ind, start:end] = [1.0, 1.0, 1.0, 1.0] 196 | return bbox_targets, bbox_inside_weights 197 | 198 | 199 | def _compute_targets(ex_rois, gt_rois, labels): 200 | """Compute bounding-box regression targets for an image.""" 201 | 202 | assert ex_rois.shape[0] == gt_rois.shape[0] 203 | assert ex_rois.shape[1] == 4 204 | assert gt_rois.shape[1] == 4 205 | 206 | targets = bbox_transform(ex_rois, gt_rois) 207 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 208 | # Optionally normalize targets by a precomputed mean and stdev 209 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 210 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 211 | 212 | return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 213 | 214 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False): 215 | """Generate a random sample of RoIs comprising foreground and background 216 | examples. 217 | """ 218 | # overlaps: (rois x gt_boxes) 219 | overlaps = bbox_overlaps( 220 | np.ascontiguousarray(all_rois[:, 1:5], dtype=float), 221 | np.ascontiguousarray(gt_boxes[:, :4], dtype=float)) 222 | gt_assignment = overlaps.argmax(axis=1) 223 | max_overlaps = overlaps.max(axis=1) 224 | labels = gt_boxes[gt_assignment, 4] 225 | 226 | # Select foreground RoIs as those with >= FG_THRESH overlap 227 | fg_inds = np.where(max_overlaps >= cfg["TRAIN"].FG_THRESH)[0] 228 | # Guard against the case when an image has fewer than fg_rois_per_image 229 | # foreground RoIs 230 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 231 | 232 | # Sample foreground regions without replacement 233 | if fg_inds.size > 0: 234 | if deterministic: 235 | fg_inds = fg_inds[:fg_rois_per_this_image] 236 | else: 237 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 238 | 239 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 240 | bg_inds = np.where((max_overlaps < cfg["TRAIN"].BG_THRESH_HI) & 241 | (max_overlaps >= cfg["TRAIN"].BG_THRESH_LO))[0] 242 | # Compute number of background RoIs to take from this image (guarding 243 | # against there being fewer than desired) 244 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 245 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 246 | # Sample background regions without replacement 247 | if bg_inds.size > 0: 248 | if deterministic: 249 | bg_inds = bg_inds[:bg_rois_per_this_image] 250 | else: 251 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 252 | 253 | # The indices that we're selecting (both fg and bg) 254 | keep_inds = np.append(fg_inds, bg_inds) 255 | # Select sampled values from various arrays: 256 | labels = labels[keep_inds] 257 | # Clamp labels for the background RoIs to 0 258 | labels[fg_rois_per_this_image:] = 0 259 | rois = all_rois[keep_inds] 260 | 261 | bbox_target_data = _compute_targets( 262 | rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4], labels) 263 | 264 | bbox_targets, bbox_inside_weights = \ 265 | _get_bbox_regression_labels(bbox_target_data, num_classes) 266 | 267 | return labels, rois, bbox_targets, bbox_inside_weights 268 | -------------------------------------------------------------------------------- /utils/rpn/rpn_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import cntk 9 | from cntk import reduce_sum 10 | from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error 11 | from cntk.initializer import glorot_uniform, normal 12 | from cntk.layers import Convolution 13 | from cntk.losses import cross_entropy_with_softmax 14 | from utils.rpn.anchor_target_layer import AnchorTargetLayer 15 | from utils.rpn.proposal_layer import ProposalLayer 16 | from utils.rpn.proposal_target_layer import ProposalTargetLayer 17 | from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss 18 | try: 19 | from config import cfg 20 | except ImportError: 21 | from utils.default_config import cfg 22 | 23 | # Please keep in sync with Readme.md 24 | def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, 25 | proposal_layer_param_string=None, conv_bias_init=0.0): 26 | ''' 27 | Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: 28 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: 29 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" 30 | 31 | Outputs object detection proposals by applying estimated bounding-box 32 | transformations to a set of regular boxes (called "anchors"). 33 | 34 | Args: 35 | conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network 36 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. 37 | im_info: A CNTK variable or constant containing 38 | (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 39 | e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 40 | add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses 41 | proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer. 
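conv_bias_init: The initial value for the bias of the convolution layers (default 0.0)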
42 | 43 | Returns: 44 | rpn_rois - the proposed ROIs 45 | rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness) 46 | ''' 47 | 48 | # RPN network 49 | # init = 'normal', initValueScale = 0.01, initBias = 0.1 50 | num_channels = cfg["CNTK"].RPN_NUM_CHANNELS 51 | rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1, 52 | init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out) 53 | rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score", 54 | init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors) 55 | rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred", 56 | init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 4(coords) * 9(anchors) 57 | 58 | # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W) 59 | num_predictions = int(rpn_cls_score.shape[0] / 2) 60 | rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp") 61 | p_rpn_cls_score_rshp = cntk.placeholder() 62 | rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0) 63 | rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob') 64 | rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape") 65 | 66 | # proposal layer 67 | rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string)) 68 | rpn_rois = alias(rpn_rois_raw, name='rpn_rois') 69 | 70 | rpn_losses = None 71 | if(add_loss_functions): 72 | # RPN targets 73 | # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... 
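# (AnchorTargetLayer reads only the shape of rpn_cls_score, not its values)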
74 | atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string)) 75 | rpn_labels = atl.outputs[0] 76 | rpn_bbox_targets = atl.outputs[1] 77 | rpn_bbox_inside_weights = atl.outputs[2] 78 | 79 | # classification loss 80 | p_rpn_labels = cntk.placeholder() 81 | p_rpn_cls_score_rshp = cntk.placeholder() 82 | 83 | keeps = cntk.greater_equal(p_rpn_labels, 0.0) 84 | fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets") 85 | bg_labels = minus(1, fg_labels, name="bg_targets") 86 | rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0) 87 | rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0) 88 | rpn_loss_cls = element_times(rpn_ce, keeps) 89 | 90 | # The terms that are accounted for in the cls loss are those that have a label >= 0 91 | cls_num_terms = reduce_sum(keeps) 92 | cls_normalization_factor = 1.0 / cls_num_terms 93 | normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor 94 | 95 | reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss, 96 | [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 97 | 'CE_with_ignore', 'norm_rpn_cls_loss') 98 | 99 | # regression loss 100 | p_rpn_bbox_pred = cntk.placeholder() 101 | p_rpn_bbox_targets = cntk.placeholder() 102 | p_rpn_bbox_inside_weights = cntk.placeholder() 103 | rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0) 104 | # The bbox loss is normalized by the rpn batch size 105 | bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE 106 | normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor 107 | 108 | reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss, 109 | [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets), 110 | (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)], 111 | 'SmoothL1Loss', 'norm_rpn_bbox_loss') 112 | 113 | rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses") 114 | 115 | return rpn_rois, rpn_losses 116 | 117 | def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes): 118 | ''' 119 | Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper: 120 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: 121 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" 122 | 123 | Assigns object detection proposals to ground-truth targets. 124 | Produces proposal classification labels and bounding-box regression targets. 125 | It also adds gt_boxes to candidates and samples fg and bg rois for training. 126 | 127 | Args: 128 | rpn_rois: The proposed ROIs, e.g. from a region proposal network 129 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. 
130 | num_classes: The number of classes in the data set 131 | 132 | Returns: 133 | rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs 134 | label_targets - the target labels for the rois 135 | bbox_targets - the regression coefficient targets for the rois 136 | bbox_inside_weights - the weights for the regression loss 137 | ''' 138 | 139 | ptl_param_string = "'num_classes': {}".format(num_classes) 140 | ptl = user_function(ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=ptl_param_string)) 141 | 142 | # use an alias if you need to access the outputs, e.g., when cloning a trained network 143 | rois = alias(ptl.outputs[0], name='rpn_target_rois') 144 | label_targets = ptl.outputs[1] 145 | bbox_targets = ptl.outputs[2] 146 | bbox_inside_weights = ptl.outputs[3] 147 | 148 | return rois, label_targets, bbox_targets, bbox_inside_weights 149 | 150 | 151 | -------------------------------------------------------------------------------- /utils/unit_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os, sys 8 | abs_path = os.path.dirname(os.path.abspath(__file__)) 9 | sys.path.append(os.path.join(abs_path)) 10 | sys.path.append(os.path.join(abs_path, "..")) 11 | 12 | import pytest 13 | import numpy as np 14 | import cntk 15 | from cntk import user_function 16 | from cntk.ops import input_variable 17 | from rpn.proposal_layer import ProposalLayer as CntkProposalLayer 18 | from rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer 19 | from rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer 20 | from caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer 21 | from caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer 22 | from caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer 23 | 24 | def test_proposal_layer(): 25 | cls_prob_shape_cntk = (18,61,61) 26 | cls_prob_shape_caffe = (18,61,61) 27 | rpn_bbox_shape = (36, 61, 61) 28 | dims_info_shape = (6,) 29 | im_info = [1000, 1000, 1] 30 | 31 | # Create input tensors with values 32 | cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32) 33 | rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32) 34 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32) 35 | 36 | # Create CNTK layer and call forward 37 | cls_prob_var = input_variable(cls_prob_shape_cntk) 38 | rpn_bbox_var = input_variable(rpn_bbox_shape) 39 | dims_info_var = input_variable(dims_info_shape) 40 | 41 | cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var)) 42 | state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input}) 43 | cntk_proposals = cntk_output[next(iter(cntk_output))][0] 44 | 45 | # Create Caffe layer and call forward 46 | cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe) 47 | bottom = [np.array([cls_prob_caffe]),np.array([rpn_bbox_pred]),np.array([im_info])] 48 | top = None # handled through return statement in caffe layer for unit testing 49 | 50 | param_str = "'feat_stride': 16" 51 | caffe_layer = CaffeProposalLayer() 
52 | caffe_layer.set_param_str(param_str) 53 | caffe_layer.setup(bottom, top) 54 | caffe_output = caffe_layer.forward(bottom, top) 55 | caffe_proposals = caffe_output[:,1:] 56 | 57 | # assert that results are exactly the same 58 | assert cntk_proposals.shape == caffe_proposals.shape 59 | assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0) 60 | print("Verified ProposalLayer") 61 | 62 | def test_proposal_target_layer(): 63 | num_rois = 400 64 | all_rois_shape_cntk = (num_rois,4) 65 | num_gt_boxes = 50 66 | gt_boxes_shape_cntk = (num_gt_boxes,5) 67 | 68 | # Create input tensors with values 69 | x1y1 = np.random.random_sample((num_rois, 2)) * 500 70 | wh = np.random.random_sample((num_rois, 2)) * 400 71 | x2y2 = x1y1 + wh + 50 72 | all_rois = np.hstack((x1y1, x2y2)).astype(np.float32) 73 | 74 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500 75 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400 76 | x2y2 = x1y1 + wh + 50 77 | label = np.random.random_sample((num_gt_boxes, 1)) 78 | label = (label * 17.0) 79 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32) 80 | 81 | # Create CNTK layer and call forward 82 | all_rois_var = input_variable(all_rois_shape_cntk) 83 | gt_boxes_var = input_variable(gt_boxes_shape_cntk) 84 | 85 | cntk_layer = user_function(CntkProposalTargetLayer(all_rois_var, gt_boxes_var, param_str="'num_classes': 17", deterministic=True)) 86 | state, cntk_output = cntk_layer.forward({all_rois_var: [all_rois], gt_boxes_var: [gt_boxes]}) 87 | 88 | roi_key = [k for k in cntk_output if 'rpn_target_rois_raw' in str(k)][0] 89 | labels_key = [k for k in cntk_output if 'label_targets_raw' in str(k)][0] 90 | bbox_key = [k for k in cntk_output if 'bbox_targets_raw' in str(k)][0] 91 | bbox_w_key = [k for k in cntk_output if 'bbox_inside_w_raw' in str(k)][0] 92 | 93 | cntk_rois = cntk_output[roi_key][0] 94 | cntk_labels_one_hot = cntk_output[labels_key][0] 95 | cntk_bbox_targets = cntk_output[bbox_key][0] 96 | cntk_bbox_inside_weights = cntk_output[bbox_w_key][0] 97 | 98 | cntk_labels = np.argmax(cntk_labels_one_hot, axis=1) 99 | 100 | # Create Caffe layer and call forward 101 | zeros = np.zeros((all_rois.shape[0], 1), dtype=gt_boxes.dtype) 102 | all_rois_caffe = np.hstack((zeros, all_rois)) 103 | 104 | bottom = [np.array(all_rois_caffe),np.array(gt_boxes)] 105 | top = None # handled through return statement in caffe layer for unit testing 106 | 107 | param_str = "'num_classes': 17" 108 | caffe_layer = CaffeProposalTargetLayer() 109 | caffe_layer.set_param_str(param_str) 110 | caffe_layer.setup(bottom, top) 111 | caffe_layer.set_deterministic_mode() 112 | 113 | caffe_rois, caffe_labels, caffe_bbox_targets, caffe_bbox_inside_weights = caffe_layer.forward(bottom, top) 114 | caffe_rois = caffe_rois[:,1:] 115 | 116 | num_caffe_rois = caffe_rois.shape[0] 117 | cntk_rois = cntk_rois[:num_caffe_rois,:] 118 | cntk_labels = cntk_labels[:num_caffe_rois] 119 | cntk_bbox_targets = cntk_bbox_targets[:num_caffe_rois,:] 120 | cntk_bbox_inside_weights = cntk_bbox_inside_weights[:num_caffe_rois,:] 121 | 122 | # assert that results are exactly the same 123 | assert cntk_rois.shape == caffe_rois.shape 124 | assert cntk_labels.shape == caffe_labels.shape 125 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape 126 | assert cntk_bbox_inside_weights.shape == caffe_bbox_inside_weights.shape 127 | 128 | caffe_labels = [int(x) for x in caffe_labels] 129 | 130 | assert np.allclose(cntk_rois, caffe_rois, rtol=0.0, atol=0.0) 131 | assert np.allclose(cntk_labels, 
caffe_labels, rtol=0.0, atol=0.0) 132 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0) 133 | assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0) 134 | print("Verified ProposalTargetLayer") 135 | 136 | def test_anchor_target_layer(): 137 | rpn_cls_score_shape_cntk = (1, 18, 61, 61) 138 | num_gt_boxes = 50 139 | gt_boxes_shape_cntk = (num_gt_boxes,5) 140 | dims_info_shape = (6,) 141 | im_info = [1000, 1000, 1] 142 | 143 | # Create input tensors with values 144 | rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32) 145 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32) 146 | 147 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500 148 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400 149 | x2y2 = x1y1 + wh + 50 150 | label = np.random.random_sample((num_gt_boxes, 1)) 151 | label = (label * 17.0) 152 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32) 153 | 154 | # Create CNTK layer and call forward 155 | rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk) 156 | gt_boxes_var = input_variable(gt_boxes_shape_cntk) 157 | dims_info_var = input_variable(dims_info_shape) 158 | 159 | cntk_layer = user_function(CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True)) 160 | state, cntk_output = cntk_layer.forward({rpn_cls_score_var: [rpn_cls_score_dummy], gt_boxes_var: [gt_boxes], dims_info_var: dims_input}) 161 | 162 | obj_key = [k for k in cntk_output if 'objectness_target' in str(k)][0] 163 | bbt_key = [k for k in cntk_output if 'rpn_bbox_target' in str(k)][0] 164 | bbw_key = [k for k in cntk_output if 'rpn_bbox_inside_w' in str(k)][0] 165 | 166 | cntk_objectness_target = cntk_output[obj_key][0] 167 | cntk_bbox_targets = cntk_output[bbt_key][0] 168 | cntk_bbox_inside_w = cntk_output[bbw_key][0] 169 | 170 | # Create Caffe layer and call forward 171 | bottom = [np.array(rpn_cls_score_dummy),np.array(gt_boxes), np.array(im_info)] 172 | top = None # handled through return statement in caffe layer for unit testing 173 | 174 | param_str = "'feat_stride': 16" 175 | caffe_layer = CaffeAnchorTargetLayer() 176 | caffe_layer.set_param_str(param_str) 177 | caffe_layer.setup(bottom, top) 178 | caffe_layer.set_deterministic_mode() 179 | 180 | caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top) 181 | 182 | # assert that results are exactly the same 183 | assert cntk_objectness_target.shape == caffe_objectness_target.shape 184 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape 185 | assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape 186 | 187 | assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0) 188 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0) 189 | assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0) 190 | print("Verified AnchorTargetLayer") 191 | 192 | if __name__ == '__main__': 193 | test_proposal_layer() 194 | test_proposal_target_layer() 195 | test_anchor_target_layer() 196 | -------------------------------------------------------------------------------- /web.config: -------------------------------------------------------------------------------- --------------------------------------------------------------------------------