├── .deployment ├── .gitattributes ├── .gitignore ├── CNTKModels └── download_model.py ├── LICENSE ├── README.md ├── Temp └── placeholder.txt ├── app.py ├── cntk_helpers.py ├── config.py ├── deploy.cmd ├── doc ├── Postman_2017-09-26_22-50-06.jpg ├── cmd_2017-09-26_22-15-45.jpg ├── cmd_2017-09-26_22-18-52.jpg ├── cmd_2017-09-26_22-20-23.jpg ├── iexplore_2017-09-26_22-17-20.jpg ├── iexplore_2017-09-26_22-22-19.jpg ├── iexplore_2017-09-26_22-23-19.jpg ├── iexplore_2017-09-26_22-23-59.jpg ├── iexplore_2017-09-26_22-25-04.jpg └── iexplore_2017-09-26_23-09-42.jpg ├── evaluate.py ├── logs └── placeholder.txt ├── plot_helpers.py ├── requirements.txt ├── utils ├── Readme.md ├── annotations │ └── annotations_helper.py ├── caffe_layers │ ├── anchor_target_layer.py │ ├── bbox_transform.py │ ├── proposal_layer.py │ └── proposal_target_layer.py ├── cython_modules │ ├── cpu_nms.cp35-win_amd64.pyd │ ├── cpu_nms.cpython-34m.so │ ├── cython_bbox.cp35-win_amd64.pyd │ └── cython_bbox.cpython-34m.so ├── default_config.py ├── map │ └── map_helpers.py ├── nms │ └── nms_wrapper.py ├── pytest.ini ├── rpn │ ├── anchor_target_layer.py │ ├── bbox_transform.py │ ├── cntk_smoothL1_loss.py │ ├── generate_anchors.py │ ├── proposal_layer.py │ ├── proposal_target_layer.py │ └── rpn_helpers.py └── unit_tests.py └── web.config /.deployment: -------------------------------------------------------------------------------- 1 | [config] 2 | command = deploy.cmd -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | # *.py[cod] 4 | *$py.class 5 | 6 | # IDE 7 | 8 | .vs/ 9 | .vscode/ 10 | 11 | # C extensions 12 | #*.so 13 | 14 | # Distribution / packaging 15 | .Python 16 | build/ 17 | develop-eggs/ 18 | dist/ 19 | downloads/ 20 | eggs/ 21 | .eggs/ 22 | lib/ 23 | lib64/ 24 | parts/ 25 | sdist/ 26 | var/ 27 | wheels/ 28 | *.egg-info/ 29 | .installed.cfg 30 | *.egg 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .coverage 46 | .coverage.* 47 | .cache 48 | nosetests.xml 49 | coverage.xml 50 | *.cover 51 | .hypothesis/ 52 | 53 | # Translations 54 | *.mo 55 | *.pot 56 | 57 | # Django stuff: 58 | *.log 59 | local_settings.py 60 | 61 | # Flask stuff: 62 | instance/ 63 | .webassets-cache 64 | 65 | # Scrapy stuff: 66 | .scrapy 67 | 68 | # Sphinx documentation 69 | docs/_build/ 70 | 71 | # PyBuilder 72 | target/ 73 | 74 | # Jupyter Notebook 75 | .ipynb_checkpoints 76 | 77 | # pyenv 78 | .python-version 79 | 80 | # celery beat schedule file 81 | celerybeat-schedule 82 | 83 | # SageMath parsed files 84 | *.sage.py 85 | 86 | # Environments 87 | .env 88 | .venv 89 | env/ 90 | venv/ 91 | ENV/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | 107 | # directories 108 | .idea/ 109 | 110 | # temporary ignore 111 | __pycache__ 112 | __init__.py 113 | Temp/*.jpg 114 | CNTKModels/*.txt 115 | 116 | # models 117 | *.model -------------------------------------------------------------------------------- /CNTKModels/download_model.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | try: 4 | from urllib.request import urlretrieve 5 | except ImportError: 6 | from urllib import urlretrieve 7 | 8 | def download_file(filename, file_url): 9 | file_dir = os.path.dirname(os.path.abspath(__file__)) 10 | file_path = os.path.join(file_dir, filename) 11 | if not os.path.exists(file_path): 12 | print('Downloading file from ' + file_url + ', may take a while...') 13 | urlretrieve(file_url,file_path) 14 | print('Saved file as ' + file_path) 15 | else: 16 | print('File already available at ' + file_path) 17 | 18 | if __name__ == '__main__': 19 | download_file('HotailorPOC2.model','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2.model') 20 | download_file('HotailorPOC2_class_map.txt','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2_class_map.txt') -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2017 Karol Żak 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Table of contents
- [Project description](#project-description)
- [Results](#results)
- [Deployment steps](#deployment-steps)
  - [Setup](#setup)
  - [Deploy demo](#deploy-demo)
- [Code highlights](#code-highlights)

# Project description
[[back to the top]](#table-of-contents)

This sample project shows how to prepare a simple Python web service with an image classification model produced in [CNTK (Cognitive Toolkit)](https://github.com/Microsoft/CNTK) using [FasterRCNN](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FasterRCNN), and how to deploy it to [Azure Web Apps](https://azure.microsoft.com/en-gb/services/app-service/web/)

- [x] Web service written in Python using the Flask module
- [x] Python scripts that let you evaluate images using CNTK and a pretrained model
- [x] Custom deployment scripts for Azure Web Apps
- [x] Automatic setup of a custom Python 3.5.x environment with all the required dependencies
- [ ] HTML UI for testing image classification
- [ ] Virtual Python environments for each application deployed to Azure Web Apps

# Results
[[back to the top]](#table-of-contents)

[Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
![Demo](doc/iexplore_2017-09-26_23-09-42.jpg)

Sample request and response in Postman:
![Demo](doc/Postman_2017-09-26_22-50-06.jpg)

# Deployment steps

## Setup
[[back to the top]](#table-of-contents)


1. **Download the content of this repo**

    You can either clone this repo or just download it and unzip it to some folder

2. **Set up the Python environment**

    For the scripts to work you need a proper Python environment. If you don't have one set up already, you should follow one of the online tutorials. To set up the Python environment and all the dependencies required by CNTK on my local Windows machine, I used [this tutorial](https://docs.microsoft.com/en-us/cognitive-toolkit/setup-windows-binary-script)

3. **Download the CNTK model and class map file**

    Go to the `/CNTKModels` folder in the location where you unzipped this repo and run `download_model.py`. It will automatically download the pretrained model and the class map file required for our evaluation to run properly.

4. **Install the Azure CLI tools**

    If you don't have them yet, you can easily install them by opening the Windows Command Prompt and running this command:
    ```Batchfile
    pip install azure-cli
    ```

5. **Get an Azure subscription**

    If you don't own an Azure subscription, you can always create a [new free trial](https://azure.microsoft.com/en-us/free/) with $200 of credit to spend

## Deploy demo
[[back to the top]](#table-of-contents)
1. **Set variables**

    Open the Command Prompt in the location where you unzipped the contents of this repository (for example: `cd C:\Poligon\WebService`) and type in the following, making sure to replace the `[]` placeholders with proper values:

    ```
    set uname=[username]
    set pass=[password]
    set appn=[web_app_name]
    set rgname=[resource_group_name]
    ```

2. **Log in to Azure**

    In the same CMD window type in:

    ```
    az login
    ```
    You should see something like this:

    ![AZ login](doc/cmd_2017-09-26_22-15-45.jpg)

    Now go to the https://aka.ms/devicelogin website and type in the code:

    ![Device login website](doc/iexplore_2017-09-26_22-17-20.jpg)

    You will then be asked to log in with an email connected to your Azure subscription

    If everything goes OK, you should see a verification message on the website, and in the console you should see a list of your Azure subscriptions


3. **Set up deployment credentials**

    We're setting these up so that we can later deploy code remotely to our Azure Web App
    ```
    az webapp deployment user set --user-name %uname% --password %pass%
    ```

4. **Create a resource group**

    Resource groups help you better manage the resources in your subscription, and they are the basic unit of deploying services to Azure. [Read more here](https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview)

    ```
    az group create --location westeurope --name %rgname%
    ```

5. **Create a new [Azure App Service](https://azure.microsoft.com/en-us/services/app-service/) plan and a new [Azure Web App](https://azure.microsoft.com/en-us/services/app-service/web/)**

    ```
    az appservice plan create --name %appn% --resource-group %rgname% --sku S1
    az webapp create --name %appn% --resource-group %rgname% --plan %appn%
    ```

6. **Configure the Azure Web App and add the Python extension**

    Azure Web Apps by default support only Python 2.7 and 3.4. Because I used Python 3.5, I had to use a special [extension](https://azure.microsoft.com/en-us/blog/azure-web-sites-extensions/) to set up the environment

    First you need to change some [Application Settings](https://docs.microsoft.com/en-us/azure/app-service/web-sites-configure) on your Web App (the pink ones):
    ![Web App preferences](doc/iexplore_2017-09-26_22-22-19.jpg)
    Changing `Platform` is required; changing `Always On` is optional, but I recommend enabling it so that our web service stays awake even when not used.

    After we properly save the Application Settings, we can add the Python 3.5.x extension. In order to do this, just type `extensions` into the search box
    ![Extensions](doc/iexplore_2017-09-26_22-23-19.jpg)

    And then simply add the new extension
    ![Extensions](doc/iexplore_2017-09-26_22-23-59.jpg)

    It should take around a minute or two to properly install the extension
    ![Extensions](doc/iexplore_2017-09-26_22-25-04.jpg)
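    Once the extension is installed, you can sanity-check the interpreter the web app will use. A minimal check, assuming the extension installed into `D:\home\python354x64` (the path used throughout this repo); run it with that interpreter from the Kudu console:

    ```Python
    import sys

    print(sys.version)     # expect 3.5.x
    print(sys.executable)  # expect D:\home\python354x64\python.exe
    ```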
7. **Set up the deployment source for the newly created Azure Web App**

    This command will not only set up the deployment source for your app, but will also retrieve the URL you will need in the next steps
    ```
    az webapp deployment source config-local-git --name %appn% --resource-group %rgname% --query url --output tsv
    ```
    It should return something like this:
    ![Remote repo 1](doc/cmd_2017-09-26_22-18-52.jpg)


8. **Initialize git and add the remote repository**

    Make sure to replace `[remote_repo_address]` with the URL returned in step 7.
    ```
    git init
    git remote add azure [remote_repo_address]
    ```
    The command with the URL should look like this:
    ![Remote repo 2](doc/cmd_2017-09-26_22-20-23.jpg)

9. **Push the application to the Azure Web App remote repository**

    The last step is to simply push our application's code to the Azure Web App
    ```
    git add -A
    git commit -m "init"
    git push azure master
    ```

    This will trigger our [custom deployment script](deploy.cmd), copy all the files, set up the Python environment and install all the required dependencies from the [requirements.txt](requirements.txt) file

10. **Test the application**

    If everything went smoothly, you should now have a running Python application and you should be able to test it. I used Postman to test HTTP requests and responses

    [Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
    ![Demo](doc/iexplore_2017-09-26_23-09-42.jpg)

    Sample request and response in Postman:
    ![Demo](doc/Postman_2017-09-26_22-50-06.jpg)

# Code highlights
[[back to the top]](#table-of-contents)


- [config.py](config.py) - the most important variables for the scripts are set in this file

    Variables used by the web service to point to the directories for temp images and CNTK models:

    ```Python
    # directories for web service:
    __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change
    __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files
    ```

    Variables for choosing the specific model:

    ```Python
    __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name
    __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name
    ```

    Variables used by `evaluate.py` to properly preprocess images and use the CNTK eval function:

    ```Python
    __C.CNTK.IMAGE_WIDTH = 1000
    __C.CNTK.IMAGE_HEIGHT = 1000
    __C.CNTK.NUM_CHANNELS = 3
    ```

- [app.py](app.py) - the main application - the startup file for Flask

    There is one very important line for running CNTK:
    ```Python
    [..]
    import os
    os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
    [..]
    ```
    It adds the location of the CNTK libraries to the PATH variable. This is very important because our code strongly relies on that PATH. For now I'm doing this in code, but in the future I want to move it to the deployment script

    I am using the Flask module to run my web service. In order to make it work, I first needed to create an instance of the Flask app and then run it on a proper port:

    ```Python
    [..]
    app = Flask(__name__)
    [..]
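    # SERVER_HOST / SERVER_PORT are read from the environment so the same
    # code runs both locally (defaulting to localhost:5555) and on Azure: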
    if __name__ == '__main__':
        HOST = os.environ.get('SERVER_HOST', 'localhost')
        try:
            PORT = int(os.environ.get('SERVER_PORT', '5555'))
        except ValueError:
            PORT = 5555
        app.run(HOST, PORT)
    ```
    I also used routes to set up specific methods for our RESTful web service. Currently I expose two routes in my API: one returns a collection of classified tags, and the second returns an image with the plotted results of the evaluation. The `'/'` route simply sets the default landing page. A quick way to call these endpoints from Python is shown right after the route list below.
    ```Python
    [..]
    @app.route('/')
    [..]
    @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST'])
    [..]
    @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST'])
    [..]
    ```
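    Besides Postman, you can exercise these endpoints with a few lines of Python. A minimal client sketch using the `requests` package (the package, the `[web_app_name]` placeholder and the `test.jpg` file name are assumptions, not part of this repo):

    ```Python
    import requests  # assumed: pip install requests

    # hypothetical values - replace [web_app_name] and the image path with your own
    url = "http://[web_app_name].azurewebsites.net/hotelidentifier/api/v1.0/evaluate/returntags"

    with open("test.jpg", "rb") as image_file:
        # the service reads the upload from request.files['file'],
        # so the multipart field must be named 'file'
        response = requests.post(url, files={"file": image_file})

    print(response.json()["tags"])  # list of {'label', 'score', 'bbox'} objects
    ```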
- [evaluate.py](evaluate.py) - the main script for image classification with the CNTK model

    This script strongly depends on [config.py](config.py), and it also uses [cntk_helpers.py](cntk_helpers.py), [plot_helpers.py](plot_helpers.py) and a bunch of scripts from the [utils](utils) folder. Most of those scripts were copied from the original [CNTK source on GitHub](https://github.com/Microsoft/CNTK), some of them with slight changes

- [plot_helpers.py](plot_helpers.py) - helper script for dealing with image plotting

    When working in a headless (non-GUI) server environment such as Azure Web Apps, you need to change the default backend of the `matplotlib` module so that it does not rely on a GUI
    ```Python
    [..]
    # this is important when deploying to headless server environment (non-GUI)
    ###################################################
    import matplotlib
    # force headless backend, or set 'backend' to 'Agg'
    # in your ~/.matplotlib/matplotlibrc
    matplotlib.use('Agg')

    import matplotlib.pyplot
    # force non-interactive mode, or set 'interactive' to False
    # in your ~/.matplotlib/matplotlibrc
    from matplotlib.pyplot import imsave
    matplotlib.pyplot.ioff()
    ###################################################
    [..]
    ```

- [requirements.txt](requirements.txt)

    It holds all the dependencies required by my application and the CNTK libraries.
    ```
    easydict==1.6
    pytest==3.0.3
    opencv-python
    https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
    https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
    Flask==0.12.2
    numpy==1.11.2
    matplotlib==1.5.3
    ipython==6.2.0
    Pillow==4.1.1
    PyYAML==3.12
    ```
    As you can see, in most cases we pin specific versions of modules, and sometimes we even explicitly point to the correct .whl file to use for the installation

- [.deployment](.deployment)

    If this file is present, Kudu will use the custom `deploy.cmd` file instead of the default one. We use a custom deployment script to choose Python 3.5 and install all the necessary dependencies. To learn more about Kudu and deploying to Azure Web Apps, [go here](https://azure.microsoft.com/en-gb/resources/videos/what-is-kudu-with-david-ebbo/)
    ```
    [config]
    command = deploy.cmd
    ```

- [deploy.cmd](deploy.cmd)

    A custom script for our deployment with Kudu. The main difference from the default script is that it sets Python 3.5 (installed from the extension) as the main environment

    ```
    [..]
    SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
    SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
    [..]
    ```

    I'm also using `deploy.cmd` to install all the required dependencies:
    ```CMD
    [..]
    :: 4. Install packages
    echo Pip install requirements.
    echo "Installing requirements"
    %PYTHON_EXE% -m pip install -r requirements.txt
    [..]
    ```

    **TODO:**
    I was told that it is better to have a separate virtual Python environment for each app hosted on Azure Web Apps, so that there is no chance of conflicts between module versions used by different apps. That is what I need to fix in the future.


- [web.config](web.config)

    I used `web.config` to point to the directory of my custom Python 3.5 installation and to successfully run my Flask-based Python web service. I based my `web.config` on the Azure Web Apps [documentation](https://docs.microsoft.com/en-us/azure/app-service/web-sites-python-configure). A minimal sketch following that documentation's pattern (see the actual [web.config](web.config) in this repo for the exact file):

    ```xml
    <?xml version="1.0" encoding="utf-8"?>
    <!-- sketch based on the Azure docs pattern; consult web.config in this repo for the exact file -->
    <configuration>
      <appSettings>
        <add key="WSGI_HANDLER" value="app.app"/>
        <add key="PYTHONPATH" value="D:\home\site\wwwroot"/>
      </appSettings>
      <system.webServer>
        <handlers>
          <add name="PythonHandler" path="*" verb="*" modules="FastCgiModule"
               scriptProcessor="D:\home\python354x64\python.exe|D:\home\python354x64\wfastcgi.py"
               resource="Unspecified" requireAccess="Script"/>
        </handlers>
      </system.webServer>
    </configuration>
    ```


--------------------------------------------------------------------------------
/Temp/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/Temp/placeholder.txt
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
#!flask/bin/python
from flask import Flask, jsonify, request, make_response, send_file
import os
os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
import uuid
from config import cfg
from cntk import load_model
app = Flask(__name__)


model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)
print("Loading existing model from %s" % model_path)
loadedModel = load_model(model_path)


@app.errorhandler(404)
def not_found(error):
    return make_response(jsonify({'error': 'Not found'}), 404)

@app.route('/')
def index():
    return "" \
           "" \
           "Hello, World!
" \ 25 | "This is a sample web service written in Python using Flask module.
" \ 26 | "Use one of the following urls to evaluate images:
" \ 27 | "/hotelidentifier/api/v1.0/evaluate/returntags - takes image as parameter and returns cloud of tags
" \ 28 | "/hotelidentifier/api/v1.0/evaluate/returnimage - takes image as parameter and returns tagged image
" \ 29 | "" \ 30 | "" 31 | 32 | 33 | @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST']) 34 | def return_tags(): 35 | file_upload = request.files['file'] 36 | if file_upload: 37 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg') 38 | file_upload.save(temp_file_path) 39 | app.logger.debug('File is saved as %s', temp_file_path) 40 | from evaluate import evaluateimage 41 | return jsonify(tags=[e.serialize() for e in evaluateimage(temp_file_path,"returntags",eval_model=loadedModel)]) 42 | 43 | @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST']) 44 | def return_image(): 45 | file_upload = request.files['file'] 46 | if file_upload: 47 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg') 48 | file_upload.save(temp_file_path) 49 | app.logger.debug('File is saved as %s', temp_file_path) 50 | from evaluate import evaluateimage 51 | return send_file(evaluateimage(temp_file_path,"returnimage",eval_model=loadedModel), mimetype='image/jpg') 52 | #return send_file(os.path.join('./Temp', temp_filename), mimetype='image/jpg') 53 | 54 | 55 | 56 | if __name__ == '__main__': 57 | HOST = os.environ.get('SERVER_HOST', 'localhost') 58 | try: 59 | PORT = int(os.environ.get('SERVER_PORT', '5555')) 60 | except ValueError: 61 | PORT = 5555 62 | app.run(HOST, PORT) 63 | 64 | 65 | """ add UI later 66 | @app.route("/") 67 | def index(): 68 | return render_template('index.html') 69 | 70 | @app.route("/api/uploader", methods=['POST']) 71 | @cross_origin() 72 | def api_upload_file(): 73 | img = Image.open(BytesIO(request.files['imagefile'].read())).convert('RGB') 74 | img = ImageOps.fit(img, (224, 224), Image.ANTIALIAS) 75 | return json.dumps(run_some_deep_learning_cntk(img)) 76 | 77 | 78 | def run_some_deep_learning_cntk(rgb_pil_image): 79 | # Convert to BGR 80 | rgb_image = np.array(rgb_pil_image, dtype=np.float32) 81 | bgr_image = rgb_image[..., [2, 1, 0]] 82 | img = np.ascontiguousarray(np.rollaxis(bgr_image, 2)) 83 | 84 | # Use last layer to make prediction 85 | z_out = combine([MODEL.outputs[3].owner]) 86 | result = np.squeeze(z_out.eval({z_out.arguments[0]: [img]})) 87 | 88 | # Sort probabilities 89 | a = np.argsort(result)[-1] 90 | predicted_category = " ".join(LABELS[a].split(" ")[1:]) 91 | 92 | return predicted_category.split(",")[0] 93 | """ -------------------------------------------------------------------------------- /cntk_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 
5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | from utils.rpn.bbox_transform import bbox_transform_inv 9 | 10 | def regress_rois(roi_proposals, roi_regression_factors, labels, dims_input): 11 | for i in range(len(labels)): 12 | label = labels[i] 13 | if label > 0: 14 | deltas = roi_regression_factors[i:i+1,label*4:(label+1)*4] 15 | roi_coords = roi_proposals[i:i+1,:] 16 | regressed_rois = bbox_transform_inv(roi_coords, deltas) 17 | roi_proposals[i,:] = regressed_rois 18 | 19 | if dims_input is not None: 20 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 21 | pad_width, pad_height, scaled_image_width, scaled_image_height, _, _ = dims_input 22 | left = (pad_width - scaled_image_width) / 2 23 | right = pad_width - left - 1 24 | top = (pad_height - scaled_image_height) / 2 25 | bottom = pad_height - top - 1 26 | 27 | roi_proposals[:,0] = roi_proposals[:,0].clip(left, right) 28 | roi_proposals[:,1] = roi_proposals[:,1].clip(top, bottom) 29 | roi_proposals[:,2] = roi_proposals[:,2].clip(left, right) 30 | roi_proposals[:,3] = roi_proposals[:,3].clip(top, bottom) 31 | 32 | return roi_proposals 33 | 34 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | # `pip install easydict` if you don't have it 11 | from easydict import EasyDict as edict 12 | 13 | __C = edict() 14 | cfg = __C 15 | 16 | # 17 | # CNTK parameters 18 | # 19 | 20 | __C.CNTK = edict() 21 | 22 | # directories for web service: 23 | __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change 24 | __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files 25 | 26 | ################# 27 | # Model & Class Map Files names 28 | ################# 29 | __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name 30 | __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name 31 | 32 | ################# 33 | 34 | __C.CNTK.BASE_MODEL = "AlexNet" # "VGG16" or "AlexNet" 35 | 36 | __C.CNTK.CONV_BIAS_INIT = 0.0 37 | __C.CNTK.SIGMA_RPN_L1 = 3.0 38 | 39 | # change below settings to match variables used to train YOUR model 40 | __C.CNTK.IMAGE_WIDTH = 1000 41 | __C.CNTK.IMAGE_HEIGHT = 1000 42 | __C.CNTK.NUM_CHANNELS = 3 43 | 44 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3 45 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0 46 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1 47 | 48 | __C.CNTK.DRAW_NEGATIVE_ROIS = False 49 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False 50 | 51 | 52 | # 53 | # Base models 54 | # 55 | 56 | if __C.CNTK.BASE_MODEL == "AlexNet": 57 | __C.CNTK.BASE_MODEL_FILE = "AlexNet.model" 58 | __C.CNTK.FEATURE_NODE_NAME = "features" 59 | __C.CNTK.LAST_CONV_NODE_NAME = "conv5.y" 60 | __C.CNTK.START_TRAIN_CONV_NODE_NAME = __C.CNTK.FEATURE_NODE_NAME 61 | __C.CNTK.POOL_NODE_NAME = "pool3" 62 | __C.CNTK.LAST_HIDDEN_NODE_NAME = "h2_d" 63 | __C.CNTK.RPN_NUM_CHANNELS = 256 64 | __C.CNTK.ROI_DIM = 6 65 | __C.CNTK.E2E_LR_FACTOR = 1.0 66 | __C.CNTK.RPN_LR_FACTOR = 1.0 67 | 
    __C.CNTK.FRCN_LR_FACTOR = 1.0

if __C.CNTK.BASE_MODEL == "VGG16":
    __C.CNTK.BASE_MODEL_FILE = "VGG16_ImageNet_Caffe.model"
    __C.CNTK.FEATURE_NODE_NAME = "data"
    __C.CNTK.LAST_CONV_NODE_NAME = "relu5_3"
    __C.CNTK.START_TRAIN_CONV_NODE_NAME = "pool2" # __C.CNTK.FEATURE_NODE_NAME
    __C.CNTK.POOL_NODE_NAME = "pool5"
    __C.CNTK.LAST_HIDDEN_NODE_NAME = "drop7"
    __C.CNTK.RPN_NUM_CHANNELS = 512
    __C.CNTK.ROI_DIM = 7
    __C.CNTK.E2E_LR_FACTOR = 1.0
    __C.CNTK.RPN_LR_FACTOR = 1.0
    __C.CNTK.FRCN_LR_FACTOR = 1.0

#
# Training options
#

__C.TRAIN = edict()

# Minibatch size (number of regions of interest [ROIs])
__C.TRAIN.BATCH_SIZE = 128

# Fraction of minibatch that is labeled foreground (i.e. class > 0)
__C.TRAIN.FG_FRACTION = 0.25

# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
__C.TRAIN.FG_THRESH = 0.5

# Overlap threshold for a ROI to be considered background (class = 0 if
# overlap in [LO, HI))
__C.TRAIN.BG_THRESH_HI = 0.5
__C.TRAIN.BG_THRESH_LO = 0.0

# Use horizontally-flipped images during training?
__C.TRAIN.USE_FLIPPED = True

# Train bounding-box regressors
__C.TRAIN.BBOX_REG = True

# Overlap required between a ROI and ground-truth box in order for that ROI to
# be used as a bounding-box regression training example
__C.TRAIN.BBOX_THRESH = 0.5

# Normalize the targets (subtract empirical mean, divide by empirical stddev)
__C.TRAIN.BBOX_NORMALIZE_TARGETS = True
# Deprecated (inside weights)
__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Normalize the targets using "precomputed" (or made up) means and stdevs
# (BBOX_NORMALIZE_TARGETS must also be True)
__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)

# Train using these proposals
__C.TRAIN.PROPOSAL_METHOD = 'selective_search'

# IOU >= thresh: positive example
__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
# IOU < thresh: negative example
__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
# If an anchor is satisfied by both the positive and negative conditions, set it to negative
__C.TRAIN.RPN_CLOBBER_POSITIVES = False
# Max number of foreground examples
__C.TRAIN.RPN_FG_FRACTION = 0.5
# Total number of examples
__C.TRAIN.RPN_BATCHSIZE = 256
# NMS threshold used on RPN proposals
__C.TRAIN.RPN_NMS_THRESH = 0.7
# Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
# Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TRAIN.RPN_MIN_SIZE = 16
# Deprecated (outside weights)
__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
# Give the positive RPN examples weight of p * 1 / {num positives}
# and give negatives a weight of (1 - p)
# Set to -1.0 to use uniform example weighting
__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0


#
# Testing options
#

__C.TEST = edict()
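# NOTE: the TEST RPN settings below mirror the TRAIN RPN settings above, but
# with smaller proposal counts (6000/300 vs. 12000/2000), since evaluation
# only needs the top-scoring proposals.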
# Overlap threshold used for non-maximum suppression (suppress boxes with
# IoU >= this threshold)
__C.TEST.NMS = 0.3

# Test using bounding-box regressors
__C.TEST.BBOX_REG = True

# Propose boxes
__C.TEST.HAS_RPN = False

# Test using these proposals
__C.TEST.PROPOSAL_METHOD = 'selective_search'

## NMS threshold used on RPN proposals
__C.TEST.RPN_NMS_THRESH = 0.7
## Number of top scoring boxes to keep before applying NMS to RPN proposals
__C.TEST.RPN_PRE_NMS_TOP_N = 6000
## Number of top scoring boxes to keep after applying NMS to RPN proposals
__C.TEST.RPN_POST_NMS_TOP_N = 300
# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
__C.TEST.RPN_MIN_SIZE = 16


#
# MISC
#

# The mapping from image coordinates to feature map coordinates might cause
# some boxes that are distinct in image space to become identical in feature
# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
# for identifying duplicate boxes.
# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
__C.DEDUP_BOXES = 1./16.

# Pixel mean values (BGR order) as a (1, 1, 3) array
# We use the same pixel mean for all networks even though it's not exactly what
# they were trained with
__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])

# For reproducibility
__C.RNG_SEED = 3

# A small number that's used many times
__C.EPS = 1e-14

# Use GPU implementation of non-maximum suppression
__C.USE_GPU_NMS = True

# Default GPU device id
__C.GPU_ID = 0


def _merge_a_into_b(a, b):
    """Merge config dictionary a into config dictionary b, clobbering the
    options in b whenever they are also specified in a.
    """
    if type(a) is not edict:
        return

    for k, v in a.items():  # dict.iteritems() does not exist in Python 3
        # a must specify keys that are in b
        if k not in b:  # dict.has_key() does not exist in Python 3
            raise KeyError('{} is not a valid config key'.format(k))

        # the types must match, too
        old_type = type(b[k])
        if old_type is not type(v):
            if isinstance(b[k], np.ndarray):
                v = np.array(v, dtype=b[k].dtype)
            else:
                raise ValueError(('Type mismatch ({} vs. {}) '
                                  'for config key: {}').format(type(b[k]),
                                                               type(v), k))

        # recursively merge dicts
        if type(v) is edict:
            try:
                _merge_a_into_b(a[k], b[k])
            except:
                print('Error under config key: {}'.format(k))
                raise
        else:
            b[k] = v

def cfg_from_file(filename):
    """Load a config file and merge it into the default options."""
    import yaml
    with open(filename, 'r') as f:
        yaml_cfg = edict(yaml.load(f))

    _merge_a_into_b(yaml_cfg, __C)

def cfg_from_list(cfg_list):
    """Set config keys via list (e.g., from command line)."""
    from ast import literal_eval
    assert len(cfg_list) % 2 == 0
    for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
        key_list = k.split('.')
        d = __C
        for subkey in key_list[:-1]:
            assert subkey in d  # dict.has_key() does not exist in Python 3
            d = d[subkey]
        subkey = key_list[-1]
        assert subkey in d
        try:
            value = literal_eval(v)
        except:
            # handle the case when v is a string literal
            value = v
        assert type(value) == type(d[subkey]), \
            'type {} does not match original type {}'.format(
                type(value), type(d[subkey]))
        d[subkey] = value
--------------------------------------------------------------------------------
/deploy.cmd:
--------------------------------------------------------------------------------
@if "%SCM_TRACE_LEVEL%" NEQ "4" @echo off
:: ----------------------
:: KUDU Deployment Script
:: Version: 2.0.0
:: ----------------------
:: Prerequisites
:: -------------
:: VARIABLES
echo "ATTENTION"
echo "USER MUST CHECK/SET THESE VARIABLES:"
SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
::SET CNTK_BIN=https://azurewebappcntk.blob.core.windows.net/cntkrc/cntk.zip
echo "Python extension installed here:"
echo %PYTHON_EXE%
::echo "CNTK Binaries and version located here:"
::echo %CNTK_BIN%
:: Verify node.js installed
where node 2>nul >nul
IF %ERRORLEVEL% NEQ 0 (
  echo Missing node.js executable, please install node.js. If it is already installed, make sure it can be reached from the current environment.
  goto error
)
:: Setup
:: -----
setlocal enabledelayedexpansion
SET ARTIFACTS=%~dp0%..\artifacts
IF NOT DEFINED DEPLOYMENT_SOURCE (
  SET DEPLOYMENT_SOURCE=%~dp0%.
)
IF NOT DEFINED DEPLOYMENT_TARGET (
  SET DEPLOYMENT_TARGET=%ARTIFACTS%\wwwroot
)
IF NOT DEFINED NEXT_MANIFEST_PATH (
  SET NEXT_MANIFEST_PATH=%ARTIFACTS%\manifest
  IF NOT DEFINED PREVIOUS_MANIFEST_PATH (
    SET PREVIOUS_MANIFEST_PATH=%ARTIFACTS%\manifest
  )
)
IF NOT DEFINED KUDU_SYNC_CMD (
  :: Install kudu sync
  echo Installing Kudu Sync
  call npm install kudusync -g --silent
  IF !ERRORLEVEL! NEQ 0 goto error
  :: Locally just running "kuduSync" would also work
  SET KUDU_SYNC_CMD=%appdata%\npm\kuduSync.cmd
)
goto Deployment



::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
:Deployment
echo Handling python deployment.
:: 1.
KuduSync 56 | IF /I "%IN_PLACE_DEPLOYMENT%" NEQ "1" ( 57 | call :ExecuteCmd "%KUDU_SYNC_CMD%" -v 50 -f "%DEPLOYMENT_SOURCE%" -t "%DEPLOYMENT_TARGET%" -n "%NEXT_MANIFEST_PATH%" -p "%PREVIOUS_MANIFEST_PATH%" -i ".git;.hg;.deployment;deploy.cmd" 58 | IF !ERRORLEVEL! NEQ 0 goto error 59 | ) 60 | IF EXIST "%DEPLOYMENT_TARGET%\.skipPythonDeployment" goto postPython 61 | echo Detected requirements.txt. You can skip Python specific steps with a .skipPythonDeployment file. 62 | echo Custom Script 63 | pushd "%DEPLOYMENT_TARGET%" 64 | :: 3. Setup python 65 | ::echo "Configuring pip" 66 | ::curl https://bootstrap.pypa.io/get-pip.py | %PYTHON_EXE% 67 | :: 4. Install packages 68 | echo Pip install requirements. 69 | echo "Installing requirements" 70 | %PYTHON_EXE% -m pip install -r requirements.txt 71 | :: This PATH should direct to CNTK directory 72 | ::set PATH=%PYTHON_DIR%;%PATH% 73 | ::echo PATH set to %PYTHON_DIR% 74 | IF !ERRORLEVEL! NEQ 0 goto error 75 | 76 | :postPython 77 | :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: 78 | goto end 79 | 80 | 81 | :: Execute command routine that will echo out when error 82 | :ExecuteCmd 83 | setlocal 84 | set _CMD_=%* 85 | call %_CMD_% 86 | if "%ERRORLEVEL%" NEQ "0" echo Failed exitCode=%ERRORLEVEL%, command=%_CMD_% 87 | exit /b %ERRORLEVEL% 88 | :error 89 | endlocal 90 | echo An error has occurred during web site deployment. 91 | call :exitSetErrorLevel 92 | call :exitFromFunction 2>nul 93 | :exitSetErrorLevel 94 | exit /b 1 95 | :exitFromFunction 96 | () 97 | :end 98 | endlocal 99 | echo Finished successfully. -------------------------------------------------------------------------------- /doc/Postman_2017-09-26_22-50-06.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/Postman_2017-09-26_22-50-06.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-15-45.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-15-45.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-18-52.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-18-52.jpg -------------------------------------------------------------------------------- /doc/cmd_2017-09-26_22-20-23.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-20-23.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-17-20.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-17-20.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-22-19.jpg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-22-19.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-23-19.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-19.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-23-59.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-59.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_22-25-04.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-25-04.jpg -------------------------------------------------------------------------------- /doc/iexplore_2017-09-26_23-09-42.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_23-09-42.jpg -------------------------------------------------------------------------------- /evaluate.py: -------------------------------------------------------------------------------- 1 | 2 | from __future__ import print_function 3 | import numpy as np 4 | import os, sys 5 | import cv2 6 | from cntk import load_model, Axis, input_variable 7 | from cntk.core import Value 8 | from cntk.io import MinibatchData 9 | from cntk.layers import Constant 10 | 11 | from utils.annotations.annotations_helper import parse_class_map_file 12 | from config import cfg 13 | from plot_helpers import visualizeResultsFaster, imsave, apply_nms_to_single_image_results 14 | from cntk_helpers import regress_rois 15 | 16 | ############################################################### 17 | # Variables 18 | ############################################################### 19 | 20 | image_width = cfg["CNTK"].IMAGE_WIDTH 21 | image_height = cfg["CNTK"].IMAGE_HEIGHT 22 | num_channels = cfg["CNTK"].NUM_CHANNELS 23 | 24 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 25 | dims_input_const = MinibatchData(Value(batch=np.asarray( 26 | [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False) 27 | 28 | # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170]) 29 | img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114] 30 | normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]]) 31 | 32 | 33 | globalvars = {} 34 | 35 | map_file_path = cfg["CNTK"].MODEL_DIRECTORY 36 | globalvars['class_map_file'] = os.path.join(map_file_path, cfg["CNTK"].CLASS_MAP_FILE) 37 | globalvars['classes'] = parse_class_map_file(globalvars['class_map_file']) 38 | 
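# class index 0 is the background class; detections labeled 0 are filtered
# out later in eval_faster_rcnn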
globalvars['num_classes'] = len(globalvars['classes'])
globalvars['temppath'] = cfg["CNTK"].TEMP_PATH
feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)

# helper function
def load_resize_and_pad(image_path, width, height, pad_value=114):
    if "@" in image_path:
        print("WARNING: zipped image archives are not supported for visualizing results.")
        exit(0)

    img = cv2.imread(image_path)
    img_width = len(img[0])
    img_height = len(img)
    scale_w = img_width > img_height
    target_w = width
    target_h = height

    if scale_w:
        target_h = int(np.round(img_height * float(width) / float(img_width)))
    else:
        target_w = int(np.round(img_width * float(height) / float(img_height)))

    resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST)

    top = int(max(0, np.round((height - target_h) / 2)))
    left = int(max(0, np.round((width - target_w) / 2)))
    bottom = height - top - target_h
    right = width - left - target_w
    resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right,
                                          cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value])

    # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK accepts
    model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))

    dims = (width, height, target_w, target_h, img_width, img_height)
    return resized_with_pad, model_arg_rep, dims


# mode="returnimage" or "returntags"
def eval_faster_rcnn(eval_model, imgPath, img_shape,
                     results_base_path, feature_node_name, classes, mode,
                     drawUnregressedRois=False, drawNegativeRois=False,
                     nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8):

    # prepare model
    image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
    dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
    frcn_eval = eval_model(image_input, dims_input)

    #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
    print("Plotting results from Faster R-CNN model for image.")
    # evaluate single image

    _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])

    dims_input = np.array(dims, dtype=np.float32)
    dims_input.shape = (1,) + dims_input.shape
    output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})

    out_dict = dict([(k.name, k) for k in output])
    out_cls_pred = output[out_dict['cls_pred']][0]
    out_rpn_rois = output[out_dict['rpn_rois']][0]
    out_bbox_regr = output[out_dict['bbox_regr']][0]

    labels = out_cls_pred.argmax(axis=1)
    scores = out_cls_pred.max(axis=1).tolist()

    if mode=="returntags":
        class Tag(object):
            def __init__(self, label, score, bbox):
                self.label = label
                self.score = score
                self.bbox = bbox

            def serialize(self):
                return {
                    'label': self.label,
                    'score': self.score,
                    'bbox': self.bbox,
                }

        results = []
        for i in range(len(out_rpn_rois)):
            if labels[i] != 0:  # skip background detections
                x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i]))
                results.append(x)

        return results


    elif mode=="returnimage":
        evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
        if drawUnregressedRois:
            # plot results without final regression
            imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
                                              classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
                                              decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, imgDebug)
        else:
            # apply regression and nms to bbox coordinates
            regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)

            nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
                                                               nms_threshold=nmsThreshold,
                                                               conf_threshold=nmsConfThreshold)

            img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
                                         classes, nmsKeepIndices=nmsKeepIndices,
                                         boDrawNegativeRois=drawNegativeRois,
                                         decisionThreshold=bgrPlotThreshold)
            imsave(evaluated_image_path, img)

        return evaluated_image_path
    else:
        raise ValueError("Unsupported value found in 'mode' parameter")




# mode="returnimage" or "returntags"
def evaluateimage(file_path, mode, eval_model=None):

    #from plot_helpers import eval_and_plot_faster_rcnn
    if eval_model is None:
        print("Loading existing model from %s" % model_path)
        eval_model = load_model(model_path)
    img_shape = (num_channels, image_height, image_width)
    results_folder = globalvars['temppath']
    results = eval_faster_rcnn(eval_model, file_path, img_shape,
                               results_folder, feature_node_name, globalvars['classes'], mode,
                               drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
                               drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
                               nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
                               nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
                               bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)
    return results

--------------------------------------------------------------------------------
/logs/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/logs/placeholder.txt
--------------------------------------------------------------------------------
/plot_helpers.py:
--------------------------------------------------------------------------------
# Copyright (c) Microsoft. All rights reserved.

# Licensed under the MIT license. See LICENSE.md file in the project root
# for full license information.
5 | # ============================================================================== 6 | 7 | from __future__ import print_function 8 | from builtins import str 9 | import sys, os, time 10 | import numpy as np 11 | from easydict import EasyDict 12 | from builtins import range 13 | import copy, textwrap 14 | from PIL import Image, ImageFont, ImageDraw 15 | from PIL.ExifTags import TAGS 16 | 17 | # this is important when deploying to headless server environment (non-GUI) 18 | ################################################### 19 | import matplotlib 20 | # force headless backend, or set 'backend' to 'Agg' 21 | # in your ~/.matplotlib/matplotlibrc 22 | matplotlib.use('Agg') 23 | 24 | import matplotlib.pyplot 25 | # force non-interactive mode, or set 'interactive' to False 26 | # in your ~/.matplotlib/matplotlibrc 27 | from matplotlib.pyplot import imsave 28 | matplotlib.pyplot.ioff() 29 | ################################################### 30 | 31 | import cntk 32 | from cntk import input_variable, Axis 33 | from utils.nms.nms_wrapper import apply_nms_to_single_image_results 34 | from cntk_helpers import regress_rois 35 | import cv2 # pip install opencv-python 36 | 37 | available_font = "arial.ttf" 38 | try: 39 | dummy = ImageFont.truetype(available_font, 16) 40 | except: 41 | available_font = "FreeMono.ttf" 42 | 43 | 44 | #################################### 45 | # Visualize results 46 | #################################### 47 | def visualizeResultsFaster(imgPath, roiLabels, roiScores, roiRelCoords, padWidth, padHeight, classes, 48 | nmsKeepIndices = None, boDrawNegativeRois = True, decisionThreshold = 0.0): 49 | # read and resize image 50 | imgWidth, imgHeight = imWidthHeight(imgPath) 51 | scale = 800.0 / max(imgWidth, imgHeight) 52 | imgHeight = int(imgHeight * scale) 53 | imgWidth = int(imgWidth * scale) 54 | if imgWidth > imgHeight: 55 | h_border = 0 56 | v_border = int((imgWidth - imgHeight)/2) 57 | else: 58 | h_border = int((imgHeight - imgWidth)/2) 59 | v_border = 0 60 | 61 | PAD_COLOR = [103, 116, 123] # [114, 114, 114] 62 | cv_img = cv2.imread(imgPath) 63 | rgb_img = cv2.cvtColor(cv_img,cv2.COLOR_BGR2RGB) 64 | resized = cv2.resize(rgb_img, (imgWidth, imgHeight), interpolation=cv2.INTER_NEAREST) 65 | imgDebug = cv2.copyMakeBorder(resized,v_border,v_border,h_border,h_border,cv2.BORDER_CONSTANT,value=PAD_COLOR) 66 | rect_scale = 800 / padWidth 67 | 68 | assert(len(roiLabels) == len(roiRelCoords)) 69 | if roiScores: 70 | assert(len(roiLabels) == len(roiScores)) 71 | minScore = min(roiScores) 72 | print("roiScores min: {}, max: {}, threshold: {}".format(minScore, max(roiScores), decisionThreshold)) 73 | if minScore > decisionThreshold: 74 | decisionThreshold = minScore * 0.5 75 | print("reset decision threshold to: {}".format(decisionThreshold)) 76 | 77 | # draw multiple times to avoid occlusions 78 | for iter in range(0,3): 79 | for roiIndex in range(len(roiRelCoords)): 80 | label = roiLabels[roiIndex] 81 | if roiScores: 82 | score = roiScores[roiIndex] 83 | if decisionThreshold and score < decisionThreshold: 84 | label = 0 85 | 86 | # init drawing parameters 87 | thickness = 1 88 | if label == 0: 89 | color = (255, 0, 0) 90 | else: 91 | color = getColorsPalette()[label] 92 | 93 | rect = [(rect_scale * i) for i in roiRelCoords[roiIndex]] 94 | rect[0] = int(max(0, min(padWidth, rect[0]))) 95 | rect[1] = int(max(0, min(padHeight, rect[1]))) 96 | rect[2] = int(max(0, min(padWidth, rect[2]))) 97 | rect[3] = int(max(0, min(padHeight, rect[3]))) 98 | 99 | # draw in higher iterations only the 
detections 100 | if iter == 0 and boDrawNegativeRois: 101 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness) 102 | elif iter==1 and label > 0: 103 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices): 104 | thickness = 4 105 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness) 106 | elif iter == 2 and label > 0: 107 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices): 108 | font = ImageFont.truetype(available_font, 18) 109 | text = classes[label] 110 | if roiScores: 111 | text += "(" + str(round(score, 2)) + ")" 112 | imgDebug = drawText(imgDebug, (rect[0],rect[1]), text, color = (255,255,255), font = font, colorBackground=color) 113 | return imgDebug 114 | 115 | def load_resize_and_pad(image_path, width, height, pad_value=114): 116 | if "@" in image_path: 117 | print("WARNING: zipped image archives are not supported for visualizing results.") 118 | exit(0) 119 | 120 | img = cv2.imread(image_path) 121 | img_width = len(img[0]) 122 | img_height = len(img) 123 | scale_w = img_width > img_height 124 | target_w = width 125 | target_h = height 126 | 127 | if scale_w: 128 | target_h = int(np.round(img_height * float(width) / float(img_width))) 129 | else: 130 | target_w = int(np.round(img_width * float(height) / float(img_height))) 131 | 132 | resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST) 133 | 134 | top = int(max(0, np.round((height - target_h) / 2))) 135 | left = int(max(0, np.round((width - target_w) / 2))) 136 | bottom = height - top - target_h 137 | right = width - left - target_w 138 | resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right, 139 | cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value]) 140 | 141 | # transpose(2,0,1) converts the image to the HWC format which CNTK accepts 142 | model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1)) 143 | 144 | dims = (width, height, target_w, target_h, img_width, img_height) 145 | return resized_with_pad, model_arg_rep, dims 146 | 147 | # Tests a Faster R-CNN model and plots images with detected boxes 148 | def eval_and_plot_faster_rcnn(eval_model, num_images_to_plot, test_map_file, img_shape, 149 | results_base_path, feature_node_name, classes, 150 | drawUnregressedRois=False, drawNegativeRois=False, 151 | nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8): 152 | # get image paths 153 | with open(test_map_file) as f: 154 | content = f.readlines() 155 | img_base_path = os.path.dirname(os.path.abspath(test_map_file)) 156 | img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content] 157 | 158 | # prepare model 159 | image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name) 160 | dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input') 161 | frcn_eval = eval_model(image_input, dims_input) 162 | 163 | #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6)) 164 | print("Plotting results from Faster R-CNN model for %s images." 
165 |     for i in range(0, num_images_to_plot):
166 |         imgPath = img_file_names[i]
167 | 
168 |         # evaluate single image
169 |         _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])
170 | 
171 |         dims_input = np.array(dims, dtype=np.float32)
172 |         dims_input.shape = (1,) + dims_input.shape
173 |         output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})
174 | 
175 |         out_dict = dict([(k.name, k) for k in output])
176 |         out_cls_pred = output[out_dict['cls_pred']][0]
177 |         out_rpn_rois = output[out_dict['rpn_rois']][0]
178 |         out_bbox_regr = output[out_dict['bbox_regr']][0]
179 | 
180 |         labels = out_cls_pred.argmax(axis=1)
181 |         scores = out_cls_pred.max(axis=1).tolist()
182 | 
183 |         if drawUnregressedRois:
184 |             # plot results without final regression
185 |             imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
186 |                                               classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
187 |                                               decisionThreshold=bgrPlotThreshold)
188 |             imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(imgPath)), imgDebug)
189 | 
190 |         # apply regression and nms to bbox coordinates
191 |         regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
192 | 
193 |         nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
194 |                                                            nms_threshold=nmsThreshold,
195 |                                                            conf_threshold=nmsConfThreshold)
196 | 
197 |         img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
198 |                                      classes, nmsKeepIndices=nmsKeepIndices,
199 |                                      boDrawNegativeRois=drawNegativeRois,
200 |                                      decisionThreshold=bgrPlotThreshold)
201 |         imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(imgPath)), img)
202 | 
203 | 
204 | ####################################
205 | # helper library
206 | ####################################
207 | 
208 | def imread(imgPath, boThrowErrorIfExifRotationTagSet = True):
209 |     if not os.path.exists(imgPath):
210 |         # fail fast instead of letting cv2.imread silently return None
211 |         raise Exception("ERROR: image path does not exist: " + imgPath)
212 | 
213 |     rotation = rotationFromExifTag(imgPath)
214 |     if boThrowErrorIfExifRotationTagSet and rotation != 0:
215 |         raise Exception("Error: exif rotation tag set, image needs to be rotated by %d degrees." % rotation)
216 |     img = cv2.imread(imgPath)
217 |     if img is None:
218 |         # cv2.imread returns None instead of raising when a file cannot be decoded
219 |         raise Exception("ERROR: cannot load image " + imgPath)
220 |     if rotation != 0:
221 |         img = np.rot90(img, rotation // 90).copy() # .copy() avoids occasional "TypeError: Layout of the output array img is incompatible with cv::Mat"
222 |     return img
223 | 
224 | def rotationFromExifTag(imgPath):
225 |     TAGSinverted = {v: k for k, v in TAGS.items()}
226 |     orientationExifId = TAGSinverted['Orientation']
227 |     try:
228 |         imageExifTags = Image.open(imgPath)._getexif()
229 |     except Exception:
230 |         imageExifTags = None
231 | 
232 |     # rotate the image if orientation exif tag is present
233 |     rotation = 0
234 |     if imageExifTags is not None and orientationExifId is not None and orientationExifId in imageExifTags:
235 |         orientation = imageExifTags[orientationExifId]
236 |         # print ("orientation = " + str(imageExifTags[orientationExifId]))
237 |         if orientation == 1 or orientation == 0:
238 |             rotation = 0 # no need to do anything
239 |         elif orientation == 6:
240 |             rotation = -90
241 |         elif orientation == 8:
242 |             rotation = 90
243 |         else:
244 |             # orientations other than 0/1/6/8 (e.g. mirrored variants) are not handled
245 |             raise Exception("ERROR: orientation = " + str(orientation) + " not supported!")
246 |     return rotation
247 | 
248 | def imwrite(img, imgPath):
249 |     cv2.imwrite(imgPath, img)
250 | 
251 | def imresize(img, scale, interpolation = cv2.INTER_LINEAR):
252 |     return cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=interpolation)
253 | 
254 | def imresizeMaxDim(img, maxDim, boUpscale = False, interpolation = cv2.INTER_LINEAR):
255 |     scale = 1.0 * maxDim / max(img.shape[:2])
256 |     if scale < 1 or boUpscale:
257 |         img = imresize(img, scale, interpolation)
258 |     else:
259 |         scale = 1.0
260 |     return img, scale
261 | 
262 | def imWidth(input):
263 |     return imWidthHeight(input)[0]
264 | 
265 | def imHeight(input):
266 |     return imWidthHeight(input)[1]
267 | 
268 | def imWidthHeight(input):
269 |     width, height = Image.open(input).size # this does not load the full image
270 |     return width, height
271 | 
272 | def imArrayWidth(input):
273 |     return imArrayWidthHeight(input)[0]
274 | 
275 | def imArrayHeight(input):
276 |     return imArrayWidthHeight(input)[1]
277 | 
278 | def imArrayWidthHeight(input):
279 |     width = input.shape[1]
280 |     height = input.shape[0]
281 |     return width, height
282 | 
283 | def imshow(img, waitDuration=0, maxDim = None, windowName = 'img'):
284 |     if isinstance(img, str): # test if 'img' is a string
285 |         img = cv2.imread(img)
286 |     if maxDim is not None:
287 |         scaleVal = 1.0 * maxDim / max(img.shape[:2])
288 |         if scaleVal < 1:
289 |             img = imresize(img, scaleVal)
290 |     cv2.imshow(windowName, img)
291 |     cv2.waitKey(waitDuration)
292 | 
293 | def drawRectangles(img, rects, color = (0, 255, 0), thickness = 2):
294 |     for rect in rects:
295 |         pt1 = tuple(ToIntegers(rect[0:2]))
296 |         pt2 = tuple(ToIntegers(rect[2:]))
297 |         try:
298 |             cv2.rectangle(img, pt1, pt2, color, thickness)
299 |         except:
300 |             print("Unexpected error:", sys.exc_info()[0])
301 |             raise
302 | 
303 | def drawCrossbar(img, pt):
304 |     (x,y) = pt
305 |     cv2.rectangle(img, (0, y), (x, y), (255, 255, 0), 1)
306 |     cv2.rectangle(img, (x, 0), (x, y), (255, 255, 0), 1)
307 |     cv2.rectangle(img, (img.shape[1],y), (x, y), (255, 255, 0), 1)
308 |     cv2.rectangle(img, (x, img.shape[0]), (x, y), (255, 255, 0), 1)
309 | 
310 | def ptClip(pt, maxWidth, maxHeight):
311 |     pt = list(pt)
312 |     pt[0] = max(pt[0], 0)
313 |     pt[1] = max(pt[1], 0)
314 |     pt[0] = min(pt[0], maxWidth)
315 |     pt[1] = min(pt[1], maxHeight)
316 |     return pt
317 | 
318 | def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
319 |     pilImg = imconvertCv2Pil(img)
320 |     pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
321 |     return imconvertPil2Cv(pilImg)
322 | 
323 | def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
324 |     textY = pt[1]
325 |     draw = ImageDraw.Draw(pilImg)
326 |     if textWidth is None:
327 |         lines = [text]
328 |     else:
329 |         lines = textwrap.wrap(text, width=textWidth)
330 |     for line in lines:
331 |         width, height = font.getsize(line)
332 |         if colorBackground is not None:
333 |             draw.rectangle((pt[0], textY, pt[0] + width, textY + height), fill=tuple(colorBackground[::-1]))
334 |         draw.text((pt[0], textY), line, fill = tuple(color), font = font)
335 |         textY += height # advance so that wrapped lines do not overlap
336 |     return pilImg
337 | 
338 | def getColorsPalette():
339 |     colors = [[255,0,0], [0,255,0], [0,0,255], [255,255,0], [255,0,255]]
340 |     for i in range(5):
341 |         for dim in range(0,3):
342 |             for s in (0.25, 0.5, 0.75):
343 |                 if colors[i][dim] != 0:
344 |                     newColor = copy.deepcopy(colors[i])
345 |                     newColor[dim] = int(round(newColor[dim] * s))
346 |                     colors.append(newColor)
347 |     return colors
348 | 
349 | def imconvertPil2Cv(pilImg):
350 |     rgb = pilImg.convert('RGB')
351 |     return np.array(rgb).copy()[:, :, ::-1]
352 | 
353 | def imconvertCv2Pil(img):
354 |     cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
355 |     return Image.fromarray(cv2_im)
356 | 
357 | def ToIntegers(list1D):
358 |     return [int(float(x)) for x in list1D]
359 | 
360 | def getDictionary(keys, values, boConvertValueToInt = True):
361 |     dictionary = {}
362 |     for key, value in zip(keys, values):
363 |         if (boConvertValueToInt):
364 |             value = int(value)
365 |         dictionary[key] = value
366 |     return dictionary
367 | 
368 | class Bbox:
369 |     MAX_VALID_DIM = 100000
370 |     left = top = right = bottom = None
371 | 
372 |     def __init__(self, left, top, right, bottom):
373 |         self.left = int(round(float(left)))
374 |         self.top = int(round(float(top)))
375 |         self.right = int(round(float(right)))
376 |         self.bottom = int(round(float(bottom)))
377 |         self.standardize()
378 | 
379 |     def __str__(self):
380 |         return ("Bbox object: left = {0}, top = {1}, right = {2}, bottom = {3}".format(self.left, self.top, self.right, self.bottom))
381 | 
382 |     def __repr__(self):
383 |         return str(self)
384 | 
385 |     def rect(self):
386 |         return [self.left, self.top, self.right, self.bottom]
387 | 
388 |     def max(self):
389 |         return max([self.left, self.top, self.right, self.bottom])
390 | 
391 |     def min(self):
392 |         return min([self.left, self.top, self.right, self.bottom])
393 | 
394 |     def width(self):
395 |         width = self.right - self.left + 1
396 |         assert(width >= 0)
397 |         return width
398 | 
399 |     def height(self):
400 |         height = self.bottom - self.top + 1
401 |         assert(height >= 0)
402 |         return height
403 | 
404 |     def surfaceArea(self):
405 |         return self.width() * self.height()
406 | 
407 |     def standardize(self): # ensure left <= right and top <= bottom (called by __init__)
408 |         leftNew = min(self.left, self.right)
409 |         topNew = min(self.top, self.bottom)
410 |         self.right = max(self.left, self.right)
411 |         self.bottom = max(self.top, self.bottom)
412 |         self.left = leftNew
413 |         self.top = topNew
414 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | easydict==1.6
2 | pytest==3.0.3
3 | opencv-python
4 | https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
5 | https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
6 | Flask==0.12.2
7 | numpy==1.11.2
8 | matplotlib==1.5.3
9 | ipython==6.2.0
10 | Pillow==4.1.1
11 | PyYAML==3.12
12 | 
--------------------------------------------------------------------------------
/utils/Readme.md:
--------------------------------------------------------------------------------
1 | ## Detection utils
2 | 
3 | This folder contains Python utility modules for object detection networks.
4 | 
5 | ### Cython modules
6 | 
7 | To use the rpn component you need precompiled Cython modules for nms (at least cpu_nms.cpXX-win_amd64.pyd for Windows or cpu_nms.cpython-XXm.so for Linux) and bbox (cython_bbox.cpXX-win_amd64.pyd for Windows or cython_bbox.cpython-XXm.so for Linux).
8 | To compile the Cython modules for Windows see https://github.com/MrGF/py-faster-rcnn-windows:
9 | ```
10 | git clone https://github.com/MrGF/py-faster-rcnn-windows
11 | cd $FRCN_ROOT/lib
12 | python setup.py build_ext --inplace
13 | ```
14 | For Linux see https://github.com/rbgirshick/py-faster-rcnn:
15 | ```
16 | git clone https://github.com/rbgirshick/py-faster-rcnn
17 | cd $FRCN_ROOT/lib
18 | python setup.py build_ext --inplace
19 | ```
20 | Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.
21 | 
22 | ##### `default_config.py`
23 | 
24 | Contains all required parameters for using a region proposal network in training or evaluation. You can override these parameters by specifying a `config.py` file of the same format inside your working directory.
25 | 
26 | ### `rpn` module overview
27 | 
28 | The rpn module contains helper methods and the required layers to generate region proposal networks for object detection.
29 | 
30 | ##### `rpn_helpers.py`
31 | 
32 | Contains helper methods to create a region proposal network (rpn) and a proposal target layer for training the rpn.
33 | 
34 | ##### `generate_anchors.py`
35 | 
36 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
37 | 
38 | ##### `proposal_layer.py`
39 | 
40 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
41 | 
42 | ##### `anchor_target_layer.py`
43 | 
44 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
45 | Bbox regression targets are specified when the classification label is > 0.
46 | 
47 | ##### `proposal_target_layer.py`
48 | 
49 | Generates training targets/labels for each object proposal: classification labels 0 - K (bg or object class 1, ..., K)
50 | and bbox regression targets in the case that the label is > 0.
51 | 
52 | ##### `generate.py`
53 | 
54 | Generates object detection proposals from an imdb using an RPN.
55 | 
--------------------------------------------------------------------------------
/utils/annotations/annotations_helper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import os 9 | 10 | def parse_class_map_file(class_map_file): 11 | with open(class_map_file, "r") as f: 12 | lines = f.readlines() 13 | class_list = [None]*len(lines) 14 | for line in lines: 15 | tab_pos = line.find('\t') 16 | class_name = line[:tab_pos] 17 | class_id = int(line[tab_pos+1:-1]) 18 | class_list[class_id] = class_name 19 | 20 | return class_list -------------------------------------------------------------------------------- /utils/caffe_layers/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | import os 9 | #import caffe 10 | import yaml 11 | import numpy as np 12 | import numpy.random as npr 13 | from utils.default_config import cfg 14 | from utils.rpn.generate_anchors import generate_anchors 15 | from utils.rpn.bbox_transform import bbox_transform 16 | from utils.cython_modules.cython_bbox import bbox_overlaps 17 | 18 | DEBUG = False 19 | 20 | class AnchorTargetLayer: #(caffe.Layer): 21 | """ 22 | Assign anchors to ground-truth targets. Produces anchor classification 23 | labels and bounding-box regression targets. 24 | """ 25 | 26 | def set_param_str(self, param_str): 27 | self.param_str_ = param_str 28 | 29 | def set_deterministic_mode(self, mode = True): 30 | self._determininistic_mode = mode 31 | 32 | def setup(self, bottom, top): 33 | layer_params = yaml.load(self.param_str_) 34 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 35 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 36 | self._num_anchors = self._anchors.shape[0] 37 | self._feat_stride = layer_params['feat_stride'] 38 | 39 | if DEBUG: 40 | print('anchors:') 41 | print(self._anchors) 42 | print('anchor shapes:') 43 | print(np.hstack(( 44 | self._anchors[:, 2::4] - self._anchors[:, 0::4], 45 | self._anchors[:, 3::4] - self._anchors[:, 1::4], 46 | ))) 47 | self._counts = cfg.EPS 48 | self._sums = np.zeros((1, 4)) 49 | self._squared_sums = np.zeros((1, 4)) 50 | self._fg_sum = 0 51 | self._bg_sum = 0 52 | self._count = 0 53 | 54 | # allow boxes to sit over the edge by a small amount 55 | self._allowed_border = layer_params.get('allowed_border', 0) 56 | 57 | height, width = bottom[0].data.shape[-2:] 58 | if DEBUG: 59 | print('AnchorTargetLayer: height', height, 'width', width) 60 | 61 | #A = self._num_anchors 62 | # labels 63 | #top[0].reshape(1, 1, A * height, width) 64 | # bbox_targets 65 | #top[1].reshape(1, A * 4, height, width) 66 | # bbox_inside_weights 67 | #top[2].reshape(1, A * 4, height, width) 68 | # bbox_outside_weights 69 | #top[3].reshape(1, A * 4, height, width) 70 | 71 | def forward(self, bottom, top): 72 | # Algorithm: 73 | # 74 | # for each (H, W) location i 75 | # generate 9 anchor boxes centered on cell i 76 | # apply predicted bbox deltas at cell i to each of the 9 anchors 77 | # filter out-of-image anchors 78 | # measure GT overlap 79 | 80 | assert bottom[0].data.shape[0] == 1, \ 81 | 'Only single item batches are supported' 82 | 83 | # map of shape (..., H, W) 84 | height, width = bottom[0].data.shape[-2:] 85 | # GT boxes (x1, y1, x2, y2, label) 86 | gt_boxes = bottom[1]#.data 87 | # im_info 88 | im_info = 
bottom[2]#.data[0, :] 89 | 90 | if DEBUG: 91 | print('') 92 | print('im_size: ({}, {})'.format(im_info[0], im_info[1])) 93 | print('scale: {}'.format(im_info[2])) 94 | print('height, width: ({}, {})'.format(height, width)) 95 | print('rpn: gt_boxes.shape', gt_boxes.shape) 96 | print('rpn: gt_boxes', gt_boxes) 97 | 98 | # 1. Generate proposals from bbox deltas and shifted anchors 99 | shift_x = np.arange(0, width) * self._feat_stride 100 | shift_y = np.arange(0, height) * self._feat_stride 101 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 102 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 103 | shift_x.ravel(), shift_y.ravel())).transpose() 104 | # add A anchors (1, A, 4) to 105 | # cell K shifts (K, 1, 4) to get 106 | # shift anchors (K, A, 4) 107 | # reshape to (K*A, 4) shifted anchors 108 | A = self._num_anchors 109 | K = shifts.shape[0] 110 | all_anchors = (self._anchors.reshape((1, A, 4)) + 111 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 112 | all_anchors = all_anchors.reshape((K * A, 4)) 113 | total_anchors = int(K * A) 114 | 115 | # only keep anchors inside the image 116 | inds_inside = np.where( 117 | (all_anchors[:, 0] >= -self._allowed_border) & 118 | (all_anchors[:, 1] >= -self._allowed_border) & 119 | (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width 120 | (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height 121 | )[0] 122 | 123 | if DEBUG: 124 | print('total_anchors', total_anchors) 125 | print('inds_inside', len(inds_inside)) 126 | 127 | # keep only inside anchors 128 | anchors = all_anchors[inds_inside, :] 129 | if DEBUG: 130 | print('anchors.shape', anchors.shape) 131 | 132 | # label: 1 is positive, 0 is negative, -1 is dont care 133 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 134 | labels.fill(-1) 135 | 136 | # overlaps between the anchors and the gt boxes 137 | # overlaps (ex, gt) 138 | overlaps = bbox_overlaps( 139 | np.ascontiguousarray(anchors, dtype=np.float), 140 | np.ascontiguousarray(gt_boxes, dtype=np.float)) 141 | argmax_overlaps = overlaps.argmax(axis=1) 142 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 143 | gt_argmax_overlaps = overlaps.argmax(axis=0) 144 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 145 | np.arange(overlaps.shape[1])] 146 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 147 | 148 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES: 149 | # assign bg labels first so that positive labels can clobber them 150 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 151 | 152 | # fg label: for each gt, anchor with highest overlap 153 | labels[gt_argmax_overlaps] = 1 154 | 155 | # fg label: above threshold IOU 156 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1 157 | 158 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES: 159 | # assign bg labels last so that negative labels can clobber positives 160 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0 161 | 162 | # subsample positive labels if we have too many 163 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE) 164 | fg_inds = np.where(labels == 1)[0] 165 | if len(fg_inds) > num_fg: 166 | if self._determininistic_mode: 167 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)] 168 | else: 169 | disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False) 170 | labels[disable_inds] = -1 171 | 172 | # subsample negative labels if we have too many 173 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1) 174 | bg_inds = np.where(labels == 0)[0] 175 | if 
len(bg_inds) > num_bg: 176 | if self._determininistic_mode: 177 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)] 178 | else: 179 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 180 | labels[disable_inds] = -1 181 | #print "was %s inds, disabling %s, now %s inds" % ( 182 | #len(bg_inds), len(disable_inds), np.sum(labels == 0)) 183 | 184 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32) 185 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 186 | 187 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 188 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS) 189 | 190 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 191 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0: 192 | # uniform weighting of examples (given non-uniform sampling) 193 | num_examples = np.sum(labels >= 0) 194 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples 195 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples 196 | else: 197 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) & 198 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1)) 199 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT / 200 | np.sum(labels == 1)) 201 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) / 202 | np.sum(labels == 0)) 203 | bbox_outside_weights[labels == 1, :] = positive_weights 204 | bbox_outside_weights[labels == 0, :] = negative_weights 205 | 206 | if DEBUG: 207 | self._sums += bbox_targets[labels == 1, :].sum(axis=0) 208 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 209 | self._counts += np.sum(labels == 1) 210 | means = self._sums / self._counts 211 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2) 212 | print('means:') 213 | print(means) 214 | print('stdevs:') 215 | print(stds) 216 | 217 | # map up to original set of anchors 218 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 219 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 220 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 221 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0) 222 | 223 | if DEBUG: 224 | print('rpn: max max_overlap', np.max(max_overlaps)) 225 | print('rpn: num_positive', np.sum(labels == 1)) 226 | print('rpn: num_negative', np.sum(labels == 0)) 227 | self._fg_sum += np.sum(labels == 1) 228 | self._bg_sum += np.sum(labels == 0) 229 | self._count += 1 230 | print('rpn: num_positive avg', self._fg_sum / self._count) 231 | print('rpn: num_negative avg', self._bg_sum / self._count) 232 | 233 | # labels 234 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 235 | #labels = labels.reshape((1, 1, A * height, width)) 236 | #top[0].reshape(*labels.shape) 237 | #top[0].data[...] = labels 238 | 239 | # bbox_targets 240 | bbox_targets = bbox_targets \ 241 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 242 | #top[1].reshape(*bbox_targets.shape) 243 | #top[1].data[...] = bbox_targets 244 | 245 | # bbox_inside_weights 246 | bbox_inside_weights = bbox_inside_weights \ 247 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 248 | #assert bbox_inside_weights.shape[2] == height 249 | #assert bbox_inside_weights.shape[3] == width 250 | #top[2].reshape(*bbox_inside_weights.shape) 251 | #top[2].data[...] 
= bbox_inside_weights 252 | 253 | # bbox_outside_weights 254 | #bbox_outside_weights = bbox_outside_weights \ 255 | # .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 256 | #assert bbox_outside_weights.shape[2] == height 257 | #assert bbox_outside_weights.shape[3] == width 258 | #top[3].reshape(*bbox_outside_weights.shape) 259 | #top[3].data[...] = bbox_outside_weights 260 | 261 | return labels, bbox_targets, bbox_inside_weights 262 | 263 | def backward(self, top, propagate_down, bottom): 264 | """This layer does not propagate gradients.""" 265 | pass 266 | 267 | def reshape(self, bottom, top): 268 | """Reshaping happens during the call to forward.""" 269 | pass 270 | 271 | 272 | def _unmap(data, count, inds, fill=0): 273 | """ Unmap a subset of item (data) back to the original set of items (of 274 | size count) """ 275 | if len(data.shape) == 1: 276 | ret = np.empty((count, ), dtype=np.float32) 277 | ret.fill(fill) 278 | ret[inds] = data 279 | else: 280 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 281 | ret.fill(fill) 282 | ret[inds, :] = data 283 | return ret 284 | 285 | 286 | def _compute_targets(ex_rois, gt_rois): 287 | """Compute bounding-box regression targets for an image.""" 288 | 289 | assert ex_rois.shape[0] == gt_rois.shape[0] 290 | assert ex_rois.shape[1] == 4 291 | assert gt_rois.shape[1] == 5 292 | 293 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 294 | -------------------------------------------------------------------------------- /utils/caffe_layers/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | 10 | def bbox_transform(ex_rois, gt_rois): 11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 15 | 16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 20 | 21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 23 | targets_dw = np.log(gt_widths / ex_widths) 24 | targets_dh = np.log(gt_heights / ex_heights) 25 | 26 | targets = np.vstack( 27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 28 | return targets 29 | 30 | def bbox_transform_inv(boxes, deltas): 31 | if boxes.shape[0] == 0: 32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 33 | 34 | boxes = boxes.astype(deltas.dtype, copy=False) 35 | 36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 38 | ctr_x = boxes[:, 0] + 0.5 * widths 39 | ctr_y = boxes[:, 1] + 0.5 * heights 40 | 41 | dx = deltas[:, 0::4] 42 | dy = deltas[:, 1::4] 43 | dw = deltas[:, 2::4] 44 | dh = deltas[:, 3::4] 45 | 46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 48 | pred_w = np.exp(dw) * widths[:, np.newaxis] 49 | pred_h = np.exp(dh) * heights[:, np.newaxis] 50 | 51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 52 | # x1 53 | 
pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 54 | # y1 55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 56 | # x2 57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 58 | # y2 59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 60 | 61 | return pred_boxes 62 | 63 | def clip_boxes(boxes, im_shape): 64 | """ 65 | Clip boxes to image boundaries. 66 | """ 67 | 68 | # x1 >= 0 69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 70 | # y1 >= 0 71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 72 | # x2 < im_shape[1] 73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 74 | # y2 < im_shape[0] 75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 76 | return boxes 77 | -------------------------------------------------------------------------------- /utils/caffe_layers/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import numpy as np 10 | import yaml 11 | from utils.default_config import cfg 12 | from utils.rpn.generate_anchors import generate_anchors 13 | from utils.caffe_layers.bbox_transform import bbox_transform_inv, clip_boxes 14 | from utils.nms.nms_wrapper import nms 15 | 16 | DEBUG = False 17 | 18 | class ProposalLayer: #(caffe.Layer): 19 | """ 20 | Outputs object detection proposals by applying estimated bounding-box 21 | transformations to a set of regular boxes (called "anchors"). 
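    At test time, with the defaults in utils/default_config.py, the top 6000
    scoring proposals are kept before NMS (TEST.RPN_PRE_NMS_TOP_N), NMS is
    applied with an IoU threshold of 0.7 (TEST.RPN_NMS_THRESH), and at most
    300 proposals survive (TEST.RPN_POST_NMS_TOP_N).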
22 | """ 23 | 24 | def set_param_str(self, param_str): 25 | self.param_str_ = param_str 26 | 27 | def setup(self, bottom, top): 28 | # parse the layer parameter string, which must be valid YAML 29 | layer_params = yaml.load(self.param_str_) 30 | 31 | self._feat_stride = layer_params['feat_stride'] 32 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 33 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 34 | self._num_anchors = self._anchors.shape[0] 35 | self.phase = "TEST" 36 | 37 | #if DEBUG: 38 | #print 'feat_stride: {}'.format(self._feat_stride) 39 | #print 'anchors:' 40 | #print self._anchors 41 | 42 | # rois blob: holds R regions of interest, each is a 5-tuple 43 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 44 | # rectangle (x1, y1, x2, y2) 45 | #top[0].reshape(1, 5) 46 | 47 | # scores blob: holds scores for R regions of interest 48 | #if len(top) > 1: 49 | # top[1].reshape(1, 1, 1, 1) 50 | 51 | def forward(self, bottom, top): 52 | # Algorithm: 53 | # 54 | # for each (H, W) location i 55 | # generate A anchor boxes centered on cell i 56 | # apply predicted bbox deltas at cell i to each of the A anchors 57 | # clip predicted boxes to image 58 | # remove predicted boxes with either height or width < threshold 59 | # sort all (proposal, score) pairs by score from highest to lowest 60 | # take top pre_nms_topN proposals before NMS 61 | # apply NMS with threshold 0.7 to remaining proposals 62 | # take after_nms_topN proposals after NMS 63 | # return the top proposals (-> RoIs top, scores top) 64 | 65 | assert bottom[0].shape[0] == 1, \ 66 | 'Only single item batches are supported' 67 | 68 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST' 69 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N 70 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N 71 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH 72 | min_size = cfg[cfg_key].RPN_MIN_SIZE 73 | 74 | # the first set of _num_anchors channels are bg probs 75 | # the second set are the fg probs, which we want 76 | scores = bottom[0][:, self._num_anchors:, :, :] 77 | bbox_deltas = bottom[1] 78 | im_info = bottom[2][0, :] 79 | 80 | #if DEBUG: 81 | # print 'im_size: ({}, {})'.format(im_info[0], im_info[1]) 82 | # print 'scale: {}'.format(im_info[2]) 83 | 84 | # 1. 
Generate proposals from bbox deltas and shifted anchors 85 | height, width = scores.shape[-2:] 86 | 87 | #if DEBUG: 88 | # print 'score map size: {}'.format(scores.shape) 89 | 90 | # Enumerate all shifts 91 | shift_x = np.arange(0, width) * self._feat_stride 92 | shift_y = np.arange(0, height) * self._feat_stride 93 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 94 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 95 | shift_x.ravel(), shift_y.ravel())).transpose() 96 | 97 | # Enumerate all shifted anchors: 98 | # 99 | # add A anchors (1, A, 4) to 100 | # cell K shifts (K, 1, 4) to get 101 | # shift anchors (K, A, 4) 102 | # reshape to (K*A, 4) shifted anchors 103 | A = self._num_anchors 104 | K = shifts.shape[0] 105 | anchors = self._anchors.reshape((1, A, 4)) + \ 106 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 107 | anchors = anchors.reshape((K * A, 4)) 108 | 109 | # Transpose and reshape predicted bbox transformations to get them 110 | # into the same order as the anchors: 111 | # 112 | # bbox deltas will be (1, 4 * A, H, W) format 113 | # transpose to (1, H, W, 4 * A) 114 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 115 | # in slowest to fastest order 116 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 117 | 118 | # Same story for the scores: 119 | # 120 | # scores are (1, A, H, W) format 121 | # transpose to (1, H, W, A) 122 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 123 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 124 | 125 | # Convert anchors into proposals via bbox transformations 126 | proposals = bbox_transform_inv(anchors, bbox_deltas) 127 | 128 | # 2. clip predicted boxes to image 129 | proposals = clip_boxes(proposals, im_info[:2]) 130 | 131 | # 3. remove predicted boxes with either height or width < threshold 132 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 133 | keep = _filter_boxes(proposals, min_size * im_info[2]) 134 | proposals = proposals[keep, :] 135 | scores = scores[keep] 136 | 137 | # 4. sort all (proposal, score) pairs by score from highest to lowest 138 | # 5. take top pre_nms_topN (e.g. 6000) 139 | order = scores.ravel().argsort()[::-1] 140 | if pre_nms_topN > 0: 141 | order = order[:pre_nms_topN] 142 | proposals = proposals[order, :] 143 | scores = scores[order] 144 | 145 | # 6. apply nms (e.g. threshold = 0.7) 146 | # 7. take after_nms_topN (e.g. 300) 147 | # 8. return the top proposals (-> RoIs top) 148 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 149 | if post_nms_topN > 0: 150 | keep = keep[:post_nms_topN] 151 | proposals = proposals[keep, :] 152 | scores = scores[keep] 153 | 154 | # Output rois blob 155 | # Our RPN implementation only supports a single input image, so all 156 | # batch inds are 0 157 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32) 158 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False))) 159 | 160 | return blob 161 | #top[0].reshape(*(blob.shape)) 162 | #top[0].data[...] = blob 163 | 164 | # [Optional] output scores blob 165 | #if len(top) > 1: 166 | # top[1].reshape(*(scores.shape)) 167 | # top[1].data[...] 
= scores 168 | 169 | def backward(self, top, propagate_down, bottom): 170 | """This layer does not propagate gradients.""" 171 | pass 172 | 173 | def reshape(self, bottom, top): 174 | """Reshaping happens during the call to forward.""" 175 | pass 176 | 177 | def _filter_boxes(boxes, min_size): 178 | """Remove all boxes with any side smaller than min_size.""" 179 | ws = boxes[:, 2] - boxes[:, 0] + 1 180 | hs = boxes[:, 3] - boxes[:, 1] + 1 181 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 182 | return keep 183 | -------------------------------------------------------------------------------- /utils/caffe_layers/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick and Sean Bell 6 | # -------------------------------------------------------- 7 | 8 | #import caffe 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from utils.default_config import cfg 13 | from utils.rpn.bbox_transform import bbox_transform 14 | from utils.cython_modules.cython_bbox import bbox_overlaps 15 | 16 | DEBUG = False 17 | 18 | class ProposalTargetLayer(): #caffe.Layer): 19 | """ 20 | Assign object detection proposals to ground-truth targets. Produces proposal 21 | classification labels and bounding-box regression targets. 22 | """ 23 | 24 | def set_param_str(self, param_str): 25 | self.param_str_ = param_str 26 | 27 | def set_deterministic_mode(self, mode = True): 28 | self._determininistic_mode = mode 29 | 30 | def setup(self, bottom, top): 31 | layer_params = yaml.load(self.param_str_) 32 | self._num_classes = layer_params['num_classes'] 33 | self._determininistic_mode = False 34 | 35 | # sampled rois (0, x1, y1, x2, y2) 36 | #top[0].reshape(1, 5) 37 | # labels 38 | #top[1].reshape(1, 1) 39 | # bbox_targets 40 | #top[2].reshape(1, self._num_classes * 4) 41 | # bbox_inside_weights 42 | #top[3].reshape(1, self._num_classes * 4) 43 | # bbox_outside_weights 44 | #top[4].reshape(1, self._num_classes * 4) 45 | 46 | def forward(self, bottom, top): 47 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 48 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 49 | all_rois = bottom[0] #.data 50 | # GT boxes (x1, y1, x2, y2, label) 51 | # TODO(rbg): it's annoying that sometimes I have extra info before 52 | # and other times after box coordinates -- normalize to one format 53 | gt_boxes = bottom[1] #.data 54 | 55 | # Include ground-truth boxes in the set of candidate rois 56 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype) 57 | all_rois = np.vstack( 58 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1]))) 59 | ) 60 | 61 | # Sanity check: single batch only 62 | assert np.all(all_rois[:, 0] == 0), \ 63 | 'Only single item batches are supported' 64 | 65 | #num_images = 1 66 | #rois_per_image = int(cfg.TRAIN.BATCH_SIZE / num_images) 67 | rois_per_image = cfg.TRAIN.BATCH_SIZE 68 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int) 69 | 70 | # Sample rois with classification labels and bounding box regression 71 | # targets 72 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 73 | all_rois, gt_boxes, fg_rois_per_image, 74 | rois_per_image, self._num_classes, 75 | deterministic=self._determininistic_mode) 76 | 77 | if DEBUG: 78 | print('num fg: {}'.format((labels > 
0).sum())) 79 | print('num bg: {}'.format((labels == 0).sum())) 80 | self._count += 1 81 | self._fg_num += (labels > 0).sum() 82 | self._bg_num += (labels == 0).sum() 83 | print('num fg avg: {}'.format(self._fg_num / self._count)) 84 | print('num bg avg: {}'.format(self._bg_num / self._count)) 85 | print('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))) 86 | 87 | return rois, labels, bbox_targets, bbox_inside_weights 88 | 89 | # sampled rois 90 | #top[0].reshape(*rois.shape) 91 | #top[0].data[...] = rois 92 | 93 | # classification labels 94 | #top[1].reshape(*labels.shape) 95 | #top[1].data[...] = labels 96 | 97 | # bbox_targets 98 | #top[2].reshape(*bbox_targets.shape) 99 | #top[2].data[...] = bbox_targets 100 | 101 | # bbox_inside_weights 102 | #top[3].reshape(*bbox_inside_weights.shape) 103 | #top[3].data[...] = bbox_inside_weights 104 | 105 | # bbox_outside_weights 106 | #top[4].reshape(*bbox_inside_weights.shape) 107 | #top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32) 108 | 109 | def backward(self, top, propagate_down, bottom): 110 | """This layer does not propagate gradients.""" 111 | pass 112 | 113 | def reshape(self, bottom, top): 114 | """Reshaping happens during the call to forward.""" 115 | pass 116 | 117 | 118 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 119 | """Bounding-box regression targets (bbox_target_data) are stored in a 120 | compact form N x (class, tx, ty, tw, th) 121 | 122 | This function expands those targets into the 4-of-4*K representation used 123 | by the network (i.e. only one class has non-zero targets). 124 | 125 | Returns: 126 | bbox_target (ndarray): N x 4K blob of regression targets 127 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 128 | """ 129 | 130 | clss = bbox_target_data[:, 0].astype(int) 131 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 132 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 133 | inds = np.where(clss > 0)[0] 134 | for ind in inds: 135 | cls = clss[ind] 136 | start = 4 * cls 137 | end = start + 4 138 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 139 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 140 | return bbox_targets, bbox_inside_weights 141 | 142 | 143 | def _compute_targets(ex_rois, gt_rois, labels): 144 | """Compute bounding-box regression targets for an image.""" 145 | 146 | assert ex_rois.shape[0] == gt_rois.shape[0] 147 | assert ex_rois.shape[1] == 4 148 | assert gt_rois.shape[1] == 4 149 | 150 | targets = bbox_transform(ex_rois, gt_rois) 151 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 152 | # Optionally normalize targets by a precomputed mean and stdev 153 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 154 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 155 | return np.hstack( 156 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 157 | 158 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False): 159 | """Generate a random sample of RoIs comprising foreground and background 160 | examples. 
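    For example, with the defaults in utils/default_config.py
    (TRAIN.BATCH_SIZE = 128, TRAIN.FG_FRACTION = 0.25), at most
    round(0.25 * 128) = 32 of the sampled RoIs per image are foreground and
    the remaining slots (96 or more) are filled with background RoIs, when
    enough candidates of each kind are available.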
161 | """ 162 | # overlaps: (rois x gt_boxes) 163 | overlaps = bbox_overlaps( 164 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float), 165 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float)) 166 | gt_assignment = overlaps.argmax(axis=1) 167 | max_overlaps = overlaps.max(axis=1) 168 | labels = gt_boxes[gt_assignment, 4] 169 | 170 | # Select foreground RoIs as those with >= FG_THRESH overlap 171 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0] 172 | # Guard against the case when an image has fewer than fg_rois_per_image 173 | # foreground RoIs 174 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 175 | 176 | # Sample foreground regions without replacement 177 | if fg_inds.size > 0: 178 | if deterministic: 179 | fg_inds = fg_inds[:fg_rois_per_this_image] 180 | else: 181 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 182 | 183 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 184 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) & 185 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 186 | # Compute number of background RoIs to take from this image (guarding 187 | # against there being fewer than desired) 188 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 189 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 190 | # Sample background regions without replacement 191 | if bg_inds.size > 0: 192 | if deterministic: 193 | bg_inds = bg_inds[:bg_rois_per_this_image] 194 | else: 195 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 196 | 197 | # The indices that we're selecting (both fg and bg) 198 | keep_inds = np.append(fg_inds, bg_inds) 199 | # Select sampled values from various arrays: 200 | labels = labels[keep_inds] 201 | # Clamp labels for the background RoIs to 0 202 | labels[fg_rois_per_this_image:] = 0 203 | rois = all_rois[keep_inds] 204 | 205 | bbox_target_data = _compute_targets( 206 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels) 207 | 208 | bbox_targets, bbox_inside_weights = \ 209 | _get_bbox_regression_labels(bbox_target_data, num_classes) 210 | 211 | return labels, rois, bbox_targets, bbox_inside_weights 212 | -------------------------------------------------------------------------------- /utils/cython_modules/cpu_nms.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /utils/cython_modules/cpu_nms.cpython-34m.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cpython-34m.so -------------------------------------------------------------------------------- /utils/cython_modules/cython_bbox.cp35-win_amd64.pyd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd -------------------------------------------------------------------------------- /utils/cython_modules/cython_bbox.cpython-34m.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cpython-34m.so -------------------------------------------------------------------------------- /utils/default_config.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | import os.path as osp 9 | import numpy as np 10 | # `pip install easydict` if you don't have it 11 | from easydict import EasyDict as edict 12 | 13 | __C = edict() 14 | cfg = __C 15 | 16 | # 17 | # CNTK parameters 18 | # 19 | 20 | __C.CNTK = edict() 21 | 22 | 23 | __C.CNTK.CONV_BIAS_INIT = 0.0 24 | __C.CNTK.SIGMA_RPN_L1 = 3.0 25 | 26 | __C.CNTK.IMAGE_WIDTH = 850 27 | __C.CNTK.IMAGE_HEIGHT = 850 28 | 29 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3 30 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0 31 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1 32 | 33 | __C.CNTK.DRAW_NEGATIVE_ROIS = False 34 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False 35 | 36 | # 37 | # Training options 38 | # 39 | 40 | __C.TRAIN = edict() 41 | 42 | # Minibatch size (number of regions of interest [ROIs]) 43 | __C.TRAIN.BATCH_SIZE = 128 44 | 45 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 46 | __C.TRAIN.FG_FRACTION = 0.25 47 | 48 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 49 | __C.TRAIN.FG_THRESH = 0.5 50 | 51 | # Overlap threshold for a ROI to be considered background (class = 0 if 52 | # overlap in [LO, HI)) 53 | __C.TRAIN.BG_THRESH_HI = 0.5 54 | __C.TRAIN.BG_THRESH_LO = 0.0 55 | 56 | # Use horizontally-flipped images during training? 
57 | __C.TRAIN.USE_FLIPPED = True
58 | 
59 | # Train bounding-box regressors
60 | __C.TRAIN.BBOX_REG = True
61 | 
62 | # Overlap required between a ROI and ground-truth box in order for that ROI to
63 | # be used as a bounding-box regression training example
64 | __C.TRAIN.BBOX_THRESH = 0.5
65 | 
66 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
67 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
68 | # Deprecated (inside weights)
69 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
70 | # Normalize the targets using "precomputed" (or made up) means and stdevs
71 | # (BBOX_NORMALIZE_TARGETS must also be True)
72 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
73 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
74 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
75 | 
76 | # Train using these proposals
77 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
78 | 
79 | # IOU >= thresh: positive example
80 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
81 | # IOU < thresh: negative example
82 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
83 | # If an anchor satisfies both the positive and the negative conditions, set it to negative
84 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
85 | # Max number of foreground examples
86 | __C.TRAIN.RPN_FG_FRACTION = 0.5
87 | # Total number of examples
88 | __C.TRAIN.RPN_BATCHSIZE = 256
89 | # NMS threshold used on RPN proposals
90 | __C.TRAIN.RPN_NMS_THRESH = 0.7
91 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
92 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
93 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
94 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
95 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
96 | __C.TRAIN.RPN_MIN_SIZE = 16
97 | # Deprecated (inside weights)
98 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
99 | # Give the positive RPN examples weight of p * 1 / {num positives}
100 | # and give negatives a weight of (1 - p)
101 | # Set to -1.0 to use uniform example weighting
102 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
103 | 
104 | 
105 | #
106 | # Testing options
107 | #
108 | 
109 | __C.TEST = edict()
110 | 
111 | # Overlap threshold used for non-maximum suppression (suppress boxes with
112 | # IoU >= this threshold)
113 | __C.TEST.NMS = 0.3
114 | 
115 | # Test using bounding-box regressors
116 | __C.TEST.BBOX_REG = True
117 | 
118 | # Propose boxes
119 | __C.TEST.HAS_RPN = False
120 | 
121 | # Test using these proposals
122 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
123 | 
124 | # NMS threshold used on RPN proposals
125 | __C.TEST.RPN_NMS_THRESH = 0.7
126 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
127 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
128 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
129 | __C.TEST.RPN_POST_NMS_TOP_N = 300
130 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
131 | __C.TEST.RPN_MIN_SIZE = 16
132 | 
133 | 
134 | #
135 | # MISC
136 | #
137 | 
138 | # The mapping from image coordinates to feature map coordinates might cause
139 | # some boxes that are distinct in image space to become identical in feature
140 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
141 | # for identifying duplicate boxes.
142 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
143 | __C.DEDUP_BOXES = 1./16.
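# NOTE: as described in utils/Readme.md, the defaults in this file can be
# overridden by placing a config.py of the same format in the working
# directory: modules such as utils/nms/nms_wrapper.py first try
# `from config import cfg` and fall back to this file only on ImportError.
# A minimal sketch of such an override file (illustrative values only; a
# complete override should define every key its importers read):
#
#     from easydict import EasyDict as edict
#     __C = edict()
#     cfg = __C
#     __C.USE_GPU_NMS = False  # e.g. force CPU NMS on a machine without a GPU
#     __C.GPU_ID = 0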
144 | 
145 | # Pixel mean values (BGR order) as a (1, 1, 3) array
146 | # We use the same pixel mean for all networks even though it's not exactly what
147 | # they were trained with
148 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
149 | 
150 | # For reproducibility
151 | __C.RNG_SEED = 3
152 | 
153 | # A small number that's used many times
154 | __C.EPS = 1e-14
155 | 
156 | # Use GPU implementation of non-maximum suppression
157 | __C.USE_GPU_NMS = True
158 | 
159 | # Default GPU device id
160 | __C.GPU_ID = 0
161 | 
162 | 
163 | def _merge_a_into_b(a, b):
164 |     """Merge config dictionary a into config dictionary b, clobbering the
165 |     options in b whenever they are also specified in a.
166 |     """
167 |     if type(a) is not edict:
168 |         return
169 | 
170 |     for k, v in a.items():
171 |         # a must specify keys that are in b
172 |         if k not in b:
173 |             raise KeyError('{} is not a valid config key'.format(k))
174 | 
175 |         # the types must match, too
176 |         old_type = type(b[k])
177 |         if old_type is not type(v):
178 |             if isinstance(b[k], np.ndarray):
179 |                 v = np.array(v, dtype=b[k].dtype)
180 |             else:
181 |                 raise ValueError(('Type mismatch ({} vs. {}) '
182 |                                   'for config key: {}').format(type(b[k]),
183 |                                                                type(v), k))
184 | 
185 |         # recursively merge dicts
186 |         if type(v) is edict:
187 |             try:
188 |                 _merge_a_into_b(a[k], b[k])
189 |             except:
190 |                 print('Error under config key: {}'.format(k))
191 |                 raise
192 |         else:
193 |             b[k] = v
194 | 
195 | def cfg_from_file(filename):
196 |     """Load a config file and merge it into the default options."""
197 |     import yaml
198 |     with open(filename, 'r') as f:
199 |         yaml_cfg = edict(yaml.load(f))
200 | 
201 |     _merge_a_into_b(yaml_cfg, __C)
202 | 
203 | def cfg_from_list(cfg_list):
204 |     """Set config keys via list (e.g., from command line)."""
205 |     from ast import literal_eval
206 |     assert len(cfg_list) % 2 == 0
207 |     for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
208 |         key_list = k.split('.')
209 |         d = __C
210 |         for subkey in key_list[:-1]:
211 |             assert subkey in d
212 |             d = d[subkey]
213 |         subkey = key_list[-1]
214 |         assert subkey in d
215 |         try:
216 |             value = literal_eval(v)
217 |         except:
218 |             # handle the case when v is a string literal
219 |             value = v
220 |         assert type(value) == type(d[subkey]), \
221 |             'type {} does not match original type {}'.format(
222 |                 type(value), type(d[subkey]))
223 |         d[subkey] = value
224 | 
--------------------------------------------------------------------------------
/utils/map/map_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 | 
7 | import numpy as np
8 | from utils.nms.nms_wrapper import apply_nms_to_test_set_results
9 | 
10 | def evaluate_detections(all_boxes, all_gt_infos, classes, use_07_metric=False, apply_mms=True, nms_threshold=0.5, conf_threshold=0.0):
11 |     '''
12 |     Computes per-class average precision.
13 | 
14 |     Args:
15 |         all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
16 |         all_gt_infos: a dictionary that contains all ground truth annotations in the following form:
17 |             {'class_A': [{'bbox': array([[ 376., 210., 456., 288., 10.]], dtype=float32), 'det': [False], 'difficult': [False]}, ... ]}
18 |             'class_B': [ ], }
19 |         classes: a list of class names, e.g. ['__background__', 'avocado', 'orange', 'butter']
20 |         use_07_metric: whether to use VOC07's 11 point AP computation (default False)
21 |         apply_mms: whether to apply non-maximum suppression before computing average precision values
22 |         nms_threshold: the threshold for discarding overlapping ROIs in nms
23 |         conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded
24 | 
25 |     Returns:
26 |         aps - average precision value per class in a dictionary {classname: ap}
27 |     '''
28 | 
29 |     if apply_mms:
30 |         print("Number of rois before non-maximum suppression: %d" % sum([len(all_boxes[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
31 |         nms_dets,_ = apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold)
32 |         print("Number of rois after non-maximum suppression: %d" % sum([len(nms_dets[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
33 |     else:
34 |         print("Skipping non-maximum suppression")
35 |         nms_dets = all_boxes
36 | 
37 |     aps = {}
38 |     for classIndex, className in enumerate(classes):
39 |         if className != '__background__':
40 |             rec, prec, ap = _evaluate_detections(classIndex, nms_dets, all_gt_infos[className], use_07_metric=use_07_metric)
41 |             aps[className] = ap
42 | 
43 |     return aps
44 | 
45 | def _evaluate_detections(classIndex, all_boxes, gtInfos, overlapThreshold=0.5, use_07_metric=False):
46 |     '''
47 |     Top level function that does the PASCAL VOC evaluation.
48 |     '''
49 | 
50 |     # parse detections for this class
51 |     # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
52 |     num_images = len(all_boxes[0])
53 |     detBboxes = []
54 |     detImgIndices = []
55 |     detConfidences = []
56 |     for imgIndex in range(num_images):
57 |         dets = all_boxes[classIndex][imgIndex]
58 |         if len(dets) > 0:
59 |             for k in range(dets.shape[0]):
60 |                 detImgIndices.append(imgIndex)
61 |                 detConfidences.append(dets[k, -1])
62 |                 # the VOCdevkit expects 1-based indices
63 |                 detBboxes.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
64 |     detBboxes = np.array(detBboxes)
65 |     detConfidences = np.array(detConfidences)
66 | 
67 |     # compute precision / recall / ap
68 |     rec, prec, ap = _voc_computePrecisionRecallAp(
69 |         class_recs=gtInfos,
70 |         confidence=detConfidences,
71 |         image_ids=detImgIndices,
72 |         BB=detBboxes,
73 |         ovthresh=overlapThreshold,
74 |         use_07_metric=use_07_metric)
75 |     return rec, prec, ap
76 | 
77 | def computeAveragePrecision(recalls, precisions, use_07_metric=False):
78 |     '''
79 |     Computes VOC AP given precision and recall.
80 |     '''
81 |     if use_07_metric:
82 |         # 11 point metric
83 |         ap = 0.
84 |         for t in np.arange(0., 1.1, 0.1):
85 |             if np.sum(recalls >= t) == 0:
86 |                 p = 0
87 |             else:
88 |                 p = np.max(precisions[recalls >= t])
89 |             ap = ap + p / 11.
90 |     else:
91 |         # correct AP calculation
92 |         # first append sentinel values at the end
93 |         mrecalls = np.concatenate(([0.], recalls, [1.]))
94 |         mprecisions = np.concatenate(([0.], precisions, [0.]))
95 | 
96 |         # compute the precision envelope
97 |         for i in range(mprecisions.size - 1, 0, -1):
98 |             mprecisions[i - 1] = np.maximum(mprecisions[i - 1], mprecisions[i])
99 | 
100 |         # to calculate area under PR curve, look for points
101 |         # where X axis (recall) changes value
102 |         i = np.where(mrecalls[1:] != mrecalls[:-1])[0]
103 | 
104 |         # and sum (\Delta recall) * prec
105 |         ap = np.sum((mrecalls[i + 1] - mrecalls[i]) * mprecisions[i + 1])
106 |     return ap
107 | 
108 | def _voc_computePrecisionRecallAp(class_recs, confidence, image_ids, BB, ovthresh=0.5, use_07_metric=False):
109 |     '''
110 |     Computes precision, recall, and average precision.
111 |     '''
112 |     if len(BB) == 0:
113 |         return 0.0, 0.0, 0.0
114 | 
115 |     # sort by confidence
116 |     sorted_ind = np.argsort(-confidence)
117 | 
118 |     BB = BB[sorted_ind, :]
119 |     image_ids = [image_ids[x] for x in sorted_ind]
120 | 
121 |     # go down dets and mark TPs and FPs
122 |     nd = len(image_ids)
123 |     tp = np.zeros(nd)
124 |     fp = np.zeros(nd)
125 |     for d in range(nd):
126 |         R = class_recs[image_ids[d]]
127 |         bb = BB[d, :].astype(float)
128 |         ovmax = -np.inf
129 |         BBGT = R['bbox'].astype(float)
130 | 
131 |         if BBGT.size > 0:
132 |             # compute overlaps
133 |             ixmin = np.maximum(BBGT[:, 0], bb[0])
134 |             iymin = np.maximum(BBGT[:, 1], bb[1])
135 |             ixmax = np.minimum(BBGT[:, 2], bb[2])
136 |             iymax = np.minimum(BBGT[:, 3], bb[3])
137 |             iw = np.maximum(ixmax - ixmin + 1., 0.)
138 |             ih = np.maximum(iymax - iymin + 1., 0.)
139 |             inters = iw * ih
140 | 
141 |             # union
142 |             uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
143 |                    (BBGT[:, 2] - BBGT[:, 0] + 1.) *
144 |                    (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
145 | 
146 |             overlaps = inters / uni
147 |             ovmax = np.max(overlaps)
148 |             jmax = np.argmax(overlaps)
149 | 
150 |         if ovmax > ovthresh:
151 |             if not R['difficult'][jmax]:
152 |                 if not R['det'][jmax]:
153 |                     tp[d] = 1.
154 |                     R['det'][jmax] = 1
155 |                 else:
156 |                     fp[d] = 1.
157 |         else:
158 |             fp[d] = 1.
159 | 
160 |     # compute precision recall
161 |     npos = sum([len(cr['bbox']) for cr in class_recs])
162 |     fp = np.cumsum(fp)
163 |     tp = np.cumsum(tp)
164 |     rec = tp / float(npos)
165 |     # avoid divide by zero in case the first detection matches a difficult ground truth
166 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
167 |     ap = computeAveragePrecision(rec, prec, use_07_metric)
168 |     return rec, prec, ap
169 | 
--------------------------------------------------------------------------------
/utils/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 | 
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ============================================================================== 6 | 7 | import numpy as np 8 | from utils.cython_modules.cpu_nms import cpu_nms 9 | try: 10 | from utils.cython_modules.gpu_nms import gpu_nms 11 | gpu_nms_available = True 12 | except ImportError: 13 | gpu_nms_available = False 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | 21 | 22 | def nms(dets, thresh, force_cpu=False): 23 | ''' 24 | Dispatches the call to either CPU or GPU NMS implementations 25 | ''' 26 | if dets.shape[0] == 0: 27 | return [] 28 | if gpu_nms_available and cfg.USE_GPU_NMS and not force_cpu: 29 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 30 | else: 31 | return cpu_nms(dets, thresh) 32 | 33 | def apply_nms_to_single_image_results(coords, labels, scores, nms_threshold=0.5, conf_threshold=0.0): 34 | ''' 35 | Applies nms to the results for a single image. 36 | 37 | Args: 38 | coords: (x_min, y_min, x_max, y_max) coordinates for n rois. shape = (n, 4) 39 | labels: the predicted label per roi. shape = (n, 1) 40 | scores: the predicted score per roi. shape = (n, 1) 41 | nms_threshold: the threshold for discarding overlapping ROIs in nms 42 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded 43 | 44 | Returns: 45 | nmsKeepIndices - the indices of the ROIs to keep after nms 46 | ''' 47 | 48 | # generate input for nms 49 | allIndices = [] 50 | nmsRects = [[[]] for _ in range(max(labels) + 1)] 51 | coordsWithScores = np.hstack((coords, np.array([scores]).T)) 52 | for i in range(max(labels) + 1): 53 | indices = np.where(np.array(labels) == i)[0] 54 | nmsRects[i][0] = coordsWithScores[indices,:] 55 | allIndices.append(indices) 56 | 57 | # call nms 58 | _, nmsKeepIndicesList = apply_nms_to_test_set_results(nmsRects, nms_threshold, conf_threshold) 59 | 60 | # map back to original roi indices 61 | nmsKeepIndices = [] 62 | for i in range(max(labels) + 1): 63 | for keepIndex in nmsKeepIndicesList[i][0]: 64 | nmsKeepIndices.append(allIndices[i][keepIndex]) 65 | assert (len(nmsKeepIndices) == len(set(nmsKeepIndices))) # check that no roi index was added more than once 66 | return nmsKeepIndices 67 | 68 | def apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold): 69 | ''' 70 | Applies nms to the results of multiple images. 71 | 72 | Args: 73 | all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score 74 | nms_threshold: the threshold for discarding overlapping ROIs in nms 75 | conf_threshold: a minimum value for the score of an ROI.
ROIs with lower score will be discarded 76 | 77 | Returns: 78 | nms_boxes - the reduced set of rois after nms 79 | nmsKeepIndices - the indices of the ROIs to keep after nms 80 | ''' 81 | 82 | num_classes = len(all_boxes) 83 | num_images = len(all_boxes[0]) 84 | nms_boxes = [[[] for _ in range(num_images)] 85 | for _ in range(num_classes)] 86 | nms_keepIndices = [[[] for _ in range(num_images)] 87 | for _ in range(num_classes)] 88 | for cls_ind in range(num_classes): 89 | for im_ind in range(num_images): 90 | dets = all_boxes[cls_ind][im_ind] 91 | if len(dets) == 0: 92 | continue 93 | keep = nms(dets.astype(np.float32), nms_threshold) 94 | 95 | # also filter out low confidences 96 | if conf_threshold > 0: 97 | 98 | keep_conf_idx = np.where(dets[:, -1] > conf_threshold) 99 | keep = list(set(keep_conf_idx[0]).intersection(keep)) 100 | 101 | if len(keep) == 0: 102 | continue 103 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy() 104 | nms_keepIndices[cls_ind][im_ind] = keep 105 | return nms_boxes, nms_keepIndices 106 | 107 | -------------------------------------------------------------------------------- /utils/pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | python_files = *_tests.py 3 | -------------------------------------------------------------------------------- /utils/rpn/anchor_target_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os 8 | from cntk import output_variable 9 | from cntk.ops.functions import UserFunction 10 | import yaml 11 | import numpy as np 12 | import numpy.random as npr 13 | from utils.rpn.generate_anchors import generate_anchors 14 | from utils.rpn.bbox_transform import bbox_transform 15 | from utils.cython_modules.cython_bbox import bbox_overlaps 16 | 17 | try: 18 | from config import cfg 19 | except ImportError: 20 | from utils.default_config import cfg 21 | 22 | DEBUG = False 23 | 24 | class AnchorTargetLayer(UserFunction): 25 | ''' 26 | Assign anchors to ground-truth targets. Produces anchor classification 27 | labels and bounding-box regression targets.
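Each anchor is labeled 1 (object), 0 (background) or -1 (ignored) based on its IoU overlap with the ground-truth boxes; see forward() below.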
28 | ''' 29 | 30 | def __init__(self, arg1, arg2, arg3, name='AnchorTargetLayer', param_str=None, cfm_shape=None, deterministic=False): 31 | super(AnchorTargetLayer, self).__init__([arg1, arg2, arg3], name=name) 32 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32" 33 | 34 | # parse the layer parameter string, which must be valid YAML 35 | layer_params = yaml.safe_load(self.param_str_) 36 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 37 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 38 | self._num_anchors = self._anchors.shape[0] 39 | self._feat_stride = layer_params['feat_stride'] 40 | self._cfm_shape = cfm_shape 41 | self._deterministic_mode = deterministic 42 | 43 | if DEBUG: 44 | print ('anchors:') 45 | print (self._anchors) 46 | print ('anchor shapes:') 47 | print (np.hstack(( 48 | self._anchors[:, 2::4] - self._anchors[:, 0::4], 49 | self._anchors[:, 3::4] - self._anchors[:, 1::4], 50 | ))) 51 | self._counts = cfg.EPS 52 | self._sums = np.zeros((1, 4)) 53 | self._squared_sums = np.zeros((1, 4)) 54 | self._fg_sum = 0 55 | self._bg_sum = 0 56 | self._count = 0 57 | 58 | # allow boxes to sit over the edge by a small amount 59 | self._allowed_border = 0 # layer_params.get('allowed_border', 0) 60 | 61 | def infer_outputs(self): 62 | # This is a necessary workaround: after cloning, the cloned inputs are just placeholders without the proper shape 63 | if self._cfm_shape is None: 64 | self._cfm_shape = self.inputs[0].shape 65 | height, width = self._cfm_shape[-2:] 66 | 67 | if DEBUG: 68 | print('AnchorTargetLayer: height', height, 'width', width) 69 | 70 | A = self._num_anchors 71 | # labels 72 | labelShape = (1, A, height, width) 73 | # Comment: this layer uses encoded labels, while in CNTK we mostly use one-hot labels 74 | # bbox_targets 75 | bbox_target_shape = (1, A * 4, height, width) 76 | # bbox_inside_weights 77 | bbox_inside_weights_shape = (1, A * 4, height, width) 78 | 79 | return [output_variable(labelShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 80 | name="objectness_target", needs_gradient=False), 81 | output_variable(bbox_target_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 82 | name="rpn_bbox_target", needs_gradient=False), 83 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 84 | name="rpn_bbox_inside_w", needs_gradient=False),] 85 | 86 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None): 87 | # Algorithm: 88 | # 89 | # for each (H, W) location i 90 | # generate 9 anchor boxes centered on cell i 91 | # apply predicted bbox deltas at cell i to each of the 9 anchors 92 | # filter out-of-image anchors 93 | # measure GT overlap 94 | 95 | bottom = arguments 96 | 97 | # map of shape (..., H, W) 98 | height, width = bottom[0].shape[-2:] 99 | # GT boxes (x1, y1, x2, y2, label) 100 | gt_boxes = bottom[1][0,:] 101 | # im_info 102 | im_info = bottom[2][0] 103 | 104 | # remove zero-padded ground truth boxes 105 | keep = np.where( 106 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) & 107 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0) 108 | ) 109 | gt_boxes = gt_boxes[keep] 110 | 111 | if DEBUG: 112 | print ('') 113 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 114 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 115 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1])) 116 |
print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3])) 117 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5])) 118 | print ('height, width: ({}, {})'.format(height, width)) 119 | print ('rpn: gt_boxes.shape', gt_boxes.shape) 120 | #print ('rpn: gt_boxes', gt_boxes) 121 | 122 | # 1. Generate proposals from bbox deltas and shifted anchors 123 | shift_x = np.arange(0, width) * self._feat_stride 124 | shift_y = np.arange(0, height) * self._feat_stride 125 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 126 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 127 | shift_x.ravel(), shift_y.ravel())).transpose() 128 | # add A anchors (1, A, 4) to 129 | # cell K shifts (K, 1, 4) to get 130 | # shift anchors (K, A, 4) 131 | # reshape to (K*A, 4) shifted anchors 132 | A = self._num_anchors 133 | K = shifts.shape[0] 134 | all_anchors = (self._anchors.reshape((1, A, 4)) + 135 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))) 136 | all_anchors = all_anchors.reshape((K * A, 4)) 137 | total_anchors = int(K * A) 138 | 139 | # only keep anchors inside the image 140 | padded_wh = im_info[0:2] 141 | scaled_wh = im_info[2:4] 142 | xy_offset = (padded_wh - scaled_wh) / 2 143 | xy_min = xy_offset 144 | xy_max = xy_offset + scaled_wh 145 | 146 | inds_inside = np.where( 147 | (all_anchors[:, 0] >= xy_min[0] - self._allowed_border) & 148 | (all_anchors[:, 1] >= xy_min[1] - self._allowed_border) & 149 | (all_anchors[:, 2] < xy_max[0] + self._allowed_border) & # width 150 | (all_anchors[:, 3] < xy_max[1] + self._allowed_border) # height 151 | )[0] 152 | 153 | if DEBUG: 154 | print ('total_anchors', total_anchors) 155 | print ('inds_inside', len(inds_inside)) 156 | 157 | # keep only inside anchors 158 | anchors = all_anchors[inds_inside, :] 159 | if DEBUG: 160 | print ('anchors.shape', anchors.shape) 161 | print('gt_boxes.shape', gt_boxes.shape) 162 | 163 | # label: 1 is positive, 0 is negative, -1 is don't care 164 | labels = np.empty((len(inds_inside), ), dtype=np.float32) 165 | labels.fill(-1) 166 | 167 | # overlaps between the anchors and the gt boxes 168 | # overlaps (ex, gt) 169 | overlaps = bbox_overlaps( 170 | np.ascontiguousarray(anchors, dtype=float), 171 | np.ascontiguousarray(gt_boxes, dtype=float)) 172 | argmax_overlaps = overlaps.argmax(axis=1) 173 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps] 174 | gt_argmax_overlaps = overlaps.argmax(axis=0) 175 | gt_max_overlaps = overlaps[gt_argmax_overlaps, 176 | np.arange(overlaps.shape[1])] 177 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0] 178 | 179 | if not cfg["TRAIN"].RPN_CLOBBER_POSITIVES: 180 | # assign bg labels first so that positive labels can clobber them 181 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 182 | 183 | # fg label: for each gt, anchor with highest overlap 184 | labels[gt_argmax_overlaps] = 1 185 | 186 | # fg label: above threshold IOU 187 | labels[max_overlaps >= cfg["TRAIN"].RPN_POSITIVE_OVERLAP] = 1 188 | 189 | if cfg["TRAIN"].RPN_CLOBBER_POSITIVES: 190 | # assign bg labels last so that negative labels can clobber positives 191 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0 192 | 193 | # subsample positive labels if we have too many 194 | num_fg = int(cfg["TRAIN"].RPN_FG_FRACTION * cfg["TRAIN"].RPN_BATCHSIZE) 195 | fg_inds = np.where(labels == 1)[0] 196 | if len(fg_inds) > num_fg: 197 | if self._deterministic_mode: 198 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)] 199 | else: 200 | disable_inds = npr.choice(fg_inds,
size=(len(fg_inds) - num_fg), replace=False) 201 | labels[disable_inds] = -1 202 | 203 | # subsample negative labels if we have too many 204 | num_bg = cfg["TRAIN"].RPN_BATCHSIZE - np.sum(labels == 1) 205 | bg_inds = np.where(labels == 0)[0] 206 | if len(bg_inds) > num_bg: 207 | if self._deterministic_mode: 208 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)] 209 | else: 210 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False) 211 | labels[disable_inds] = -1 212 | 213 | 214 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :]) 215 | 216 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32) 217 | bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0)) 218 | 219 | if DEBUG: 220 | self._sums += bbox_targets[labels == 1, :].sum(axis=0) 221 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0) 222 | self._counts += np.sum(labels == 1) 223 | means = self._sums / self._counts 224 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2) 225 | print ('means:') 226 | print (means) 227 | print ('stdevs:') 228 | print (stds) 229 | 230 | # map up to original set of anchors 231 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1) 232 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0) 233 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0) 234 | 235 | if DEBUG: 236 | print ('rpn: max max_overlap', np.max(max_overlaps)) 237 | print ('rpn: num_positive', np.sum(labels == 1)) 238 | print ('rpn: num_negative', np.sum(labels == 0)) 239 | self._fg_sum += np.sum(labels == 1) 240 | self._bg_sum += np.sum(labels == 0) 241 | self._count += 1 242 | print ('rpn: num_positive avg', self._fg_sum / self._count) 243 | print ('rpn: num_negative avg', self._bg_sum / self._count) 244 | 245 | # labels 246 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2) 247 | outputs[self.outputs[0]] = np.ascontiguousarray(labels) 248 | 249 | # bbox_targets 250 | bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 251 | outputs[self.outputs[1]] = np.ascontiguousarray(bbox_targets) 252 | 253 | # bbox_inside_weights 254 | bbox_inside_weights = bbox_inside_weights \ 255 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2) 256 | assert bbox_inside_weights.shape[2] == height 257 | assert bbox_inside_weights.shape[3] == width 258 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_inside_weights) 259 | 260 | # No state needs to be passed to backward() so we just pass None 261 | return None 262 | 263 | def backward(self, state, root_gradients, variables): 264 | """This layer does not propagate gradients.""" 265 | pass 266 | 267 | def clone(self, cloned_inputs): 268 | return AnchorTargetLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_, cfm_shape=self._cfm_shape) 269 | 270 | def serialize(self): 271 | internal_state = {} 272 | internal_state['param_str'] = self.param_str_ 273 | return internal_state 274 | 275 | @staticmethod 276 | def deserialize(inputs, name, state): 277 | param_str = state['param_str'] 278 | return AnchorTargetLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str) 279 | 280 | 281 | def _unmap(data, count, inds, fill=0): 282 | """ Unmap a subset of items (data) back to the original set of items (of size count) """ 283 | if len(data.shape) == 1: 284 | ret = np.empty((count, ),
dtype=np.float32) 285 | ret.fill(fill) 286 | ret[inds] = data 287 | else: 288 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32) 289 | ret.fill(fill) 290 | ret[inds, :] = data 291 | return ret 292 | 293 | 294 | def _compute_targets(ex_rois, gt_rois): 295 | """Compute bounding-box regression targets for an image.""" 296 | 297 | assert ex_rois.shape[0] == gt_rois.shape[0] 298 | assert ex_rois.shape[1] == 4 299 | assert gt_rois.shape[1] == 5 300 | 301 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False) 302 | -------------------------------------------------------------------------------- /utils/rpn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | # compute example and ground-truth centers, widths and heights, 10 | # and return the optimal target deltas 11 | def bbox_transform(ex_rois, gt_rois): 12 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 13 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 14 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 15 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 16 | 17 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 18 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 19 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 20 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 21 | 22 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 23 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 24 | targets_dw = np.log(gt_widths / ex_widths) 25 | targets_dh = np.log(gt_heights / ex_heights) 26 | 27 | targets = np.vstack( 28 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 29 | return targets 30 | 31 | # gets 32 | # - boxes (n, 4) as [x_low, y_low, x_high, y_high] 33 | # - deltas (n, 4) as [dx, dy, dw, dh] 34 | # returns 35 | # - pred_boxes (n, 4) as [x_low, y_low, x_high, y_high] 36 | # where 37 | # pred_ctr_x = dx * widths + ctr_x 38 | # --> pred_x_low = pred_ctr_x - 0.5 * pred_w 39 | # and 40 | # pred_w = np.exp(dw) * widths 41 | def bbox_transform_inv(boxes, deltas): 42 | if boxes.shape[0] == 0: 43 | 44 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype) 45 | 46 | boxes = boxes.astype(deltas.dtype, copy=False) 47 | 48 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 49 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 50 | ctr_x = boxes[:, 0] + 0.5 * widths 51 | ctr_y = boxes[:, 1] + 0.5 * heights 52 | 53 | # avoid overflow in exp 54 | dx = np.clip(deltas[:, 0::4], None, 10) 55 | dy = np.clip(deltas[:, 1::4], None, 10) 56 | dw = np.clip(deltas[:, 2::4], None, 10) 57 | dh = np.clip(deltas[:, 3::4], None, 10) 58 | 59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis] 60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 61 | pred_w = np.exp(dw) * widths[:, np.newaxis] 62 | pred_h = np.exp(dh) * heights[:, np.newaxis] 63 | 64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 65 | # x1 66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 67 | # y1 68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 69 | # x2 70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 71 | # y2 72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 73 | 74 | return pred_boxes 75 | 76 | def clip_boxes(boxes, im_info): 77 | ''' 78 | Clip boxes to image boundaries.
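Boxes are clipped to the scaled image region inside the padded canvas (from xy_offset to xy_offset + scaled_wh), not to the full padded extent.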
79 | :param boxes: boxes 80 | :param im_info: (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 81 | e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 82 | ''' 83 | 84 | im_info.shape = (6) 85 | padded_wh = im_info[0:2] 86 | scaled_wh = im_info[2:4] 87 | xy_offset = (padded_wh - scaled_wh) / 2 88 | xy_min = xy_offset 89 | xy_max = xy_offset + scaled_wh 90 | 91 | # x_min <= x1 <= x_max 92 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], xy_max[0] - 1), xy_min[0]) 93 | # y_min <= y1 <= y_max 94 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], xy_max[1] - 1), xy_min[1]) 95 | # x_min <= x2 <= x_max 96 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], xy_max[0] - 1), xy_min[0]) 97 | # y_min <= y2 <= y_max 98 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], xy_max[1] - 1), xy_min[1]) 99 | return boxes 100 | -------------------------------------------------------------------------------- /utils/rpn/cntk_smoothL1_loss.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import cntk as C 9 | 10 | def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights): 11 | """ 12 | From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py 13 | 14 | ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets)) 15 | SmoothL1(x) = 0.5 * (sigma * x)^2, if |x| < 1 / sigma^2 16 | |x| - 0.5 / sigma^2, otherwise 17 | """ 18 | sigma2 = sigma * sigma 19 | 20 | inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets))) 21 | 22 | smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2) 23 | smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2) 24 | smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2) 25 | smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign), 26 | C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign))) 27 | 28 | return C.element_times(bbox_outside_weights, smooth_l1_result) 29 | -------------------------------------------------------------------------------- /utils/rpn/generate_anchors.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | 9 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 10 | scales=2**np.arange(3, 6)): 11 | """ 12 | Generate anchor (reference) windows by enumerating aspect ratios X 13 | scales wrt a reference (0, 0, 15, 15) window. 14 | """ 15 | 16 | base_anchor = np.array([1, 1, base_size, base_size]) - 1 17 | ratio_anchors = _ratio_enum(base_anchor, ratios) 18 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales) 19 | for i in range(ratio_anchors.shape[0])]) # was xrange 20 | return anchors 21 | 22 | def _whctrs(anchor): 23 | """ 24 | Return width, height, x center, and y center for an anchor (window). 
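For example, the base anchor (0, 0, 15, 15) yields (w, h, x_ctr, y_ctr) = (16, 16, 7.5, 7.5).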
25 | """ 26 | 27 | w = anchor[2] - anchor[0] + 1 28 | h = anchor[3] - anchor[1] + 1 29 | x_ctr = anchor[0] + 0.5 * (w - 1) 30 | y_ctr = anchor[1] + 0.5 * (h - 1) 31 | return w, h, x_ctr, y_ctr 32 | 33 | def _mkanchors(ws, hs, x_ctr, y_ctr): 34 | """ 35 | Given a vector of widths (ws) and heights (hs) around a center 36 | (x_ctr, y_ctr), output a set of anchors (windows). 37 | """ 38 | 39 | ws = ws[:, np.newaxis] 40 | hs = hs[:, np.newaxis] 41 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1), 42 | y_ctr - 0.5 * (hs - 1), 43 | x_ctr + 0.5 * (ws - 1), 44 | y_ctr + 0.5 * (hs - 1))) 45 | return anchors 46 | 47 | def _ratio_enum(anchor, ratios): 48 | """ 49 | Enumerate a set of anchors for each aspect ratio wrt an anchor. 50 | """ 51 | 52 | w, h, x_ctr, y_ctr = _whctrs(anchor) 53 | size = w * h 54 | size_ratios = size / ratios 55 | ws = np.round(np.sqrt(size_ratios)) 56 | hs = np.round(ws * ratios) 57 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 58 | return anchors 59 | 60 | def _scale_enum(anchor, scales): 61 | """ 62 | Enumerate a set of anchors for each scale wrt an anchor. 63 | """ 64 | 65 | w, h, x_ctr, y_ctr = _whctrs(anchor) 66 | ws = w * scales 67 | hs = h * scales 68 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr) 69 | return anchors 70 | 71 | if __name__ == '__main__': 72 | import time 73 | t = time.time() 74 | a = generate_anchors() 75 | print (time.time() - t) 76 | print (a) 77 | from IPython import embed; embed() 78 | -------------------------------------------------------------------------------- /utils/rpn/proposal_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from cntk import output_variable, FreeDimension 8 | from cntk.ops.functions import UserFunction 9 | import numpy as np 10 | import yaml 11 | from utils.rpn.generate_anchors import generate_anchors 12 | from utils.rpn.bbox_transform import bbox_transform_inv, clip_boxes 13 | from utils.nms.nms_wrapper import nms 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | DEBUG = False 21 | 22 | class ProposalLayer(UserFunction): 23 | ''' 24 | Outputs object detection proposals by applying estimated bounding-box 25 | transformations to a set of regular boxes (called "anchors"). 
26 | ''' 27 | 28 | def __init__(self, arg1, arg2, arg3, name='ProposalLayer', param_str=None): 29 | super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name) 30 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32" 31 | 32 | # parse the layer parameter string, which must be valid YAML 33 | layer_params = yaml.safe_load(self.param_str_) 34 | self._feat_stride = layer_params['feat_stride'] 35 | anchor_scales = layer_params.get('scales', (8, 16, 32)) 36 | self._anchors = generate_anchors(scales=np.array(anchor_scales)) 37 | self._num_anchors = self._anchors.shape[0] 38 | 39 | if DEBUG: 40 | print ('feat_stride: {}'.format(self._feat_stride)) 41 | print ('anchors:') 42 | print (self._anchors) 43 | 44 | def infer_outputs(self): 45 | # rois blob: holds R regions of interest, each is a 5-tuple 46 | # (n, x1, y1, x2, y2) specifying an image batch index n and a 47 | # rectangle (x1, y1, x2, y2) 48 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python 49 | proposalShape = (FreeDimension, 4) 50 | 51 | return [output_variable(proposalShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 52 | name="rpn_rois_raw", needs_gradient=False)] 53 | 54 | def forward(self, arguments, device=None, outputs_to_retain=None): 55 | # Algorithm: 56 | # 57 | # for each (H, W) location i 58 | # generate A anchor boxes centered on cell i 59 | # apply predicted bbox deltas at cell i to each of the A anchors 60 | # clip predicted boxes to image 61 | # remove predicted boxes with either height or width < threshold 62 | # sort all (proposal, score) pairs by score from highest to lowest 63 | # take top pre_nms_topN proposals before NMS 64 | # apply NMS with threshold 0.7 to remaining proposals 65 | # take after_nms_topN proposals after NMS 66 | # return the top proposals (-> RoIs top, scores top) 67 | 68 | # use potentially different number of proposals for training vs evaluation 69 | if len(outputs_to_retain) == 0: 70 | # evaluation 71 | pre_nms_topN = cfg["TEST"].RPN_PRE_NMS_TOP_N 72 | post_nms_topN = cfg["TEST"].RPN_POST_NMS_TOP_N 73 | nms_thresh = cfg["TEST"].RPN_NMS_THRESH 74 | min_size = cfg["TEST"].RPN_MIN_SIZE 75 | else: 76 | pre_nms_topN = cfg["TRAIN"].RPN_PRE_NMS_TOP_N 77 | post_nms_topN = cfg["TRAIN"].RPN_POST_NMS_TOP_N 78 | nms_thresh = cfg["TRAIN"].RPN_NMS_THRESH 79 | min_size = cfg["TRAIN"].RPN_MIN_SIZE 80 | 81 | bottom = arguments 82 | assert bottom[0].shape[0] == 1, \ 83 | 'Only single item batches are supported' 84 | 85 | # the first set of _num_anchors channels are bg probs 86 | # the second set are the fg probs, which we want 87 | scores = bottom[0][:, self._num_anchors:, :, :] 88 | bbox_deltas = bottom[1] 89 | im_info = bottom[2][0] 90 | 91 | if DEBUG: 92 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 93 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 94 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1])) 95 | print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3])) 96 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5])) 97 | 98 | # 1.
Generate proposals from bbox deltas and shifted anchors 99 | height, width = scores.shape[-2:] 100 | 101 | if DEBUG: 102 | print ('score map size: {}'.format(scores.shape)) 103 | 104 | # Enumerate all shifts 105 | shift_x = np.arange(0, width) * self._feat_stride 106 | shift_y = np.arange(0, height) * self._feat_stride 107 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 108 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 109 | shift_x.ravel(), shift_y.ravel())).transpose() 110 | 111 | # Enumerate all shifted anchors: 112 | # 113 | # add A anchors (1, A, 4) to 114 | # cell K shifts (K, 1, 4) to get 115 | # shift anchors (K, A, 4) 116 | # reshape to (K*A, 4) shifted anchors 117 | A = self._num_anchors 118 | K = shifts.shape[0] 119 | anchors = self._anchors.reshape((1, A, 4)) + \ 120 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 121 | anchors = anchors.reshape((K * A, 4)) 122 | 123 | # Transpose and reshape predicted bbox transformations to get them 124 | # into the same order as the anchors: 125 | # 126 | # bbox deltas will be (1, 4 * A, H, W) format 127 | # transpose to (1, H, W, 4 * A) 128 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 129 | # in slowest to fastest order 130 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4)) 131 | 132 | # Same story for the scores: 133 | # 134 | # scores are (1, A, H, W) format 135 | # transpose to (1, H, W, A) 136 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a) 137 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1)) 138 | 139 | # Convert anchors into proposals via bbox transformations 140 | proposals = bbox_transform_inv(anchors, bbox_deltas) 141 | 142 | # 2. clip predicted boxes to image 143 | proposals = clip_boxes(proposals, im_info) 144 | 145 | # 3. remove predicted boxes with either height or width < threshold 146 | # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4]) 147 | cntk_image_scale = im_info[2] / im_info[4] 148 | keep = _filter_boxes(proposals, min_size * cntk_image_scale) 149 | proposals = proposals[keep, :] 150 | scores = scores[keep] 151 | 152 | # 4. sort all (proposal, score) pairs by score from highest to lowest 153 | # 5. take top pre_nms_topN (e.g. 6000) 154 | order = scores.ravel().argsort()[::-1] 155 | if pre_nms_topN > 0: 156 | order = order[:pre_nms_topN] 157 | proposals = proposals[order, :] 158 | scores = scores[order] 159 | 160 | # 6. apply nms (e.g. threshold = 0.7) 161 | # 7. take after_nms_topN (e.g. 300) 162 | # 8. 
return the top proposals (-> RoIs top) 163 | keep = nms(np.hstack((proposals, scores)), nms_thresh) 164 | if post_nms_topN > 0: 165 | keep = keep[:post_nms_topN] 166 | proposals = proposals[keep, :] 167 | scores = scores[keep] 168 | 169 | # pad with zeros if too few rois were found 170 | num_found_proposals = proposals.shape[0] 171 | if num_found_proposals < post_nms_topN: 172 | if DEBUG: 173 | print("Only {} proposals generated in ProposalLayer".format(num_found_proposals)) 174 | proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32) 175 | proposals_padded[:num_found_proposals, :] = proposals 176 | proposals = proposals_padded 177 | 178 | # Output rois blob 179 | # Our RPN implementation only supports a single input image, so all 180 | # batch inds are 0 181 | # for CNTK: add batch axis to output shape 182 | proposals.shape = (1,) + proposals.shape 183 | 184 | return None, proposals 185 | 186 | def backward(self, state, root_gradients, variables): 187 | """This layer does not propagate gradients.""" 188 | pass 189 | 190 | def clone(self, cloned_inputs): 191 | return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_) 192 | 193 | def serialize(self): 194 | internal_state = {} 195 | internal_state['param_str'] = self.param_str_ 196 | 197 | return internal_state 198 | 199 | @staticmethod 200 | def deserialize(inputs, name, state): 201 | param_str = state['param_str'] 202 | 203 | return ProposalLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str) 204 | 205 | 206 | def _filter_boxes(boxes, min_size): 207 | """Remove all boxes with any side smaller than min_size.""" 208 | ws = boxes[:, 2] - boxes[:, 0] + 1 209 | hs = boxes[:, 3] - boxes[:, 1] + 1 210 | if np.isnan(ws[0]): 211 | print('Warning: NaN encountered in box widths in _filter_boxes') 212 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 213 | return keep 214 | -------------------------------------------------------------------------------- /utils/rpn/proposal_target_layer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | from cntk import output_variable, FreeDimension 8 | from cntk.ops.functions import UserFunction 9 | import yaml 10 | import numpy as np 11 | import numpy.random as npr 12 | from utils.rpn.bbox_transform import bbox_transform 13 | from utils.cython_modules.cython_bbox import bbox_overlaps 14 | 15 | try: 16 | from config import cfg 17 | except ImportError: 18 | from utils.default_config import cfg 19 | 20 | DEBUG = False 21 | 22 | class ProposalTargetLayer(UserFunction): 23 | ''' 24 | Assign object detection proposals to ground-truth targets. Produces proposal 25 | classification labels and bounding-box regression targets.
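The ground-truth boxes are added to the candidate ROIs, and a fixed-size batch of foreground and background samples is then drawn from the combined set (see _sample_rois below).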
26 | ''' 27 | 28 | def __init__(self, arg1, arg2, name='ProposalTargetLayer', param_str=None, deterministic=False): 29 | super(ProposalTargetLayer, self).__init__([arg1, arg2], name=name) 30 | self.param_str_ = param_str if param_str is not None else "'num_classes': 2" 31 | 32 | # parse the layer parameter string, which must be valid YAML 33 | layer_params = yaml.safe_load(self.param_str_) 34 | self._num_classes = layer_params['num_classes'] 35 | self._deterministic_mode = deterministic 36 | 37 | self._count = 0 38 | self._fg_num = 0 39 | self._bg_num = 0 40 | 41 | def infer_outputs(self): 42 | # sampled rois (0, x1, y1, x2, y2) 43 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python 44 | rois_shape = (FreeDimension, 4) 45 | labels_shape = (FreeDimension, self._num_classes) 46 | bbox_targets_shape = (FreeDimension, self._num_classes * 4) 47 | bbox_inside_weights_shape = (FreeDimension, self._num_classes * 4) 48 | 49 | return [output_variable(rois_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 50 | name="rpn_target_rois_raw", needs_gradient=False), 51 | output_variable(labels_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 52 | name="label_targets_raw", needs_gradient=False), 53 | output_variable(bbox_targets_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 54 | name="bbox_targets_raw", needs_gradient=False), 55 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes, 56 | name="bbox_inside_w_raw", needs_gradient=False)] 57 | 58 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None): 59 | bottom = arguments 60 | 61 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN 62 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source 63 | all_rois = bottom[0][0,:] 64 | # remove zero-padded proposals 65 | keep0 = np.where( 66 | ((all_rois[:, 2] - all_rois[:, 0]) > 0) & 67 | ((all_rois[:, 3] - all_rois[:, 1]) > 0) 68 | ) 69 | all_rois = all_rois[keep0] 70 | 71 | # GT boxes (x1, y1, x2, y2, label) 72 | # TODO(rbg): it's annoying that sometimes I have extra info before 73 | # and other times after box coordinates -- normalize to one format 74 | gt_boxes = bottom[1][0,:] 75 | # remove zero-padded ground truth boxes 76 | keep1 = np.where( 77 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) & 78 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0) 79 | ) 80 | gt_boxes = gt_boxes[keep1] 81 | 82 | assert gt_boxes.shape[0] > 0, \ 83 | "No ground truth boxes provided" 84 | 85 | # Include ground-truth boxes in the set of candidate rois 86 | # for CNTK: add batch index axis with all zeros to both inputs 87 | all_rois = np.vstack((all_rois, gt_boxes[:, :-1])) 88 | zeros = np.zeros((all_rois.shape[0], 1), dtype=all_rois.dtype) 89 | all_rois = np.hstack((zeros, all_rois)) 90 | 91 | # Sanity check: single batch only 92 | assert np.all(all_rois[:, 0] == 0), \ 93 | 'Only single item batches are supported' 94 | 95 | rois_per_image = cfg.TRAIN.BATCH_SIZE 96 | fg_rois_per_image = np.round(cfg["TRAIN"].FG_FRACTION * rois_per_image).astype(int) 97 | 98 | # Sample rois with classification labels and bounding box regression 99 | # targets 100 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois( 101 | all_rois, gt_boxes, fg_rois_per_image, 102 | rois_per_image, self._num_classes, 103 | deterministic=self._deterministic_mode) 104 | 105 | if DEBUG: 106 | print ('num rois: {}'.format(rois_per_image)) 107 | print ('num fg: {}'.format((labels > 0).sum())) 108 | print ('num bg: {}'.format((labels == 0).sum())) 109 |
self._count += 1 110 | self._fg_num += (labels > 0).sum() 111 | self._bg_num += (labels == 0).sum() 112 | print ('num fg avg: {}'.format(self._fg_num / self._count)) 113 | print ('num bg avg: {}'.format(self._bg_num / self._count)) 114 | print ('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num))) 115 | 116 | # pad with zeros if too few rois were found 117 | num_found_rois = rois.shape[0] 118 | if num_found_rois < rois_per_image: 119 | rois_padded = np.zeros((rois_per_image, rois.shape[1]), dtype=np.float32) 120 | rois_padded[:num_found_rois, :] = rois 121 | rois = rois_padded 122 | 123 | labels_padded = np.zeros((rois_per_image), dtype=np.float32) 124 | labels_padded[:num_found_rois] = labels 125 | labels = labels_padded 126 | 127 | bbox_targets_padded = np.zeros((rois_per_image, bbox_targets.shape[1]), dtype=np.float32) 128 | bbox_targets_padded[:num_found_rois, :] = bbox_targets 129 | bbox_targets = bbox_targets_padded 130 | 131 | bbox_inside_weights_padded = np.zeros((rois_per_image, bbox_inside_weights.shape[1]), dtype=np.float32) 132 | bbox_inside_weights_padded[:num_found_rois, :] = bbox_inside_weights 133 | bbox_inside_weights = bbox_inside_weights_padded 134 | 135 | # for CNTK: get rid of batch ind zeros and add batch axis 136 | rois = rois[:,1:] 137 | 138 | # sampled rois 139 | rois.shape = (1,) + rois.shape 140 | outputs[self.outputs[0]] = np.ascontiguousarray(rois) 141 | 142 | # classification labels 143 | labels_as_int = [i.item() for i in labels.astype(int)] 144 | labels_dense = np.eye(self._num_classes, dtype=np.float32)[labels_as_int] 145 | labels_dense.shape = (1,) + labels_dense.shape # batch axis 146 | outputs[self.outputs[1]] = labels_dense 147 | 148 | # bbox_targets 149 | bbox_targets.shape = (1,) + bbox_targets.shape # batch axis 150 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_targets) 151 | 152 | # bbox_inside_weights 153 | bbox_inside_weights.shape = (1,) + bbox_inside_weights.shape # batch axis 154 | outputs[self.outputs[3]] = np.ascontiguousarray(bbox_inside_weights) 155 | 156 | def backward(self, state, root_gradients, variables): 157 | """This layer does not propagate gradients.""" 158 | pass 159 | 160 | def clone(self, cloned_inputs): 161 | return ProposalTargetLayer(cloned_inputs[0], cloned_inputs[1], param_str=self.param_str_) 162 | 163 | def serialize(self): 164 | internal_state = {} 165 | internal_state['param_str'] = self.param_str_ 166 | return internal_state 167 | 168 | @staticmethod 169 | def deserialize(inputs, name, state): 170 | param_str = state['param_str'] 171 | return ProposalTargetLayer(inputs[0], inputs[1], name=name, param_str=param_str) 172 | 173 | 174 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 175 | """Bounding-box regression targets (bbox_target_data) are stored in a 176 | compact form N x (class, tx, ty, tw, th) 177 | 178 | This function expands those targets into the 4-of-4*K representation used 179 | by the network (i.e. only one class has non-zero targets). 
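For example, with num_classes = 4, a target row of class 2 gets its (tx, ty, tw, th) written to columns 8:12 of bbox_targets, and the same columns of bbox_inside_weights are set to 1.0.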
180 | 181 | Returns: 182 | bbox_target (ndarray): N x 4K blob of regression targets 183 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 184 | """ 185 | 186 | clss = bbox_target_data[:, 0].astype(int) 187 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 188 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 189 | inds = np.where(clss > 0)[0] 190 | for ind in inds: 191 | cls = clss[ind] 192 | start = 4 * cls 193 | end = start + 4 194 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 195 | bbox_inside_weights[ind, start:end] = [1.0, 1.0, 1.0, 1.0] 196 | return bbox_targets, bbox_inside_weights 197 | 198 | 199 | def _compute_targets(ex_rois, gt_rois, labels): 200 | """Compute bounding-box regression targets for an image.""" 201 | 202 | assert ex_rois.shape[0] == gt_rois.shape[0] 203 | assert ex_rois.shape[1] == 4 204 | assert gt_rois.shape[1] == 4 205 | 206 | targets = bbox_transform(ex_rois, gt_rois) 207 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 208 | # Optionally normalize targets by a precomputed mean and stdev 209 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS)) 210 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS)) 211 | 212 | return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False) 213 | 214 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False): 215 | """Generate a random sample of RoIs comprising foreground and background 216 | examples. 217 | """ 218 | # overlaps: (rois x gt_boxes) 219 | overlaps = bbox_overlaps( 220 | np.ascontiguousarray(all_rois[:, 1:5], dtype=float), 221 | np.ascontiguousarray(gt_boxes[:, :4], dtype=float)) 222 | gt_assignment = overlaps.argmax(axis=1) 223 | max_overlaps = overlaps.max(axis=1) 224 | labels = gt_boxes[gt_assignment, 4] 225 | 226 | # Select foreground RoIs as those with >= FG_THRESH overlap 227 | fg_inds = np.where(max_overlaps >= cfg["TRAIN"].FG_THRESH)[0] 228 | # Guard against the case when an image has fewer than fg_rois_per_image 229 | # foreground RoIs 230 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size) 231 | 232 | # Sample foreground regions without replacement 233 | if fg_inds.size > 0: 234 | if deterministic: 235 | fg_inds = fg_inds[:fg_rois_per_this_image] 236 | else: 237 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False) 238 | 239 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 240 | bg_inds = np.where((max_overlaps < cfg["TRAIN"].BG_THRESH_HI) & 241 | (max_overlaps >= cfg["TRAIN"].BG_THRESH_LO))[0] 242 | # Compute number of background RoIs to take from this image (guarding 243 | # against there being fewer than desired) 244 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 245 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size) 246 | # Sample background regions without replacement 247 | if bg_inds.size > 0: 248 | if deterministic: 249 | bg_inds = bg_inds[:bg_rois_per_this_image] 250 | else: 251 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False) 252 | 253 | # The indices that we're selecting (both fg and bg) 254 | keep_inds = np.append(fg_inds, bg_inds) 255 | # Select sampled values from various arrays: 256 | labels = labels[keep_inds] 257 | # Clamp labels for the background RoIs to 0 258 | labels[fg_rois_per_this_image:] = 0 259 | rois = all_rois[keep_inds] 260 | 261 | bbox_target_data = _compute_targets( 262 | rois[:, 1:5],
gt_boxes[gt_assignment[keep_inds], :4], labels) 263 | 264 | bbox_targets, bbox_inside_weights = \ 265 | _get_bbox_regression_labels(bbox_target_data, num_classes) 266 | 267 | return labels, rois, bbox_targets, bbox_inside_weights 268 | -------------------------------------------------------------------------------- /utils/rpn/rpn_helpers.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import numpy as np 8 | import cntk 9 | from cntk import reduce_sum 10 | from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error 11 | from cntk.initializer import glorot_uniform, normal 12 | from cntk.layers import Convolution 13 | from cntk.losses import cross_entropy_with_softmax 14 | from utils.rpn.anchor_target_layer import AnchorTargetLayer 15 | from utils.rpn.proposal_layer import ProposalLayer 16 | from utils.rpn.proposal_target_layer import ProposalTargetLayer 17 | from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss 18 | try: 19 | from config import cfg 20 | except ImportError: 21 | from utils.default_config import cfg 22 | 23 | # Please keep in sync with Readme.md 24 | def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True, 25 | proposal_layer_param_string=None, conv_bias_init=0.0): 26 | ''' 27 | Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper: 28 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: 29 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" 30 | 31 | Outputs object detection proposals by applying estimated bounding-box 32 | transformations to a set of regular boxes (called "anchors"). 33 | 34 | Args: 35 | conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network 36 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. 37 | im_info: A CNTK variable or constant containing 38 | (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height) 39 | e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000 40 | add_loss_functions: If set to True rpn_losses will be returned, otherwise None is returned for the losses 41 | proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer. 
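conv_bias_init: The initial value for the bias of the convolution layers (default 0.0)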
42 | 43 | Returns: 44 | rpn_rois - the proposed ROIs 45 | rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness) 46 | ''' 47 | 48 | # RPN network 49 | # init = 'normal', initValueScale = 0.01, initBias = 0.1 50 | num_channels = cfg["CNTK"].RPN_NUM_CHANNELS 51 | rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1, 52 | init = normal(scale=0.01), init_bias=conv_bias_init)(conv_out) 53 | rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score", 54 | init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors) 55 | rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred", 56 | init = normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 4(coords) * 9(anchors) 57 | 58 | # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W) 59 | num_predictions = int(rpn_cls_score.shape[0] / 2) 60 | rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp") 61 | p_rpn_cls_score_rshp = cntk.placeholder() 62 | rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0) 63 | rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob') 64 | rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape") 65 | 66 | # proposal layer 67 | rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string)) 68 | rpn_rois = alias(rpn_rois_raw, name='rpn_rois') 69 | 70 | rpn_losses = None 71 | if(add_loss_functions): 72 | # RPN targets 73 | # Comment: rpn_cls_score is only passed vvv to get width and height of the conv feature map ... 
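# (AnchorTargetLayer reads only the shape of rpn_cls_score, not its values)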
74 | atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string)) 75 | rpn_labels = atl.outputs[0] 76 | rpn_bbox_targets = atl.outputs[1] 77 | rpn_bbox_inside_weights = atl.outputs[2] 78 | 79 | # classification loss 80 | p_rpn_labels = cntk.placeholder() 81 | p_rpn_cls_score_rshp = cntk.placeholder() 82 | 83 | keeps = cntk.greater_equal(p_rpn_labels, 0.0) 84 | fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets") 85 | bg_labels = minus(1, fg_labels, name="bg_targets") 86 | rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0) 87 | rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0) 88 | rpn_loss_cls = element_times(rpn_ce, keeps) 89 | 90 | # The terms that are accounted for in the cls loss are those that have a label >= 0 91 | cls_num_terms = reduce_sum(keeps) 92 | cls_normalization_factor = 1.0 / cls_num_terms 93 | normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor 94 | 95 | reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss, 96 | [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 97 | 'CE_with_ignore', 'norm_rpn_cls_loss') 98 | 99 | # regression loss 100 | p_rpn_bbox_pred = cntk.placeholder() 101 | p_rpn_bbox_targets = cntk.placeholder() 102 | p_rpn_bbox_inside_weights = cntk.placeholder() 103 | rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0) 104 | # The bbox loss is normalized by the rpn batch size 105 | bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE 106 | normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor 107 | 108 | reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss, 109 | [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets), 110 | (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)], 111 | 'SmoothL1Loss', 'norm_rpn_bbox_loss') 112 | 113 | rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses") 114 | 115 | return rpn_rois, rpn_losses 116 | 117 | def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes): 118 | ''' 119 | Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper: 120 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun: 121 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks" 122 | 123 | Assigns object detection proposals to ground-truth targets. 124 | Produces proposal classification labels and bounding-box regression targets. 125 | It also adds gt_boxes to candidates and samples fg and bg rois for training. 126 | 127 | Args: 128 | rpn_rois: The proposed ROIs, e.g. from a region proposal network 129 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image. 
130 | num_classes: The number of classes in the data set 131 | 132 | Returns: 133 | rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs 134 | label_targets - the target labels for the rois 135 | bbox_targets - the regression coefficient targets for the rois 136 | bbox_inside_weights - the weights for the regression loss 137 | ''' 138 | 139 | ptl_param_string = "'num_classes': {}".format(num_classes) 140 | ptl = user_function(ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=ptl_param_string)) 141 | 142 | # use an alias if you need to access the outputs, e.g., when cloning a trained network 143 | rois = alias(ptl.outputs[0], name='rpn_target_rois') 144 | label_targets = ptl.outputs[1] 145 | bbox_targets = ptl.outputs[2] 146 | bbox_inside_weights = ptl.outputs[3] 147 | 148 | return rois, label_targets, bbox_targets, bbox_inside_weights 149 | 150 | 151 | -------------------------------------------------------------------------------- /utils/unit_tests.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) Microsoft. All rights reserved. 2 | 3 | # Licensed under the MIT license. See LICENSE.md file in the project root 4 | # for full license information. 5 | # ============================================================================== 6 | 7 | import os, sys 8 | abs_path = os.path.dirname(os.path.abspath(__file__)) 9 | sys.path.append(os.path.join(abs_path)) 10 | sys.path.append(os.path.join(abs_path, "..")) 11 | 12 | import pytest 13 | import numpy as np 14 | import cntk 15 | from cntk import user_function 16 | from cntk.ops import input_variable 17 | from rpn.proposal_layer import ProposalLayer as CntkProposalLayer 18 | from rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer 19 | from rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer 20 | from caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer 21 | from caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer 22 | from caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer 23 | 24 | def test_proposal_layer(): 25 | cls_prob_shape_cntk = (18,61,61) 26 | cls_prob_shape_caffe = (18,61,61) 27 | rpn_bbox_shape = (36, 61, 61) 28 | dims_info_shape = (6,) 29 | im_info = [1000, 1000, 1] 30 | 31 | # Create input tensors with values 32 | cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32) 33 | rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32) 34 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32) 35 | 36 | # Create CNTK layer and call forward 37 | cls_prob_var = input_variable(cls_prob_shape_cntk) 38 | rpn_bbox_var = input_variable(rpn_bbox_shape) 39 | dims_info_var = input_variable(dims_info_shape) 40 | 41 | cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var)) 42 | state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input}) 43 | cntk_proposals = cntk_output[next(iter(cntk_output))][0] 44 | 45 | # Create Caffe layer and call forward 46 | cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe) 47 | bottom = [np.array([cls_prob_caffe]),np.array([rpn_bbox_pred]),np.array([im_info])] 48 | top = None # handled through return statement in caffe layer for unit testing 49 | 50 | param_str = "'feat_stride': 16" 51 | caffe_layer = CaffeProposalLayer() 
52 | caffe_layer.set_param_str(param_str) 53 | caffe_layer.setup(bottom, top) 54 | caffe_output = caffe_layer.forward(bottom, top) 55 | caffe_proposals = caffe_output[:,1:] 56 | 57 | # assert that results are exactly the same 58 | assert cntk_proposals.shape == caffe_proposals.shape 59 | assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0) 60 | print("Verified ProposalLayer") 61 | 62 | def test_proposal_target_layer(): 63 | num_rois = 400 64 | all_rois_shape_cntk = (num_rois,4) 65 | num_gt_boxes = 50 66 | gt_boxes_shape_cntk = (num_gt_boxes,5) 67 | 68 | # Create input tensors with values 69 | x1y1 = np.random.random_sample((num_rois, 2)) * 500 70 | wh = np.random.random_sample((num_rois, 2)) * 400 71 | x2y2 = x1y1 + wh + 50 72 | all_rois = np.hstack((x1y1, x2y2)).astype(np.float32) 73 | 74 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500 75 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400 76 | x2y2 = x1y1 + wh + 50 77 | label = np.random.random_sample((num_gt_boxes, 1)) 78 | label = (label * 17.0) 79 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32) 80 | 81 | # Create CNTK layer and call forward 82 | all_rois_var = input_variable(all_rois_shape_cntk) 83 | gt_boxes_var = input_variable(gt_boxes_shape_cntk) 84 | 85 | cntk_layer = user_function(CntkProposalTargetLayer(all_rois_var, gt_boxes_var, param_str="'num_classes': 17", deterministic=True)) 86 | state, cntk_output = cntk_layer.forward({all_rois_var: [all_rois], gt_boxes_var: [gt_boxes]}) 87 | 88 | roi_key = [k for k in cntk_output if 'rpn_target_rois_raw' in str(k)][0] 89 | labels_key = [k for k in cntk_output if 'label_targets_raw' in str(k)][0] 90 | bbox_key = [k for k in cntk_output if 'bbox_targets_raw' in str(k)][0] 91 | bbox_w_key = [k for k in cntk_output if 'bbox_inside_w_raw' in str(k)][0] 92 | 93 | cntk_rois = cntk_output[roi_key][0] 94 | cntk_labels_one_hot = cntk_output[labels_key][0] 95 | cntk_bbox_targets = cntk_output[bbox_key][0] 96 | cntk_bbox_inside_weights = cntk_output[bbox_w_key][0] 97 | 98 | cntk_labels = np.argmax(cntk_labels_one_hot, axis=1) 99 | 100 | # Create Caffe layer and call forward 101 | zeros = np.zeros((all_rois.shape[0], 1), dtype=gt_boxes.dtype) 102 | all_rois_caffe = np.hstack((zeros, all_rois)) 103 | 104 | bottom = [np.array(all_rois_caffe),np.array(gt_boxes)] 105 | top = None # handled through return statement in caffe layer for unit testing 106 | 107 | param_str = "'num_classes': 17" 108 | caffe_layer = CaffeProposalTargetLayer() 109 | caffe_layer.set_param_str(param_str) 110 | caffe_layer.setup(bottom, top) 111 | caffe_layer.set_deterministic_mode() 112 | 113 | caffe_rois, caffe_labels, caffe_bbox_targets, caffe_bbox_inside_weights = caffe_layer.forward(bottom, top) 114 | caffe_rois = caffe_rois[:,1:] 115 | 116 | num_caffe_rois = caffe_rois.shape[0] 117 | cntk_rois = cntk_rois[:num_caffe_rois,:] 118 | cntk_labels = cntk_labels[:num_caffe_rois] 119 | cntk_bbox_targets = cntk_bbox_targets[:num_caffe_rois,:] 120 | cntk_bbox_inside_weights = cntk_bbox_inside_weights[:num_caffe_rois,:] 121 | 122 | # assert that results are exactly the same 123 | assert cntk_rois.shape == caffe_rois.shape 124 | assert cntk_labels.shape == caffe_labels.shape 125 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape 126 | assert cntk_bbox_inside_weights.shape == caffe_bbox_inside_weights.shape 127 | 128 | caffe_labels = [int(x) for x in caffe_labels] 129 | 130 | assert np.allclose(cntk_rois, caffe_rois, rtol=0.0, atol=0.0) 131 | assert np.allclose(cntk_labels, 
caffe_labels, rtol=0.0, atol=0.0) 132 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0) 133 | assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0) 134 | print("Verified ProposalTargetLayer") 135 | 136 | def test_anchor_target_layer(): 137 | rpn_cls_score_shape_cntk = (1, 18, 61, 61) 138 | num_gt_boxes = 50 139 | gt_boxes_shape_cntk = (num_gt_boxes,5) 140 | dims_info_shape = (6,) 141 | im_info = [1000, 1000, 1] 142 | 143 | # Create input tensors with values 144 | rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32) 145 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32) 146 | 147 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500 148 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400 149 | x2y2 = x1y1 + wh + 50 150 | label = np.random.random_sample((num_gt_boxes, 1)) 151 | label = (label * 17.0) 152 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32) 153 | 154 | # Create CNTK layer and call forward 155 | rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk) 156 | gt_boxes_var = input_variable(gt_boxes_shape_cntk) 157 | dims_info_var = input_variable(dims_info_shape) 158 | 159 | cntk_layer = user_function(CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True)) 160 | state, cntk_output = cntk_layer.forward({rpn_cls_score_var: [rpn_cls_score_dummy], gt_boxes_var: [gt_boxes], dims_info_var: dims_input}) 161 | 162 | obj_key = [k for k in cntk_output if 'objectness_target' in str(k)][0] 163 | bbt_key = [k for k in cntk_output if 'rpn_bbox_target' in str(k)][0] 164 | bbw_key = [k for k in cntk_output if 'rpn_bbox_inside_w' in str(k)][0] 165 | 166 | cntk_objectness_target = cntk_output[obj_key][0] 167 | cntk_bbox_targets = cntk_output[bbt_key][0] 168 | cntk_bbox_inside_w = cntk_output[bbw_key][0] 169 | 170 | # Create Caffe layer and call forward 171 | bottom = [np.array(rpn_cls_score_dummy),np.array(gt_boxes), np.array(im_info)] 172 | top = None # handled through return statement in caffe layer for unit testing 173 | 174 | param_str = "'feat_stride': 16" 175 | caffe_layer = CaffeAnchorTargetLayer() 176 | caffe_layer.set_param_str(param_str) 177 | caffe_layer.setup(bottom, top) 178 | caffe_layer.set_deterministic_mode() 179 | 180 | caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top) 181 | 182 | # assert that results are exactly the same 183 | assert cntk_objectness_target.shape == caffe_objectness_target.shape 184 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape 185 | assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape 186 | 187 | assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0) 188 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0) 189 | assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0) 190 | print("Verified AnchorTargetLayer") 191 | 192 | if __name__ == '__main__': 193 | test_proposal_layer() 194 | test_proposal_target_layer() 195 | test_anchor_target_layer() 196 | -------------------------------------------------------------------------------- /web.config: -------------------------------------------------------------------------------- --------------------------------------------------------------------------------