├── .deployment
├── .gitattributes
├── .gitignore
├── CNTKModels
│   └── download_model.py
├── LICENSE
├── README.md
├── Temp
│   └── placeholder.txt
├── app.py
├── cntk_helpers.py
├── config.py
├── deploy.cmd
├── doc
│   ├── Postman_2017-09-26_22-50-06.jpg
│   ├── cmd_2017-09-26_22-15-45.jpg
│   ├── cmd_2017-09-26_22-18-52.jpg
│   ├── cmd_2017-09-26_22-20-23.jpg
│   ├── iexplore_2017-09-26_22-17-20.jpg
│   ├── iexplore_2017-09-26_22-22-19.jpg
│   ├── iexplore_2017-09-26_22-23-19.jpg
│   ├── iexplore_2017-09-26_22-23-59.jpg
│   ├── iexplore_2017-09-26_22-25-04.jpg
│   └── iexplore_2017-09-26_23-09-42.jpg
├── evaluate.py
├── logs
│   └── placeholder.txt
├── plot_helpers.py
├── requirements.txt
├── utils
│   ├── Readme.md
│   ├── annotations
│   │   └── annotations_helper.py
│   ├── caffe_layers
│   │   ├── anchor_target_layer.py
│   │   ├── bbox_transform.py
│   │   ├── proposal_layer.py
│   │   └── proposal_target_layer.py
│   ├── cython_modules
│   │   ├── cpu_nms.cp35-win_amd64.pyd
│   │   ├── cpu_nms.cpython-34m.so
│   │   ├── cython_bbox.cp35-win_amd64.pyd
│   │   └── cython_bbox.cpython-34m.so
│   ├── default_config.py
│   ├── map
│   │   └── map_helpers.py
│   ├── nms
│   │   └── nms_wrapper.py
│   ├── pytest.ini
│   ├── rpn
│   │   ├── anchor_target_layer.py
│   │   ├── bbox_transform.py
│   │   ├── cntk_smoothL1_loss.py
│   │   ├── generate_anchors.py
│   │   ├── proposal_layer.py
│   │   ├── proposal_target_layer.py
│   │   └── rpn_helpers.py
│   └── unit_tests.py
└── web.config
/.deployment:
--------------------------------------------------------------------------------
1 | [config]
2 | command = deploy.cmd
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | # *.py[cod]
4 | *$py.class
5 |
6 | # IDE
7 |
8 | .vs/
9 | .vscode/
10 |
11 | # C extensions
12 | #*.so
13 |
14 | # Distribution / packaging
15 | .Python
16 | build/
17 | develop-eggs/
18 | dist/
19 | downloads/
20 | eggs/
21 | .eggs/
22 | lib/
23 | lib64/
24 | parts/
25 | sdist/
26 | var/
27 | wheels/
28 | *.egg-info/
29 | .installed.cfg
30 | *.egg
31 |
32 | # PyInstaller
33 | # Usually these files are written by a python script from a template
34 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
35 | *.manifest
36 | *.spec
37 |
38 | # Installer logs
39 | pip-log.txt
40 | pip-delete-this-directory.txt
41 |
42 | # Unit test / coverage reports
43 | htmlcov/
44 | .tox/
45 | .coverage
46 | .coverage.*
47 | .cache
48 | nosetests.xml
49 | coverage.xml
50 | *.cover
51 | .hypothesis/
52 |
53 | # Translations
54 | *.mo
55 | *.pot
56 |
57 | # Django stuff:
58 | *.log
59 | local_settings.py
60 |
61 | # Flask stuff:
62 | instance/
63 | .webassets-cache
64 |
65 | # Scrapy stuff:
66 | .scrapy
67 |
68 | # Sphinx documentation
69 | docs/_build/
70 |
71 | # PyBuilder
72 | target/
73 |
74 | # Jupyter Notebook
75 | .ipynb_checkpoints
76 |
77 | # pyenv
78 | .python-version
79 |
80 | # celery beat schedule file
81 | celerybeat-schedule
82 |
83 | # SageMath parsed files
84 | *.sage.py
85 |
86 | # Environments
87 | .env
88 | .venv
89 | env/
90 | venv/
91 | ENV/
92 |
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 |
97 | # Rope project settings
98 | .ropeproject
99 |
100 | # mkdocs documentation
101 | /site
102 |
103 | # mypy
104 | .mypy_cache/
105 |
106 |
107 | # directories
108 | .idea/
109 |
110 | # temporary ignore
111 | __pycache__
112 | __init__.py
113 | Temp/*.jpg
114 | CNTKModels/*.txt
115 |
116 | # models
117 | *.model
--------------------------------------------------------------------------------
/CNTKModels/download_model.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | try:
4 | from urllib.request import urlretrieve
5 | except ImportError:
6 | from urllib import urlretrieve
7 |
8 | def download_file(filename, file_url):
9 | file_dir = os.path.dirname(os.path.abspath(__file__))
10 | file_path = os.path.join(file_dir, filename)
11 | if not os.path.exists(file_path):
12 | print('Downloading file from ' + file_url + ', may take a while...')
13 | urlretrieve(file_url,file_path)
14 | print('Saved file as ' + file_path)
15 | else:
16 | print('File already available at ' + file_path)
17 |
18 | if __name__ == '__main__':
19 | download_file('HotailorPOC2.model','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2.model')
20 | download_file('HotailorPOC2_class_map.txt','https://privdatastorage.blob.core.windows.net/github/cntk-python-web-service-on-azure/HotailorPOC2_class_map.txt')
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2017 Karol Żak
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Table of contents
2 | - [Project description](#project-description)
3 | - [Results](#results)
4 | - [Deployment steps](#deployment-steps)
5 | - [Setup](#setup)
6 | - [Deploy demo](#deploy-demo)
7 | - [Code highlights](#code-highlights)
8 |
9 | # Project description
10 | [[back to the top]](#table-of-contents)
11 |
12 | This sample project shows how to prepare a simple Python web service with an image classification model produced in [CNTK (Cognitive Toolkit)](https://github.com/Microsoft/CNTK) using [FasterRCNN](https://github.com/Microsoft/CNTK/tree/master/Examples/Image/Detection/FasterRCNN), and how to deploy it to [Azure Web Apps](https://azure.microsoft.com/en-gb/services/app-service/web/)
13 |
14 | - [x] Web service written in Python using the Flask module
15 | - [x] Python scripts that let you evaluate images using CNTK and a pretrained model
16 | - [x] Custom deployment scripts for Azure Web Apps
17 | - [x] Automatic setup of custom Python 3.5.x environment with all the required dependencies
18 | - [ ] HTML UI for testing image classification
19 | - [ ] Virtual Python environments for each application deployed to Azure Web Apps
20 |
21 | # Results
22 | [[back to the top]](#table-of-contents)
23 |
24 | [Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
25 | ![](doc/iexplore_2017-09-26_23-09-42.jpg)
26 |
27 | Sample request and response in Postman:
28 | ![](doc/Postman_2017-09-26_22-50-06.jpg)
29 |
30 | # Deployment steps
31 |
32 | ## Setup
33 | [[back to the top]](#table-of-contents)
34 |
35 |
36 | 1. **Download the contents of this repo**
37 |
38 | You can either clone this repo or just download it and unzip it to a folder
39 |
40 | 2. **Setup Python environment**
41 |
42 | For the scripts to work you need a proper Python environment. If you don't have one set up already, follow one of the online tutorials. To set up the Python environment and all the dependencies required by CNTK on my local Windows machine I used [this tutorial](https://docs.microsoft.com/en-us/cognitive-toolkit/setup-windows-binary-script)
43 |
44 | 3. **Download CNTK model and class map file**
45 |
46 | Go to the `/CNTKModels` folder in the location where you unzipped this repo and run `download_model.py`. It will automatically download the pretrained model and the class map file required for the evaluation to run properly.
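
For example, from the repo root (assuming `python` points at your Python 3.5 environment):
```Batchfile
cd CNTKModels
python download_model.py
```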
47 |
48 | 4. **Install Azure CLI tools**
49 |
50 | If you don't have them yet, you can easily install them by opening Windows Command Prompt and running this command:
51 | ```Batchfile
52 | pip install azure-cli
53 | ```
54 |
55 | 5. **Get Azure subscription**
56 |
57 | If you don't have an Azure subscription you can always create a [new free trial](https://azure.microsoft.com/en-us/free/) with $200 of credit to spend
58 |
59 | ## Deploy demo
60 | [[back to the top]](#table-of-contents)
61 | 1. **Set variables**
62 |
63 | Open Command Prompt in the location where you unzipped the contents of this repository (for example: `cd C:\Poligon\WebService`) and set the following variables (make sure to replace each `[]` placeholder with a proper value):
64 |
65 | ```
66 | set uname=[username]
67 | set pass=[password]
68 | set appn=[web_app_name]
69 | set rgname=[resource_group_name]
70 | ```
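
For example (all values below are just illustrations; pick your own):
```
set uname=mydeployuser
set pass=MySecretPassw0rd
set appn=cntkpywebapptest1
set rgname=cntk-demo-rg
```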
71 |
72 | 2. **Log in to Azure**
73 |
74 | In the same command prompt, type:
75 |
76 | ```
77 | az login
78 | ```
79 | You should see something like this:
80 |
81 | ![](doc/cmd_2017-09-26_22-15-45.jpg)
82 |
83 | Now go to the https://aka.ms/devicelogin website and type in the code:
84 |
85 | ![](doc/iexplore_2017-09-26_22-17-20.jpg)
86 |
87 | You will then be asked to log in with an email connected to your Azure subscription
88 | 
89 | If everything goes well you should see a verification message on the website, and the console should list your Azure subscriptions
90 |
91 |
92 | 3. **Set up deployment credentials**
93 |
94 | We're setting these credentials up so that we can later deploy code remotely to our Azure Web App
95 | ```
96 | az webapp deployment user set --user-name %uname% --password %pass%
97 | ```
98 |
99 | 4. **Create resource group**
100 |
101 | Resource groups help you manage the resources in your subscription and are the basic unit for deploying services to Azure. [Read more here](https://docs.microsoft.com/en-us/azure/azure-resource-manager/resource-group-overview)
102 |
103 | ```
104 | az group create --location westeurope --name %rgname%
105 | ```
106 |
107 | 5. **Create new [Azure App Service](https://azure.microsoft.com/en-us/services/app-service/) Plan and new [Azure Web App](https://azure.microsoft.com/en-us/services/app-service/web/)**
108 |
109 | ```
110 | az appservice plan create --name %appn% --resource-group %rgname% --sku S1
111 | az webapp create --name %appn% --resource-group %rgname% --plan %appn%
112 | ```
113 |
114 | 6. **Configure Azure Web App and add Python extension**
115 |
116 | Azure Web Apps by default support only Python 2.7 and 3.4. Because I used Python 3.5 I had to use a special [extension](https://azure.microsoft.com/en-us/blog/azure-web-sites-extensions/) to set up the environment
117 |
118 | First you need to change some [Application Settings](https://docs.microsoft.com/en-us/azure/app-service/web-sites-configure) on your Web App (the pink ones):
119 | ![](doc/iexplore_2017-09-26_22-22-19.jpg)
120 | Changing `Platform` is required; changing `Always On` is optional, but I recommend enabling it so that our web service stays awake even when not in use.
121 |
122 | After we save the Application Settings we can add the Python 3.5.x extension. To do this, just type `extensions` into the search box
123 | ![](doc/iexplore_2017-09-26_22-23-19.jpg)
124 |
125 | And then simply add the new extension
126 | ![](doc/iexplore_2017-09-26_22-23-59.jpg)
127 |
128 | It should take around a minute or two to properly install the extension
129 | ![](doc/iexplore_2017-09-26_22-25-04.jpg)
130 |
131 |
132 | 7. **Set up the deployment source for the newly created Azure Web App**
133 |
134 | This command will not only set up the deployment source for your app but will also retrieve the URL you will need in the next steps
135 | ```
136 | az webapp deployment source config-local-git --name %appn% --resource-group %rgname% --query url --output tsv
137 | ```
138 | It should return something like this:
139 | ![](doc/cmd_2017-09-26_22-18-52.jpg)
140 |
141 |
142 | 8. **Initialize git and add remote repository**
143 |
144 | Make sure to replace `[remote_repo_address]` with the URL returned in step number 7.
145 | ```
146 | git init
147 | git remote add azure [remote_repo_address]
148 | ```
149 | The command with the URL filled in should look like this:
150 | ![](doc/cmd_2017-09-26_22-20-23.jpg)
151 |
152 | 9. **Push application to Azure Web App remote repository**
153 |
154 | The last step is to simply push our application's code to the Azure Web App
155 | ```
156 | git add -A
157 | git commit -m "init"
158 | git push azure master
159 | ```
160 |
161 | This will trigger our [custom deployment script](deploy.cmd), copy all the files, set up the Python environment and install all the required dependencies from the [requirements.txt](requirements.txt) file
162 |
163 | 10. **Test the application**
164 |
165 | If everything went smoothly you should now have a running Python application that you can test. I used Postman to test HTTP requests and responses
166 |
167 | [Website Demo](http://cntkpywebapptest1.azurewebsites.net/):
168 | ![](doc/iexplore_2017-09-26_23-09-42.jpg)
169 |
170 | Sample request and response in Postman:
171 | ![](doc/Postman_2017-09-26_22-50-06.jpg)
172 |
173 | # Code highlights
174 | [[back to the top]](#table-of-contents)
175 |
176 |
177 | - [config.py](config.py) - the most important variables for the scripts are set in this file
178 |
179 | Variables used by the web service to point to the directories for temp images and CNTK models:
180 |
181 | ```Python
182 | # directories for web service:
183 | __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change
184 | __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files
185 | ```
186 |
187 | Variables for choosing the specific model:
188 |
189 | ```Python
190 | __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name
191 | __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name
192 | ```
193 |
194 | Variables used by `evaluate.py` to properly preprocess images and call the CNTK eval function:
195 |
196 | ```Python
197 | __C.CNTK.IMAGE_WIDTH = 1000
198 | __C.CNTK.IMAGE_HEIGHT = 1000
199 | __C.CNTK.NUM_CHANNELS = 3
200 | ```
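
These values must match the dimensions your model was trained with. A minimal sketch of how `evaluate.py` consumes them to build the input shape for the model:

```Python
from config import cfg

# (channels, height, width) - the image input shape the CNTK model expects
img_shape = (cfg["CNTK"].NUM_CHANNELS, cfg["CNTK"].IMAGE_HEIGHT, cfg["CNTK"].IMAGE_WIDTH)
print(img_shape)  # (3, 1000, 1000) with the defaults above
```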
201 |
202 | - [app.py](app.py) - main application - startup file for Flask
203 |
204 | There is one very important line for running CNTK:
205 | ```Python
206 | [..]
207 | import os
208 | os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
209 | [..]
210 | ```
211 | It prepends the location of the CNTK libraries to the PATH variable. This is very important because our code strongly relies on that PATH. For now I'm doing this in code, but in the future I want to move it to the deployment script
212 |
213 | I am using the Flask module to run my web service. To make it work I first create a Flask app instance and then run it on the proper port:
214 |
215 | ```Python
216 | [..]
217 | app = Flask(__name__)
218 | [..]
219 | if __name__ == '__main__':
220 | HOST = os.environ.get('SERVER_HOST', 'localhost')
221 | try:
222 | PORT = int(os.environ.get('SERVER_PORT', '5555'))
223 | except ValueError:
224 | PORT = 5555
225 | app.run(HOST, PORT)
226 | ```
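
To try the service locally (assuming the dependencies from [requirements.txt](requirements.txt) are installed), run `python app.py` and open `http://localhost:5555/` in a browser.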
227 | I also used routes to set up specific methods for our RESTful web service. Currently I expose 2 routes for my API: one returns a collection of classified tags and the other returns an image with the evaluation results plotted on it. The `'/'` route simply serves the default landing page
228 | ```Python
229 | [..]
230 | @app.route('/')
231 | [..]
232 | @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST'])
233 | [..]
234 | @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST'])
235 | [..]
236 | ```
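
Both POST routes expect the image in a multipart form field named `file` (see [app.py](app.py)). A minimal client sketch using the `requests` package (the file name `sample.jpg` is just an illustration):

```Python
import requests

# send an image to the tag-returning endpoint of a locally running service
url = "http://localhost:5555/hotelidentifier/api/v1.0/evaluate/returntags"
with open("sample.jpg", "rb") as f:
    response = requests.post(url, files={"file": f})
print(response.json())  # {'tags': [{'label': ..., 'score': ..., 'bbox': ...}, ...]}
```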
237 |
238 | - [evaluate.py](evaluate.py) - main script for image classification with CNTK model
239 |
240 | This script strongly depends on [config.py](config.py) and it also uses [cntk_helpers.py](cntk_helpers.py), [plot_helpers.py](plot_helpers.py) and a bunch of scripts from the [utils](utils) folder. Most of those scripts were copied from the original [CNTK source on GitHub](https://github.com/Microsoft/CNTK), some of them with slight changes
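
The entry point is `evaluateimage`, which loads the model itself if you don't pass one in. A minimal local test sketch, assuming the model was downloaded in the Setup step (the image path is just an illustration):

```Python
from evaluate import evaluateimage

# "returntags" yields Tag objects; "returnimage" yields a path to the annotated image
tags = evaluateimage("./Temp/sample.jpg", "returntags")
for tag in tags:
    print(tag.serialize())
```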
241 |
242 | - [plot_helpers.py](plot_helpers.py) - helper script for dealing with image plotting
243 |
244 | When working in a headless (non-GUI) server environment such as Azure Web Apps, you need to switch the `matplotlib` module to a backend that does not rely on a GUI
245 | ```Python
246 | [..]
247 | # this is important when deploying to headless server environment (non-GUI)
248 | ###################################################
249 | import matplotlib
250 | # force headless backend, or set 'backend' to 'Agg'
251 | # in your ~/.matplotlib/matplotlibrc
252 | matplotlib.use('Agg')
253 |
254 | import matplotlib.pyplot
255 | # force non-interactive mode, or set 'interactive' to False
256 | # in your ~/.matplotlib/matplotlibrc
257 | from matplotlib.pyplot import imsave
258 | matplotlib.pyplot.ioff()
259 | ###################################################
260 | [..]
261 | ```
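
A quick way to confirm the headless setup works (a minimal sketch; the output path is just an illustration):

```Python
import numpy as np
from matplotlib.pyplot import imsave

# with the 'Agg' backend this succeeds even with no display attached
imsave("./Temp/headless_check.png", np.zeros((10, 10, 3), dtype=np.uint8))
```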
262 |
263 | - [requirements.txt](requirements.txt)
264 |
265 | It holds all the dependencies required for my application and the CNTK libraries to work.
266 | ```
267 | easydict==1.6
268 | pytest==3.0.3
269 | opencv-python
270 | https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
271 | https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
272 | Flask==0.12.2
273 | numpy==1.11.2
274 | matplotlib==1.5.3
275 | ipython==6.2.0
276 | Pillow==4.1.1
277 | PyYAML==3.12
278 | ```
279 | As you can see, in most cases we pin specific versions of modules, and sometimes we even point explicitly to the exact .whl file to use for installation
280 |
281 | - [.deployment](.deployment)
282 |
283 | If this file is present, Kudu will use the custom `deploy.cmd` file instead of the default one. We use a custom deployment script to choose Python 3.5 and install all the necessary dependencies. To learn more about Kudu and deploying to Azure Web Apps, [go here](https://azure.microsoft.com/en-gb/resources/videos/what-is-kudu-with-david-ebbo/)
284 | ```
285 | [config]
286 | command = deploy.cmd
287 | ```
288 |
289 | - [deploy.cmd](deploy.cmd)
290 |
291 | Custom script for our deployment with Kudu. The main difference from the default script is that I'm setting Python 3.5 (installed from the extension) as my main environment
292 |
293 | ```
294 | [..]
295 | SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
296 | SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
297 | [..]
298 | ```
299 |
300 | I'm also using `deploy.cmd` to install all the required dependencies:
301 | ```CMD
302 | [..]
303 | :: 4. Install packages
304 | echo Pip install requirements.
305 | echo "Installing requirements"
306 | %PYTHON_EXE% -m pip install -r requirements.txt
307 | [..]
308 | ```
309 |
310 | **TODO:**
311 | I was told that it is better to have a virtual Python environment for each app hosted on Azure Web Apps so that there is no chance of version conflicts between modules used by different apps. That is something I need to fix in the future.
312 |
313 |
314 | - [web.config](web.config)
315 |
316 | I used `web.config` to point to the directory of my custom Python 3.5 installation and to run my Flask-based Python web service. I based my `web.config` on the Azure Web Apps [documentation](https://docs.microsoft.com/en-us/azure/app-service/web-sites-python-configure): it registers the `wfastcgi` handler from the Python extension and points it at the Flask app:
317 |
318 | ```xml
319 | <?xml version="1.0" encoding="utf-8"?>
320 | <configuration>
321 |   <appSettings>
322 |     <add key="PYTHONPATH" value="D:\home\site\wwwroot"/>
323 |     <add key="WSGI_HANDLER" value="app.app"/>
324 |     <add key="WSGI_LOG" value="D:\home\LogFiles\wfastcgi.log"/>
325 |   </appSettings>
326 |   <system.webServer>
327 |     <handlers>
328 |       <add name="PythonHandler" path="*" verb="*"
329 |            modules="FastCgiModule"
330 |            scriptProcessor="D:\home\python354x64\python.exe|D:\home\python354x64\wfastcgi.py"
331 |            resourceType="Unspecified"
332 |            requireAccess="Script"/>
333 |     </handlers>
334 |   </system.webServer>
335 | </configuration>
336 | 
337 | ```
338 |
339 |
340 |
--------------------------------------------------------------------------------
/Temp/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/Temp/placeholder.txt
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | #!flask/bin/python
2 | from flask import Flask, jsonify, request, make_response, send_file
3 | import os
4 | os.environ['PATH'] = r'D:\home\python354x64;' + os.environ['PATH']
5 | import uuid
6 | from config import cfg
7 | from cntk import load_model
8 | app = Flask(__name__)
9 |
10 |
11 | model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)
12 | print("Loading existing model from %s" % model_path)
13 | loadedModel = load_model(model_path)
14 |
15 |
16 | @app.errorhandler(404)
17 | def not_found(error):
18 | return make_response(jsonify({'error': 'Not found'}), 404)
19 |
20 | @app.route('/')
21 | def index():
22 |     return "<html>" \
23 |         "<body>" \
24 |         "<h1>Hello, World!</h1>" \
25 |         "This is a sample web service written in Python using Flask module.<br/>" \
26 |         "Use one of the following urls to evaluate images:<br/>" \
27 |         "/hotelidentifier/api/v1.0/evaluate/returntags - takes image as parameter and returns cloud of tags<br/>" \
28 |         "/hotelidentifier/api/v1.0/evaluate/returnimage - takes image as parameter and returns tagged image<br/>" \
29 |         "</body>" \
30 |         "</html>"
31 |
32 |
33 | @app.route('/hotelidentifier/api/v1.0/evaluate/returntags', methods=['POST'])
34 | def return_tags():
35 | file_upload = request.files['file']
36 | if file_upload:
37 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg')
38 | file_upload.save(temp_file_path)
39 | app.logger.debug('File is saved as %s', temp_file_path)
40 | from evaluate import evaluateimage
41 | return jsonify(tags=[e.serialize() for e in evaluateimage(temp_file_path,"returntags",eval_model=loadedModel)])
42 |
43 | @app.route('/hotelidentifier/api/v1.0/evaluate/returnimage', methods=['POST'])
44 | def return_image():
45 | file_upload = request.files['file']
46 | if file_upload:
47 | temp_file_path=os.path.join('./Temp',str(uuid.uuid4())+'.jpg')
48 | file_upload.save(temp_file_path)
49 | app.logger.debug('File is saved as %s', temp_file_path)
50 | from evaluate import evaluateimage
51 | return send_file(evaluateimage(temp_file_path,"returnimage",eval_model=loadedModel), mimetype='image/jpg')
52 | #return send_file(os.path.join('./Temp', temp_filename), mimetype='image/jpg')
53 |
54 |
55 |
56 | if __name__ == '__main__':
57 | HOST = os.environ.get('SERVER_HOST', 'localhost')
58 | try:
59 | PORT = int(os.environ.get('SERVER_PORT', '5555'))
60 | except ValueError:
61 | PORT = 5555
62 | app.run(HOST, PORT)
63 |
64 |
65 | """ add UI later
66 | @app.route("/")
67 | def index():
68 | return render_template('index.html')
69 |
70 | @app.route("/api/uploader", methods=['POST'])
71 | @cross_origin()
72 | def api_upload_file():
73 | img = Image.open(BytesIO(request.files['imagefile'].read())).convert('RGB')
74 | img = ImageOps.fit(img, (224, 224), Image.ANTIALIAS)
75 | return json.dumps(run_some_deep_learning_cntk(img))
76 |
77 |
78 | def run_some_deep_learning_cntk(rgb_pil_image):
79 | # Convert to BGR
80 | rgb_image = np.array(rgb_pil_image, dtype=np.float32)
81 | bgr_image = rgb_image[..., [2, 1, 0]]
82 | img = np.ascontiguousarray(np.rollaxis(bgr_image, 2))
83 |
84 | # Use last layer to make prediction
85 | z_out = combine([MODEL.outputs[3].owner])
86 | result = np.squeeze(z_out.eval({z_out.arguments[0]: [img]}))
87 |
88 | # Sort probabilities
89 | a = np.argsort(result)[-1]
90 | predicted_category = " ".join(LABELS[a].split(" ")[1:])
91 |
92 | return predicted_category.split(",")[0]
93 | """
--------------------------------------------------------------------------------
/cntk_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | from __future__ import print_function
8 | from utils.rpn.bbox_transform import bbox_transform_inv
9 |
10 | def regress_rois(roi_proposals, roi_regression_factors, labels, dims_input):
11 | for i in range(len(labels)):
12 | label = labels[i]
13 | if label > 0:
14 | deltas = roi_regression_factors[i:i+1,label*4:(label+1)*4]
15 | roi_coords = roi_proposals[i:i+1,:]
16 | regressed_rois = bbox_transform_inv(roi_coords, deltas)
17 | roi_proposals[i,:] = regressed_rois
18 |
19 | if dims_input is not None:
20 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
21 | pad_width, pad_height, scaled_image_width, scaled_image_height, _, _ = dims_input
22 | left = (pad_width - scaled_image_width) / 2
23 | right = pad_width - left - 1
24 | top = (pad_height - scaled_image_height) / 2
25 | bottom = pad_height - top - 1
26 |
27 | roi_proposals[:,0] = roi_proposals[:,0].clip(left, right)
28 | roi_proposals[:,1] = roi_proposals[:,1].clip(top, bottom)
29 | roi_proposals[:,2] = roi_proposals[:,2].clip(left, right)
30 | roi_proposals[:,3] = roi_proposals[:,3].clip(top, bottom)
31 |
32 | return roi_proposals
33 |
34 |
--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import os
8 | import os.path as osp
9 | import numpy as np
10 | # `pip install easydict` if you don't have it
11 | from easydict import EasyDict as edict
12 |
13 | __C = edict()
14 | cfg = __C
15 |
16 | #
17 | # CNTK parameters
18 | #
19 |
20 | __C.CNTK = edict()
21 |
22 | # directories for web service:
23 | __C.CNTK.TEMP_PATH = "./Temp" # temp folder for image processing - do not change
24 | __C.CNTK.MODEL_DIRECTORY = "./CNTKModels" # directory for storing models and class map files
25 |
26 | #################
27 | # Model & Class Map Files names
28 | #################
29 | __C.CNTK.MODEL_NAME = "HotailorPOC2.model" # model file name
30 | __C.CNTK.CLASS_MAP_FILE = "HotailorPOC2_class_map.txt" # class map file name
31 |
32 | #################
33 |
34 | __C.CNTK.BASE_MODEL = "AlexNet" # "VGG16" or "AlexNet"
35 |
36 | __C.CNTK.CONV_BIAS_INIT = 0.0
37 | __C.CNTK.SIGMA_RPN_L1 = 3.0
38 |
39 | # change below settings to match variables used to train YOUR model
40 | __C.CNTK.IMAGE_WIDTH = 1000
41 | __C.CNTK.IMAGE_HEIGHT = 1000
42 | __C.CNTK.NUM_CHANNELS = 3
43 |
44 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3
45 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0
46 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1
47 |
48 | __C.CNTK.DRAW_NEGATIVE_ROIS = False
49 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False
50 |
51 |
52 | #
53 | # Base models
54 | #
55 |
56 | if __C.CNTK.BASE_MODEL == "AlexNet":
57 | __C.CNTK.BASE_MODEL_FILE = "AlexNet.model"
58 | __C.CNTK.FEATURE_NODE_NAME = "features"
59 | __C.CNTK.LAST_CONV_NODE_NAME = "conv5.y"
60 | __C.CNTK.START_TRAIN_CONV_NODE_NAME = __C.CNTK.FEATURE_NODE_NAME
61 | __C.CNTK.POOL_NODE_NAME = "pool3"
62 | __C.CNTK.LAST_HIDDEN_NODE_NAME = "h2_d"
63 | __C.CNTK.RPN_NUM_CHANNELS = 256
64 | __C.CNTK.ROI_DIM = 6
65 | __C.CNTK.E2E_LR_FACTOR = 1.0
66 | __C.CNTK.RPN_LR_FACTOR = 1.0
67 | __C.CNTK.FRCN_LR_FACTOR = 1.0
68 |
69 | if __C.CNTK.BASE_MODEL == "VGG16":
70 | __C.CNTK.BASE_MODEL_FILE = "VGG16_ImageNet_Caffe.model"
71 | __C.CNTK.FEATURE_NODE_NAME = "data"
72 | __C.CNTK.LAST_CONV_NODE_NAME = "relu5_3"
73 | __C.CNTK.START_TRAIN_CONV_NODE_NAME = "pool2" # __C.CNTK.FEATURE_NODE_NAME
74 | __C.CNTK.POOL_NODE_NAME = "pool5"
75 | __C.CNTK.LAST_HIDDEN_NODE_NAME = "drop7"
76 | __C.CNTK.RPN_NUM_CHANNELS = 512
77 | __C.CNTK.ROI_DIM = 7
78 | __C.CNTK.E2E_LR_FACTOR = 1.0
79 | __C.CNTK.RPN_LR_FACTOR = 1.0
80 | __C.CNTK.FRCN_LR_FACTOR = 1.0
81 |
82 | #
83 | # Training options
84 | #
85 |
86 | __C.TRAIN = edict()
87 |
88 | # Minibatch size (number of regions of interest [ROIs])
89 | __C.TRAIN.BATCH_SIZE = 128
90 |
91 | # Fraction of minibatch that is labeled foreground (i.e. class > 0)
92 | __C.TRAIN.FG_FRACTION = 0.25
93 |
94 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
95 | __C.TRAIN.FG_THRESH = 0.5
96 |
97 | # Overlap threshold for a ROI to be considered background (class = 0 if
98 | # overlap in [LO, HI))
99 | __C.TRAIN.BG_THRESH_HI = 0.5
100 | __C.TRAIN.BG_THRESH_LO = 0.0
101 |
102 | # Use horizontally-flipped images during training?
103 | __C.TRAIN.USE_FLIPPED = True
104 |
105 | # Train bounding-box regressors
106 | __C.TRAIN.BBOX_REG = True
107 |
108 | # Overlap required between a ROI and ground-truth box in order for that ROI to
109 | # be used as a bounding-box regression training example
110 | __C.TRAIN.BBOX_THRESH = 0.5
111 |
112 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
113 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
114 | # Deprecated (inside weights)
115 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
116 | # Normalize the targets using "precomputed" (or made up) means and stdevs
117 | # (BBOX_NORMALIZE_TARGETS must also be True)
118 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
119 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
120 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
121 |
122 | # Train using these proposals
123 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
124 |
125 | # IOU >= thresh: positive example
126 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
127 | # IOU < thresh: negative example
128 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
129 | # If an anchor statisfied by positive and negative conditions set to negative
130 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
131 | # Max number of foreground examples
132 | __C.TRAIN.RPN_FG_FRACTION = 0.5
133 | # Total number of examples
134 | __C.TRAIN.RPN_BATCHSIZE = 256
135 | # NMS threshold used on RPN proposals
136 | __C.TRAIN.RPN_NMS_THRESH = 0.7
137 | # Number of top scoring boxes to keep before apply NMS to RPN proposals
138 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
139 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
140 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
141 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
142 | __C.TRAIN.RPN_MIN_SIZE = 16
143 | # Deprecated (outside weights)
144 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
145 | # Give the positive RPN examples weight of p * 1 / {num positives}
146 | # and give negatives a weight of (1 - p)
147 | # Set to -1.0 to use uniform example weighting
148 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
149 |
150 |
151 | #
152 | # Testing options
153 | #
154 |
155 | __C.TEST = edict()
156 |
157 | # Overlap threshold used for non-maximum suppression (suppress boxes with
158 | # IoU >= this threshold)
159 | __C.TEST.NMS = 0.3
160 |
161 | # Test using bounding-box regressors
162 | __C.TEST.BBOX_REG = True
163 |
164 | # Propose boxes
165 | __C.TEST.HAS_RPN = False
166 |
167 | # Test using these proposals
168 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
169 |
170 | ## NMS threshold used on RPN proposals
171 | __C.TEST.RPN_NMS_THRESH = 0.7
172 | ## Number of top scoring boxes to keep before apply NMS to RPN proposals
173 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
174 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals
175 | __C.TEST.RPN_POST_NMS_TOP_N = 300
176 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
177 | __C.TEST.RPN_MIN_SIZE = 16
178 |
179 |
180 | #
181 | # MISC
182 | #
183 |
184 | # The mapping from image coordinates to feature map coordinates might cause
185 | # some boxes that are distinct in image space to become identical in feature
186 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
187 | # for identifying duplicate boxes.
188 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
189 | __C.DEDUP_BOXES = 1./16.
190 |
191 | # Pixel mean values (BGR order) as a (1, 1, 3) array
192 | # We use the same pixel mean for all networks even though it's not exactly what
193 | # they were trained with
194 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
195 |
196 | # For reproducibility
197 | __C.RNG_SEED = 3
198 |
199 | # A small number that's used many times
200 | __C.EPS = 1e-14
201 |
202 | # Use GPU implementation of non-maximum suppression
203 | __C.USE_GPU_NMS = True
204 |
205 | # Default GPU device id
206 | __C.GPU_ID = 0
207 |
208 |
209 | def _merge_a_into_b(a, b):
210 | """Merge config dictionary a into config dictionary b, clobbering the
211 | options in b whenever they are also specified in a.
212 | """
213 | if type(a) is not edict:
214 | return
215 |
216 |     for k, v in a.items():
217 |         # a must specify keys that are in b
218 |         if k not in b:
219 | raise KeyError('{} is not a valid config key'.format(k))
220 |
221 | # the types must match, too
222 | old_type = type(b[k])
223 | if old_type is not type(v):
224 | if isinstance(b[k], np.ndarray):
225 | v = np.array(v, dtype=b[k].dtype)
226 | else:
227 | raise ValueError(('Type mismatch ({} vs. {}) '
228 | 'for config key: {}').format(type(b[k]),
229 | type(v), k))
230 |
231 | # recursively merge dicts
232 | if type(v) is edict:
233 | try:
234 | _merge_a_into_b(a[k], b[k])
235 | except:
236 | print('Error under config key: {}'.format(k))
237 | raise
238 | else:
239 | b[k] = v
240 |
241 | def cfg_from_file(filename):
242 | """Load a config file and merge it into the default options."""
243 | import yaml
244 | with open(filename, 'r') as f:
245 | yaml_cfg = edict(yaml.load(f))
246 |
247 | _merge_a_into_b(yaml_cfg, __C)
248 |
249 | def cfg_from_list(cfg_list):
250 | """Set config keys via list (e.g., from command line)."""
251 | from ast import literal_eval
252 | assert len(cfg_list) % 2 == 0
253 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
254 | key_list = k.split('.')
255 | d = __C
256 | for subkey in key_list[:-1]:
257 |             assert subkey in d
258 | d = d[subkey]
259 | subkey = key_list[-1]
260 |         assert subkey in d
261 | try:
262 | value = literal_eval(v)
263 | except:
264 | # handle the case when v is a string literal
265 | value = v
266 | assert type(value) == type(d[subkey]), \
267 | 'type {} does not match original type {}'.format(
268 | type(value), type(d[subkey]))
269 | d[subkey] = value
270 |
--------------------------------------------------------------------------------
/deploy.cmd:
--------------------------------------------------------------------------------
1 | @if "%SCM_TRACE_LEVEL%" NEQ "4" @echo off
2 | :: ----------------------
3 | :: KUDU Deployment Script
4 | :: Version: 2.0.0
5 | :: ----------------------
6 | :: Prerequisites
7 | :: -------------
8 | :: VARIABLES
9 | echo "ATTENTION"
10 | echo "USER MUST CHECK/SET THESE VARIABLES:"
11 | SET PYTHON_DIR=%SYSTEMDRIVE%\home\python354x64
12 | SET PYTHON_EXE=%SYSTEMDRIVE%\home\python354x64\python.exe
13 | ::SET CNTK_BIN=https://azurewebappcntk.blob.core.windows.net/cntkrc/cntk.zip
14 | echo "Installed python extension located here:"
15 | echo %PYTHON_EXE%
16 | ::echo "CNTK Binaries and version located here:"
17 | ::echo %CNTK_BIN%
18 | :: Verify node.js installed
19 | where node 2>nul >nul
20 | IF %ERRORLEVEL% NEQ 0 (
21 | echo Missing node.js executable, please install node.js, if already installed make sure it can be reached from current environment.
22 | goto error
23 | )
24 | :: Setup
25 | :: -----
26 | setlocal enabledelayedexpansion
27 | SET ARTIFACTS=%~dp0%..\artifacts
28 | IF NOT DEFINED DEPLOYMENT_SOURCE (
29 | SET DEPLOYMENT_SOURCE=%~dp0%.
30 | )
31 | IF NOT DEFINED DEPLOYMENT_TARGET (
32 | SET DEPLOYMENT_TARGET=%ARTIFACTS%\wwwroot
33 | )
34 | IF NOT DEFINED NEXT_MANIFEST_PATH (
35 | SET NEXT_MANIFEST_PATH=%ARTIFACTS%\manifest
36 | IF NOT DEFINED PREVIOUS_MANIFEST_PATH (
37 | SET PREVIOUS_MANIFEST_PATH=%ARTIFACTS%\manifest
38 | )
39 | )
40 | IF NOT DEFINED KUDU_SYNC_CMD (
41 | :: Install kudu sync
42 | echo Installing Kudu Sync
43 | call npm install kudusync -g --silent
44 | IF !ERRORLEVEL! NEQ 0 goto error
45 | :: Locally just running "kuduSync" would also work
46 | SET KUDU_SYNC_CMD=%appdata%\npm\kuduSync.cmd
47 | )
48 | goto Deployment
49 |
50 |
51 |
52 | ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
53 | :Deployment
54 | echo Handling python deployment.
55 | :: 1. KuduSync
56 | IF /I "%IN_PLACE_DEPLOYMENT%" NEQ "1" (
57 | call :ExecuteCmd "%KUDU_SYNC_CMD%" -v 50 -f "%DEPLOYMENT_SOURCE%" -t "%DEPLOYMENT_TARGET%" -n "%NEXT_MANIFEST_PATH%" -p "%PREVIOUS_MANIFEST_PATH%" -i ".git;.hg;.deployment;deploy.cmd"
58 | IF !ERRORLEVEL! NEQ 0 goto error
59 | )
60 | IF EXIST "%DEPLOYMENT_TARGET%\.skipPythonDeployment" goto postPython
61 | echo Detected requirements.txt. You can skip Python specific steps with a .skipPythonDeployment file.
62 | echo Custom Script
63 | pushd "%DEPLOYMENT_TARGET%"
64 | :: 3. Setup python
65 | ::echo "Configuring pip"
66 | ::curl https://bootstrap.pypa.io/get-pip.py | %PYTHON_EXE%
67 | :: 4. Install packages
68 | echo Pip install requirements.
69 | echo "Installing requirements"
70 | %PYTHON_EXE% -m pip install -r requirements.txt
71 | :: This PATH should direct to CNTK directory
72 | ::set PATH=%PYTHON_DIR%;%PATH%
73 | ::echo PATH set to %PYTHON_DIR%
74 | IF !ERRORLEVEL! NEQ 0 goto error
75 |
76 | :postPython
77 | ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
78 | goto end
79 |
80 |
81 | :: Execute command routine that will echo out when error
82 | :ExecuteCmd
83 | setlocal
84 | set _CMD_=%*
85 | call %_CMD_%
86 | if "%ERRORLEVEL%" NEQ "0" echo Failed exitCode=%ERRORLEVEL%, command=%_CMD_%
87 | exit /b %ERRORLEVEL%
88 | :error
89 | endlocal
90 | echo An error has occurred during web site deployment.
91 | call :exitSetErrorLevel
92 | call :exitFromFunction 2>nul
93 | :exitSetErrorLevel
94 | exit /b 1
95 | :exitFromFunction
96 | ()
97 | :end
98 | endlocal
99 | echo Finished successfully.
--------------------------------------------------------------------------------
/doc/Postman_2017-09-26_22-50-06.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/Postman_2017-09-26_22-50-06.jpg
--------------------------------------------------------------------------------
/doc/cmd_2017-09-26_22-15-45.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-15-45.jpg
--------------------------------------------------------------------------------
/doc/cmd_2017-09-26_22-18-52.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-18-52.jpg
--------------------------------------------------------------------------------
/doc/cmd_2017-09-26_22-20-23.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/cmd_2017-09-26_22-20-23.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_22-17-20.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-17-20.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_22-22-19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-22-19.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_22-23-19.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-19.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_22-23-59.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-23-59.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_22-25-04.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_22-25-04.jpg
--------------------------------------------------------------------------------
/doc/iexplore_2017-09-26_23-09-42.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/doc/iexplore_2017-09-26_23-09-42.jpg
--------------------------------------------------------------------------------
/evaluate.py:
--------------------------------------------------------------------------------
1 |
2 | from __future__ import print_function
3 | import numpy as np
4 | import os, sys
5 | import cv2
6 | from cntk import load_model, Axis, input_variable
7 | from cntk.core import Value
8 | from cntk.io import MinibatchData
9 | from cntk.layers import Constant
10 |
11 | from utils.annotations.annotations_helper import parse_class_map_file
12 | from config import cfg
13 | from plot_helpers import visualizeResultsFaster, imsave, apply_nms_to_single_image_results
14 | from cntk_helpers import regress_rois
15 |
16 | ###############################################################
17 | # Variables
18 | ###############################################################
19 |
20 | image_width = cfg["CNTK"].IMAGE_WIDTH
21 | image_height = cfg["CNTK"].IMAGE_HEIGHT
22 | num_channels = cfg["CNTK"].NUM_CHANNELS
23 |
24 | # dims_input -- (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
25 | dims_input_const = MinibatchData(Value(batch=np.asarray(
26 | [image_width, image_height, image_width, image_height, image_width, image_height], dtype=np.float32)), 1, 1, False)
27 |
28 | # Color used for padding and normalization (Caffe model uses [102.98010, 115.94650, 122.77170])
29 | img_pad_value = [103, 116, 123] if cfg["CNTK"].BASE_MODEL == "VGG16" else [114, 114, 114]
30 | normalization_const = Constant([[[103]], [[116]], [[123]]]) if cfg["CNTK"].BASE_MODEL == "VGG16" else Constant([[[114]], [[114]], [[114]]])
31 |
32 |
33 | globalvars = {}
34 |
35 | map_file_path = cfg["CNTK"].MODEL_DIRECTORY
36 | globalvars['class_map_file'] = os.path.join(map_file_path, cfg["CNTK"].CLASS_MAP_FILE)
37 | globalvars['classes'] = parse_class_map_file(globalvars['class_map_file'])
38 | globalvars['num_classes'] = len(globalvars['classes'])
39 | globalvars['temppath'] = cfg["CNTK"].TEMP_PATH
40 | feature_node_name = cfg["CNTK"].FEATURE_NODE_NAME
41 | model_path = os.path.join(cfg["CNTK"].MODEL_DIRECTORY, cfg["CNTK"].MODEL_NAME)
42 |
43 | # helper function
44 | def load_resize_and_pad(image_path, width, height, pad_value=114):
45 | if "@" in image_path:
46 | print("WARNING: zipped image archives are not supported for visualizing results.")
47 | exit(0)
48 |
49 | img = cv2.imread(image_path)
50 | img_width = len(img[0])
51 | img_height = len(img)
52 | scale_w = img_width > img_height
53 | target_w = width
54 | target_h = height
55 |
56 | if scale_w:
57 | target_h = int(np.round(img_height * float(width) / float(img_width)))
58 | else:
59 | target_w = int(np.round(img_width * float(height) / float(img_height)))
60 |
61 | resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST)
62 |
63 | top = int(max(0, np.round((height - target_h) / 2)))
64 | left = int(max(0, np.round((width - target_w) / 2)))
65 | bottom = height - top - target_h
66 | right = width - left - target_w
67 | resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right,
68 | cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value])
69 |
70 |     # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK accepts
71 | model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))
72 |
73 | dims = (width, height, target_w, target_h, img_width, img_height)
74 | return resized_with_pad, model_arg_rep, dims
75 |
76 |
77 | # mode="returnimage" or "returntags"
78 | def eval_faster_rcnn(eval_model, imgPath, img_shape,
79 | results_base_path, feature_node_name, classes, mode,
80 | drawUnregressedRois=False, drawNegativeRois=False,
81 | nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8):
82 |
83 | # prepare model
84 | image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
85 | dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
86 | frcn_eval = eval_model(image_input, dims_input)
87 |
88 | #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
89 | print("Plotting results from Faster R-CNN model for image.")
90 | # evaluate single image
91 |
92 | _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])
93 |
94 | dims_input = np.array(dims, dtype=np.float32)
95 | dims_input.shape = (1,) + dims_input.shape
96 | output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})
97 |
98 | out_dict = dict([(k.name, k) for k in output])
99 | out_cls_pred = output[out_dict['cls_pred']][0]
100 | out_rpn_rois = output[out_dict['rpn_rois']][0]
101 | out_bbox_regr = output[out_dict['bbox_regr']][0]
102 |
103 | labels = out_cls_pred.argmax(axis=1)
104 | scores = out_cls_pred.max(axis=1).tolist()
105 |
106 | if mode=="returntags":
107 | class Tag(object):
108 | def __init__(self, label, score, bbox):
109 | self.label = label
110 | self.score = score
111 | self.bbox = bbox
112 |
113 | def serialize(self):
114 | return {
115 | 'label': self.label,
116 | 'score': self.score,
117 | 'bbox': self.bbox,
118 | }
119 |
120 | results = []
121 | for i in range(len(out_rpn_rois)):
122 | if labels[i] != 0:
123 | x = Tag(str(classes[labels[i]]), str(scores[i]), str(out_rpn_rois[i]))
124 | results.append(x)
125 |
126 | return results
127 |
128 |
129 | elif mode=="returnimage":
130 | evaluated_image_path = "{}/{}".format(results_base_path, 'evaluated_' + os.path.basename(imgPath))
131 | if drawUnregressedRois:
132 | # plot results without final regression
133 | imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
134 | classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
135 | decisionThreshold=bgrPlotThreshold)
136 | imsave(evaluated_image_path, imgDebug)
137 | else:
138 | # apply regression and nms to bbox coordinates
139 | regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
140 |
141 | nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
142 | nms_threshold=nmsThreshold,
143 | conf_threshold=nmsConfThreshold)
144 |
145 | img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
146 | classes, nmsKeepIndices=nmsKeepIndices,
147 | boDrawNegativeRois=drawNegativeRois,
148 | decisionThreshold=bgrPlotThreshold)
149 | imsave(evaluated_image_path, img)
150 |
151 | return evaluated_image_path
152 | else:
153 | raise ValueError("Unsupported value found in 'mode' parameter")
154 |
155 |
156 |
157 |
158 |
159 | # mode="returnimage" or "returntags"
160 | def evaluateimage(file_path, mode, eval_model=None):
161 |
162 | #from plot_helpers import eval_and_plot_faster_rcnn
163 |     if eval_model is None:
164 | print("Loading existing model from %s" % model_path)
165 | eval_model = load_model(model_path)
166 | img_shape = (num_channels, image_height, image_width)
167 | results_folder = globalvars['temppath']
168 | results=eval_faster_rcnn(eval_model, file_path, img_shape,
169 | results_folder, feature_node_name, globalvars['classes'], mode,
170 | drawUnregressedRois=cfg["CNTK"].DRAW_UNREGRESSED_ROIS,
171 | drawNegativeRois=cfg["CNTK"].DRAW_NEGATIVE_ROIS,
172 | nmsThreshold=cfg["CNTK"].RESULTS_NMS_THRESHOLD,
173 | nmsConfThreshold=cfg["CNTK"].RESULTS_NMS_CONF_THRESHOLD,
174 | bgrPlotThreshold=cfg["CNTK"].RESULTS_BGR_PLOT_THRESHOLD)
175 | return results
176 |
177 |
--------------------------------------------------------------------------------
/logs/placeholder.txt:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/logs/placeholder.txt
--------------------------------------------------------------------------------
/plot_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | from __future__ import print_function
8 | from builtins import str
9 | import sys, os, time
10 | import numpy as np
11 | from easydict import EasyDict
12 | from builtins import range
13 | import copy, textwrap
14 | from PIL import Image, ImageFont, ImageDraw
15 | from PIL.ExifTags import TAGS
16 |
17 | # this is important when deploying to headless server environment (non-GUI)
18 | ###################################################
19 | import matplotlib
20 | # force headless backend, or set 'backend' to 'Agg'
21 | # in your ~/.matplotlib/matplotlibrc
22 | matplotlib.use('Agg')
23 |
24 | import matplotlib.pyplot
25 | # force non-interactive mode, or set 'interactive' to False
26 | # in your ~/.matplotlib/matplotlibrc
27 | from matplotlib.pyplot import imsave
28 | matplotlib.pyplot.ioff()
29 | ###################################################
30 |
31 | import cntk
32 | from cntk import input_variable, Axis
33 | from utils.nms.nms_wrapper import apply_nms_to_single_image_results
34 | from cntk_helpers import regress_rois
35 | import cv2 # pip install opencv-python
36 |
37 | available_font = "arial.ttf"
38 | try:
39 | dummy = ImageFont.truetype(available_font, 16)
40 | except:
41 | available_font = "FreeMono.ttf"
42 |
43 |
44 | ####################################
45 | # Visualize results
46 | ####################################
47 | def visualizeResultsFaster(imgPath, roiLabels, roiScores, roiRelCoords, padWidth, padHeight, classes,
48 | nmsKeepIndices = None, boDrawNegativeRois = True, decisionThreshold = 0.0):
49 | # read and resize image
50 | imgWidth, imgHeight = imWidthHeight(imgPath)
51 | scale = 800.0 / max(imgWidth, imgHeight)
52 | imgHeight = int(imgHeight * scale)
53 | imgWidth = int(imgWidth * scale)
54 | if imgWidth > imgHeight:
55 | h_border = 0
56 | v_border = int((imgWidth - imgHeight)/2)
57 | else:
58 | h_border = int((imgHeight - imgWidth)/2)
59 | v_border = 0
60 |
61 | PAD_COLOR = [103, 116, 123] # [114, 114, 114]
62 | cv_img = cv2.imread(imgPath)
63 | rgb_img = cv2.cvtColor(cv_img,cv2.COLOR_BGR2RGB)
64 | resized = cv2.resize(rgb_img, (imgWidth, imgHeight), interpolation=cv2.INTER_NEAREST)
65 | imgDebug = cv2.copyMakeBorder(resized,v_border,v_border,h_border,h_border,cv2.BORDER_CONSTANT,value=PAD_COLOR)
66 | rect_scale = 800 / padWidth
67 |
68 | assert(len(roiLabels) == len(roiRelCoords))
69 | if roiScores:
70 | assert(len(roiLabels) == len(roiScores))
71 | minScore = min(roiScores)
72 | print("roiScores min: {}, max: {}, threshold: {}".format(minScore, max(roiScores), decisionThreshold))
73 | if minScore > decisionThreshold:
74 | decisionThreshold = minScore * 0.5
75 | print("reset decision threshold to: {}".format(decisionThreshold))
76 |
77 | # draw multiple times to avoid occlusions
78 | for iter in range(0,3):
79 | for roiIndex in range(len(roiRelCoords)):
80 | label = roiLabels[roiIndex]
81 | if roiScores:
82 | score = roiScores[roiIndex]
83 | if decisionThreshold and score < decisionThreshold:
84 | label = 0
85 |
86 | # init drawing parameters
87 | thickness = 1
88 | if label == 0:
89 | color = (255, 0, 0)
90 | else:
91 | color = getColorsPalette()[label]
92 |
93 | rect = [(rect_scale * i) for i in roiRelCoords[roiIndex]]
94 | rect[0] = int(max(0, min(padWidth, rect[0])))
95 | rect[1] = int(max(0, min(padHeight, rect[1])))
96 | rect[2] = int(max(0, min(padWidth, rect[2])))
97 | rect[3] = int(max(0, min(padHeight, rect[3])))
98 |
99 | # draw in higher iterations only the detections
100 | if iter == 0 and boDrawNegativeRois:
101 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness)
102 | elif iter==1 and label > 0:
103 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices):
104 | thickness = 4
105 | drawRectangles(imgDebug, [rect], color=color, thickness=thickness)
106 | elif iter == 2 and label > 0:
107 | if not nmsKeepIndices or (roiIndex in nmsKeepIndices):
108 | font = ImageFont.truetype(available_font, 18)
109 | text = classes[label]
110 | if roiScores:
111 | text += "(" + str(round(score, 2)) + ")"
112 | imgDebug = drawText(imgDebug, (rect[0],rect[1]), text, color = (255,255,255), font = font, colorBackground=color)
113 | return imgDebug
114 |
115 | def load_resize_and_pad(image_path, width, height, pad_value=114):
116 | if "@" in image_path:
117 | print("WARNING: zipped image archives are not supported for visualizing results.")
118 | exit(0)
119 |
120 | img = cv2.imread(image_path)
121 | img_width = len(img[0])
122 | img_height = len(img)
123 | scale_w = img_width > img_height
124 | target_w = width
125 | target_h = height
126 |
127 | if scale_w:
128 | target_h = int(np.round(img_height * float(width) / float(img_width)))
129 | else:
130 | target_w = int(np.round(img_width * float(height) / float(img_height)))
131 |
132 | resized = cv2.resize(img, (target_w, target_h), 0, 0, interpolation=cv2.INTER_NEAREST)
133 |
134 | top = int(max(0, np.round((height - target_h) / 2)))
135 | left = int(max(0, np.round((width - target_w) / 2)))
136 | bottom = height - top - target_h
137 | right = width - left - target_w
138 | resized_with_pad = cv2.copyMakeBorder(resized, top, bottom, left, right,
139 | cv2.BORDER_CONSTANT, value=[pad_value, pad_value, pad_value])
140 |
141 |     # transpose(2,0,1) converts the image from HWC to the CHW format which CNTK accepts
142 | model_arg_rep = np.ascontiguousarray(np.array(resized_with_pad, dtype=np.float32).transpose(2, 0, 1))
143 |
144 | dims = (width, height, target_w, target_h, img_width, img_height)
145 | return resized_with_pad, model_arg_rep, dims
146 |
147 | # Evaluates a Faster R-CNN model and plots images with detected boxes
148 | def eval_and_plot_faster_rcnn(eval_model, num_images_to_plot, test_map_file, img_shape,
149 |                               results_base_path, feature_node_name, classes,
150 |                               drawUnregressedRois=False, drawNegativeRois=False,
151 |                               nmsThreshold=0.5, nmsConfThreshold=0.0, bgrPlotThreshold = 0.8):
152 |     # get image paths
153 |     with open(test_map_file) as f:
154 |         content = f.readlines()
155 |     img_base_path = os.path.dirname(os.path.abspath(test_map_file))
156 |     img_file_names = [os.path.join(img_base_path, x.split('\t')[1]) for x in content]
157 |
158 |     # prepare model
159 |     image_input = input_variable(img_shape, dynamic_axes=[Axis.default_batch_axis()], name=feature_node_name)
160 |     dims_input = input_variable((1,6), dynamic_axes=[Axis.default_batch_axis()], name='dims_input')
161 |     frcn_eval = eval_model(image_input, dims_input)
162 |
163 |     #dims_input_const = cntk.constant([image_width, image_height, image_width, image_height, image_width, image_height], (1, 6))
164 |     print("Plotting results from Faster R-CNN model for %s images." % num_images_to_plot)
165 |     for i in range(0, num_images_to_plot):
166 |         imgPath = img_file_names[i]
167 |
168 |         # evaluate single image
169 |         _, cntk_img_input, dims = load_resize_and_pad(imgPath, img_shape[2], img_shape[1])
170 |
171 |         dims_input = np.array(dims, dtype=np.float32)
172 |         dims_input.shape = (1,) + dims_input.shape
173 |         output = frcn_eval.eval({frcn_eval.arguments[0]: [cntk_img_input], frcn_eval.arguments[1]: dims_input})
174 |
175 |         out_dict = dict([(k.name, k) for k in output])
176 |         out_cls_pred = output[out_dict['cls_pred']][0]
177 |         out_rpn_rois = output[out_dict['rpn_rois']][0]
178 |         out_bbox_regr = output[out_dict['bbox_regr']][0]
179 |
180 |         labels = out_cls_pred.argmax(axis=1)
181 |         scores = out_cls_pred.max(axis=1).tolist()
182 |
183 |         if drawUnregressedRois:
184 |             # plot results without final regression
185 |             imgDebug = visualizeResultsFaster(imgPath, labels, scores, out_rpn_rois, img_shape[2], img_shape[1],
186 |                                               classes, nmsKeepIndices=None, boDrawNegativeRois=drawNegativeRois,
187 |                                               decisionThreshold=bgrPlotThreshold)
188 |             imsave("{}/{}_{}".format(results_base_path, i, os.path.basename(imgPath)), imgDebug)
189 |
190 |         # apply regression and nms to bbox coordinates
191 |         regressed_rois = regress_rois(out_rpn_rois, out_bbox_regr, labels, dims)
192 |
193 |         nmsKeepIndices = apply_nms_to_single_image_results(regressed_rois, labels, scores,
194 |                                                            nms_threshold=nmsThreshold,
195 |                                                            conf_threshold=nmsConfThreshold)
196 |
197 |         img = visualizeResultsFaster(imgPath, labels, scores, regressed_rois, img_shape[2], img_shape[1],
198 |                                      classes, nmsKeepIndices=nmsKeepIndices,
199 |                                      boDrawNegativeRois=drawNegativeRois,
200 |                                      decisionThreshold=bgrPlotThreshold)
201 |         imsave("{}/{}_regr_{}".format(results_base_path, i, os.path.basename(imgPath)), img)
202 |
203 |
204 | ####################################
205 | # helper library
206 | ####################################
207 |
208 | def imread(imgPath, boThrowErrorIfExifRotationTagSet = True):
209 |     if not os.path.exists(imgPath):
210 |         print("ERROR: image path does not exist.")
211 |         raise FileNotFoundError(imgPath)
212 |
213 |     rotation = rotationFromExifTag(imgPath)
214 |     if boThrowErrorIfExifRotationTagSet and rotation != 0:
215 |         print ("Error: exif rotation tag set, image needs to be rotated by %d degrees." % rotation)
216 |     img = cv2.imread(imgPath)
217 |     if img is None:
218 |         print ("ERROR: cannot load image " + imgPath)
219 |         raise IOError("cannot load image " + imgPath)
220 |     if rotation != 0:
221 |         img = imrotate(img, rotation).copy()  # occasionally got "TypeError: Layout of the output array img is incompatible with cv::Mat" without the copy
222 |     return img
223 |
224 | def rotationFromExifTag(imgPath):
225 |     TAGSinverted = {v: k for k, v in TAGS.items()}
226 |     orientationExifId = TAGSinverted['Orientation']
227 |     try:
228 |         imageExifTags = Image.open(imgPath)._getexif()
229 |     except:
230 |         imageExifTags = None
231 |
232 |     # rotate the image if orientation exif tag is present
233 |     rotation = 0
234 |     if imageExifTags is not None and orientationExifId is not None and orientationExifId in imageExifTags:
235 |         orientation = imageExifTags[orientationExifId]
236 |         # print ("orientation = " + str(imageExifTags[orientationExifId]))
237 |         if orientation == 1 or orientation == 0:
238 |             rotation = 0 # no need to do anything
239 |         elif orientation == 6:
240 |             rotation = -90
241 |         elif orientation == 8:
242 |             rotation = 90
243 |         else:
244 |             print ("ERROR: orientation = " + str(orientation) + " not supported!")
245 |             raise ValueError("unsupported exif orientation: " + str(orientation))
246 |     return rotation
247 |
248 | def imwrite(img, imgPath):
249 |     cv2.imwrite(imgPath, img)
250 |
251 | def imresize(img, scale, interpolation = cv2.INTER_LINEAR):
252 |     return cv2.resize(img, (0,0), fx=scale, fy=scale, interpolation=interpolation)
253 |
254 | def imresizeMaxDim(img, maxDim, boUpscale = False, interpolation = cv2.INTER_LINEAR):
255 |     scale = 1.0 * maxDim / max(img.shape[:2])
256 |     if scale < 1 or boUpscale:
257 |         img = imresize(img, scale, interpolation)
258 |     else:
259 |         scale = 1.0
260 |     return img, scale
261 |
262 | def imWidth(input):
263 |     return imWidthHeight(input)[0]
264 |
265 | def imHeight(input):
266 |     return imWidthHeight(input)[1]
267 |
268 | def imWidthHeight(input):
269 |     width, height = Image.open(input).size # this does not load the full image
270 |     return width, height
271 |
272 | def imArrayWidth(input):
273 |     return imArrayWidthHeight(input)[0]
274 |
275 | def imArrayHeight(input):
276 |     return imArrayWidthHeight(input)[1]
277 |
278 | def imArrayWidthHeight(input):
279 |     width = input.shape[1]
280 |     height = input.shape[0]
281 |     return width, height
282 |
283 | def imshow(img, waitDuration=0, maxDim = None, windowName = 'img'):
284 |     if isinstance(img, str): # test if 'img' is a string
285 |         img = cv2.imread(img)
286 |     if maxDim is not None:
287 |         scaleVal = 1.0 * maxDim / max(img.shape[:2])
288 |         if scaleVal < 1:
289 |             img = imresize(img, scaleVal)
290 |     cv2.imshow(windowName, img)
291 |     cv2.waitKey(waitDuration)
292 |
293 | def drawRectangles(img, rects, color = (0, 255, 0), thickness = 2):
294 |     for rect in rects:
295 |         pt1 = tuple(ToIntegers(rect[0:2]))
296 |         pt2 = tuple(ToIntegers(rect[2:]))
297 |         try:
298 |             cv2.rectangle(img, pt1, pt2, color, thickness)
299 |         except:
300 |             print("Unexpected error:", sys.exc_info()[0])
301 |             raise
302 |
303 | def drawCrossbar(img, pt):
304 |     (x,y) = pt
305 |     cv2.rectangle(img, (0, y), (x, y), (255, 255, 0), 1)
306 |     cv2.rectangle(img, (x, 0), (x, y), (255, 255, 0), 1)
307 |     cv2.rectangle(img, (img.shape[1],y), (x, y), (255, 255, 0), 1)
308 |     cv2.rectangle(img, (x, img.shape[0]), (x, y), (255, 255, 0), 1)
309 |
310 | def ptClip(pt, maxWidth, maxHeight):
311 |     pt = list(pt)
312 |     pt[0] = max(pt[0], 0)
313 |     pt[1] = max(pt[1], 0)
314 |     pt[0] = min(pt[0], maxWidth)
315 |     pt[1] = min(pt[1], maxHeight)
316 |     return pt
317 |
318 | def drawText(img, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
319 |     pilImg = imconvertCv2Pil(img)
320 |     pilImg = pilDrawText(pilImg, pt, text, textWidth, color, colorBackground, font)
321 |     return imconvertPil2Cv(pilImg)
322 |
323 | def pilDrawText(pilImg, pt, text, textWidth=None, color = (255,255,255), colorBackground = None, font = ImageFont.truetype(available_font, 16)):
324 |     textY = pt[1]
325 |     draw = ImageDraw.Draw(pilImg)
326 |     if textWidth is None:
327 |         lines = [text]
328 |     else:
329 |         lines = textwrap.wrap(text, width=textWidth)
330 |     for line in lines:
331 |         width, height = font.getsize(line)
332 |         if colorBackground is not None:
333 |             draw.rectangle((pt[0], textY, pt[0] + width, textY + height), fill=tuple(colorBackground[::-1]))
334 |         draw.text((pt[0], textY), line, fill = tuple(color), font = font)
335 |         textY += height
336 |     return pilImg
337 |
338 | def getColorsPalette():
339 |     colors = [[255,0,0], [0,255,0], [0,0,255], [255,255,0], [255,0,255]]
340 |     for i in range(5):
341 |         for dim in range(0,3):
342 |             for s in (0.25, 0.5, 0.75):
343 |                 if colors[i][dim] != 0:
344 |                     newColor = copy.deepcopy(colors[i])
345 |                     newColor[dim] = int(round(newColor[dim] * s))
346 |                     colors.append(newColor)
347 |     return colors
348 |
349 | def imconvertPil2Cv(pilImg):
350 |     rgb = pilImg.convert('RGB')
351 |     return np.array(rgb).copy()[:, :, ::-1]
352 |
353 | def imconvertCv2Pil(img):
354 |     cv2_im = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
355 |     return Image.fromarray(cv2_im)
356 |
357 | def ToIntegers(list1D):
358 |     return [int(float(x)) for x in list1D]
359 |
360 | def getDictionary(keys, values, boConvertValueToInt = True):
361 |     dictionary = {}
362 |     for key, value in zip(keys, values):
363 |         if boConvertValueToInt:
364 |             value = int(value)
365 |         dictionary[key] = value
366 |     return dictionary
367 |
368 | class Bbox:
369 |     MAX_VALID_DIM = 100000
370 |     left = top = right = bottom = None
371 |
372 |     def __init__(self, left, top, right, bottom):
373 |         self.left = int(round(float(left)))
374 |         self.top = int(round(float(top)))
375 |         self.right = int(round(float(right)))
376 |         self.bottom = int(round(float(bottom)))
377 |         self.standardize()
378 |
379 |     def __str__(self):
380 |         return ("Bbox object: left = {0}, top = {1}, right = {2}, bottom = {3}".format(self.left, self.top, self.right, self.bottom))
381 |
382 |     def __repr__(self):
383 |         return str(self)
384 |
385 |     def rect(self):
386 |         return [self.left, self.top, self.right, self.bottom]
387 |
388 |     def max(self):
389 |         return max([self.left, self.top, self.right, self.bottom])
390 |
391 |     def min(self):
392 |         return min([self.left, self.top, self.right, self.bottom])
393 |
394 |     def width(self):
395 |         width = self.right - self.left + 1
396 |         assert(width >= 0)
397 |         return width
398 |
399 |     def height(self):
400 |         height = self.bottom - self.top + 1
401 |         assert(height >= 0)
402 |         return height
403 |
404 |     def surfaceArea(self):
405 |         return self.width() * self.height()
406 |
407 |     def standardize(self):
408 |         # order coordinates so that left <= right and top <= bottom
409 |         # (restores the standardize() helper that __init__ above calls but which
410 |         #  is missing from this file; semantics assumed from how the class is used)
411 |         self.left, self.right = min(self.left, self.right), max(self.left, self.right)
412 |         self.top, self.bottom = min(self.top, self.bottom), max(self.top, self.bottom)
413 |
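414 | if __name__ == '__main__':
415 |     # Minimal usage sketch for load_resize_and_pad (hypothetical demo, not part of
416 |     # the original module; assumes OpenCV and numpy, both pinned in requirements.txt).
417 |     # Writes a dummy 200x100 BGR image, then resizes and pads it to 850x850.
418 |     import tempfile
419 |     demo_path = os.path.join(tempfile.gettempdir(), "pad_demo.png")
420 |     cv2.imwrite(demo_path, np.zeros((100, 200, 3), dtype=np.uint8))
421 |     padded, model_arg, dims = load_resize_and_pad(demo_path, 850, 850)
422 |     print(padded.shape)     # (850, 850, 3) - padded image in HWC layout
423 |     print(model_arg.shape)  # (3, 850, 850) - CHW tensor for the CNTK model
424 |     print(dims)             # (850, 850, 850, 425, 200, 100)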
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | easydict==1.6
2 | pytest==3.0.3
3 | opencv-python
4 | https://pypi.python.org/packages/be/5c/670e88bc3ae6afa23c1f09d52a77bbbc7d2e476e7449ad3b6750040a0ac6/scipy-1.0.0b1-cp35-none-win_amd64.whl#md5=dcc90577f2eebc264ec60a2d5729e30b
5 | https://cntk.ai/PythonWheel/CPU-Only/cntk-2.1-cp35-cp35m-win_amd64.whl
6 | Flask==0.12.2
7 | numpy==1.11.2
8 | matplotlib==1.5.3
9 | ipython==6.2.0
10 | Pillow==4.1.1
11 | PyYAML==3.12
12 |
--------------------------------------------------------------------------------
/utils/Readme.md:
--------------------------------------------------------------------------------
1 | ## Detection utils
2 |
3 | This folder contains Python utility modules for object detection networks.
4 |
5 | ### Cython modules
6 |
7 | To use the rpn component you need the precompiled Cython modules for nms (at least cpu_nms.cpXX-win_amd64.pyd for Windows or cpu_nms.cpython-XXm.so for Linux) and bbox (cython_bbox.cpXX-win_amd64.pyd for Windows or cython_bbox.cpython-XXm.so for Linux).
8 | To compile the Cython modules on Windows, see https://github.com/MrGF/py-faster-rcnn-windows:
9 | ```
10 | git clone https://github.com/MrGF/py-faster-rcnn-windows
11 | cd $FRCN_ROOT/lib
12 | python setup.py build_ext --inplace
13 | ```
14 | For Linux, see https://github.com/rbgirshick/py-faster-rcnn:
15 | ```
16 | git clone https://github.com/rbgirshick/py-faster-rcnn
17 | cd $FRCN_ROOT/lib
18 | python setup.py build_ext --inplace
19 | ```
20 | Copy the compiled `.pyd` (Windows) or `.so` (Linux) files into the `cython_modules` subfolder of this utils folder.
21 |
22 | ##### `default_config`
23 |
24 | Contains all required parameters for using a region proposal network in training or evaluation. You can overwrite these parameters by specifying a `config.py` file of the same format inside your working directory (see the usage sketches at the end of this Readme).
25 |
26 | ### `rpn` module overview
27 |
28 | The rpn module contains helper methods and required layers to generate region proposal networks for object detection.
29 |
30 | ##### `rpn_helpers`
31 |
32 | Contains helper methods to create a region proposal network (rpn) and a proposal target layer for training the rpn.
33 |
34 | ##### `generate_anchors.py`
35 |
36 | Generates a regular grid of multi-scale, multi-aspect anchor boxes.
37 |
38 | ##### `proposal_layer.py`
39 |
40 | Converts RPN outputs (per-anchor scores and bbox regression estimates) into object proposals.
41 |
42 | ##### `anchor_target_layer.py`
43 |
44 | Generates training targets/labels for each anchor. Classification labels are 1 (object), 0 (not object) or -1 (ignore).
45 | Bbox regression targets are specified when the classification label is > 0.
46 |
47 | ##### `proposal_target_layer.py`
48 |
49 | Generates training targets/labels for each object proposal: classification labels 0 - K (background, or object classes 1, ..., K)
50 | and bbox regression targets for proposals whose label is > 0.
51 |
52 | ##### `generate.py`
53 |
54 | Generates object detection proposals from an imdb using an RPN.
55 |
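56 | ### Usage sketches
57 |
58 | The snippets below are illustrative sketches rather than part of the library; they assume the repository root is on `PYTHONPATH`.
59 |
60 | Overriding `default_config` values from code (the same keys can be overwritten via a local `config.py`):
61 |
62 | ```
63 | from utils.default_config import cfg, cfg_from_list
64 |
65 | cfg_from_list(['TRAIN.BATCH_SIZE', '64', 'TEST.NMS', '0.4'])
66 | print(cfg.TRAIN.BATCH_SIZE)  # 64
67 | ```
68 |
69 | Generating the default anchor set used by the rpn module (3 scales x 3 aspect ratios):
70 |
71 | ```
72 | from utils.rpn.generate_anchors import generate_anchors
73 |
74 | anchors = generate_anchors()  # one (x1, y1, x2, y2) row per anchor
75 | print(anchors.shape)          # (9, 4)
76 | ```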
--------------------------------------------------------------------------------
/utils/annotations/annotations_helper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 | import os
9 |
10 | def parse_class_map_file(class_map_file):
11 | with open(class_map_file, "r") as f:
12 | lines = f.readlines()
13 | class_list = [None]*len(lines)
14 | for line in lines:
15 | tab_pos = line.find('\t')
16 | class_name = line[:tab_pos]
17 |         class_id = int(line[tab_pos+1:].strip())
18 | class_list[class_id] = class_name
19 |
20 | return class_list
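21 |
22 | if __name__ == '__main__':
23 |     # Minimal usage sketch (hypothetical, not part of the original module):
24 |     # a class map file holds one "<class_name>\t<class_id>" pair per line.
25 |     import tempfile
26 |     demo_file = os.path.join(tempfile.gettempdir(), "class_map.txt")
27 |     with open(demo_file, "w") as f:
28 |         f.write("__background__\t0\navocado\t1\norange\t2\n")
29 |     print(parse_class_map_file(demo_file))  # ['__background__', 'avocado', 'orange']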
--------------------------------------------------------------------------------
/utils/caffe_layers/anchor_target_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | import os
9 | #import caffe
10 | import yaml
11 | import numpy as np
12 | import numpy.random as npr
13 | from utils.default_config import cfg
14 | from utils.rpn.generate_anchors import generate_anchors
15 | from utils.rpn.bbox_transform import bbox_transform
16 | from utils.cython_modules.cython_bbox import bbox_overlaps
17 |
18 | DEBUG = False
19 |
20 | class AnchorTargetLayer: #(caffe.Layer):
21 | """
22 | Assign anchors to ground-truth targets. Produces anchor classification
23 | labels and bounding-box regression targets.
24 |     """
25 |     _deterministic_mode = False  # class default; avoids AttributeError when set_deterministic_mode() is never called
26 | def set_param_str(self, param_str):
27 | self.param_str_ = param_str
28 |
29 |     def set_deterministic_mode(self, mode = True):
30 |         self._deterministic_mode = mode
31 |
32 | def setup(self, bottom, top):
33 | layer_params = yaml.load(self.param_str_)
34 | anchor_scales = layer_params.get('scales', (8, 16, 32))
35 | self._anchors = generate_anchors(scales=np.array(anchor_scales))
36 | self._num_anchors = self._anchors.shape[0]
37 | self._feat_stride = layer_params['feat_stride']
38 |
39 | if DEBUG:
40 | print('anchors:')
41 | print(self._anchors)
42 | print('anchor shapes:')
43 | print(np.hstack((
44 | self._anchors[:, 2::4] - self._anchors[:, 0::4],
45 | self._anchors[:, 3::4] - self._anchors[:, 1::4],
46 | )))
47 | self._counts = cfg.EPS
48 | self._sums = np.zeros((1, 4))
49 | self._squared_sums = np.zeros((1, 4))
50 | self._fg_sum = 0
51 | self._bg_sum = 0
52 | self._count = 0
53 |
54 | # allow boxes to sit over the edge by a small amount
55 | self._allowed_border = layer_params.get('allowed_border', 0)
56 |
57 | height, width = bottom[0].data.shape[-2:]
58 | if DEBUG:
59 | print('AnchorTargetLayer: height', height, 'width', width)
60 |
61 | #A = self._num_anchors
62 | # labels
63 | #top[0].reshape(1, 1, A * height, width)
64 | # bbox_targets
65 | #top[1].reshape(1, A * 4, height, width)
66 | # bbox_inside_weights
67 | #top[2].reshape(1, A * 4, height, width)
68 | # bbox_outside_weights
69 | #top[3].reshape(1, A * 4, height, width)
70 |
71 | def forward(self, bottom, top):
72 | # Algorithm:
73 | #
74 | # for each (H, W) location i
75 | # generate 9 anchor boxes centered on cell i
76 | # apply predicted bbox deltas at cell i to each of the 9 anchors
77 | # filter out-of-image anchors
78 | # measure GT overlap
79 |
80 | assert bottom[0].data.shape[0] == 1, \
81 | 'Only single item batches are supported'
82 |
83 | # map of shape (..., H, W)
84 | height, width = bottom[0].data.shape[-2:]
85 | # GT boxes (x1, y1, x2, y2, label)
86 | gt_boxes = bottom[1]#.data
87 | # im_info
88 | im_info = bottom[2]#.data[0, :]
89 |
90 | if DEBUG:
91 | print('')
92 | print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
93 | print('scale: {}'.format(im_info[2]))
94 | print('height, width: ({}, {})'.format(height, width))
95 | print('rpn: gt_boxes.shape', gt_boxes.shape)
96 | print('rpn: gt_boxes', gt_boxes)
97 |
98 | # 1. Generate proposals from bbox deltas and shifted anchors
99 | shift_x = np.arange(0, width) * self._feat_stride
100 | shift_y = np.arange(0, height) * self._feat_stride
101 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
102 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
103 | shift_x.ravel(), shift_y.ravel())).transpose()
104 | # add A anchors (1, A, 4) to
105 | # cell K shifts (K, 1, 4) to get
106 | # shift anchors (K, A, 4)
107 | # reshape to (K*A, 4) shifted anchors
108 | A = self._num_anchors
109 | K = shifts.shape[0]
110 | all_anchors = (self._anchors.reshape((1, A, 4)) +
111 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
112 | all_anchors = all_anchors.reshape((K * A, 4))
113 | total_anchors = int(K * A)
114 |
115 | # only keep anchors inside the image
116 | inds_inside = np.where(
117 | (all_anchors[:, 0] >= -self._allowed_border) &
118 | (all_anchors[:, 1] >= -self._allowed_border) &
119 | (all_anchors[:, 2] < im_info[1] + self._allowed_border) & # width
120 | (all_anchors[:, 3] < im_info[0] + self._allowed_border) # height
121 | )[0]
122 |
123 | if DEBUG:
124 | print('total_anchors', total_anchors)
125 | print('inds_inside', len(inds_inside))
126 |
127 | # keep only inside anchors
128 | anchors = all_anchors[inds_inside, :]
129 | if DEBUG:
130 | print('anchors.shape', anchors.shape)
131 |
132 | # label: 1 is positive, 0 is negative, -1 is dont care
133 | labels = np.empty((len(inds_inside), ), dtype=np.float32)
134 | labels.fill(-1)
135 |
136 | # overlaps between the anchors and the gt boxes
137 | # overlaps (ex, gt)
138 | overlaps = bbox_overlaps(
139 | np.ascontiguousarray(anchors, dtype=np.float),
140 | np.ascontiguousarray(gt_boxes, dtype=np.float))
141 | argmax_overlaps = overlaps.argmax(axis=1)
142 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
143 | gt_argmax_overlaps = overlaps.argmax(axis=0)
144 | gt_max_overlaps = overlaps[gt_argmax_overlaps,
145 | np.arange(overlaps.shape[1])]
146 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
147 |
148 | if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
149 | # assign bg labels first so that positive labels can clobber them
150 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
151 |
152 | # fg label: for each gt, anchor with highest overlap
153 | labels[gt_argmax_overlaps] = 1
154 |
155 | # fg label: above threshold IOU
156 | labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
157 |
158 | if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
159 | # assign bg labels last so that negative labels can clobber positives
160 | labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
161 |
162 | # subsample positive labels if we have too many
163 | num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
164 | fg_inds = np.where(labels == 1)[0]
165 | if len(fg_inds) > num_fg:
166 |             if self._deterministic_mode:
167 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
168 | else:
169 | disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
170 | labels[disable_inds] = -1
171 |
172 | # subsample negative labels if we have too many
173 | num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
174 | bg_inds = np.where(labels == 0)[0]
175 | if len(bg_inds) > num_bg:
176 |             if self._deterministic_mode:
177 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
178 | else:
179 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
180 | labels[disable_inds] = -1
181 | #print "was %s inds, disabling %s, now %s inds" % (
182 | #len(bg_inds), len(disable_inds), np.sum(labels == 0))
183 |
184 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
185 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
186 |
187 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
188 | bbox_inside_weights[labels == 1, :] = np.array(cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)
189 |
190 | bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
191 | if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:
192 | # uniform weighting of examples (given non-uniform sampling)
193 | num_examples = np.sum(labels >= 0)
194 | positive_weights = np.ones((1, 4)) * 1.0 / num_examples
195 | negative_weights = np.ones((1, 4)) * 1.0 / num_examples
196 | else:
197 | assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
198 | (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
199 | positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
200 | np.sum(labels == 1))
201 | negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
202 | np.sum(labels == 0))
203 | bbox_outside_weights[labels == 1, :] = positive_weights
204 | bbox_outside_weights[labels == 0, :] = negative_weights
205 |
206 | if DEBUG:
207 | self._sums += bbox_targets[labels == 1, :].sum(axis=0)
208 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
209 | self._counts += np.sum(labels == 1)
210 | means = self._sums / self._counts
211 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
212 | print('means:')
213 | print(means)
214 | print('stdevs:')
215 | print(stds)
216 |
217 | # map up to original set of anchors
218 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
219 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
220 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
221 | bbox_outside_weights = _unmap(bbox_outside_weights, total_anchors, inds_inside, fill=0)
222 |
223 | if DEBUG:
224 | print('rpn: max max_overlap', np.max(max_overlaps))
225 | print('rpn: num_positive', np.sum(labels == 1))
226 | print('rpn: num_negative', np.sum(labels == 0))
227 | self._fg_sum += np.sum(labels == 1)
228 | self._bg_sum += np.sum(labels == 0)
229 | self._count += 1
230 | print('rpn: num_positive avg', self._fg_sum / self._count)
231 | print('rpn: num_negative avg', self._bg_sum / self._count)
232 |
233 | # labels
234 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
235 | #labels = labels.reshape((1, 1, A * height, width))
236 | #top[0].reshape(*labels.shape)
237 | #top[0].data[...] = labels
238 |
239 | # bbox_targets
240 | bbox_targets = bbox_targets \
241 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
242 | #top[1].reshape(*bbox_targets.shape)
243 | #top[1].data[...] = bbox_targets
244 |
245 | # bbox_inside_weights
246 | bbox_inside_weights = bbox_inside_weights \
247 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
248 | #assert bbox_inside_weights.shape[2] == height
249 | #assert bbox_inside_weights.shape[3] == width
250 | #top[2].reshape(*bbox_inside_weights.shape)
251 | #top[2].data[...] = bbox_inside_weights
252 |
253 | # bbox_outside_weights
254 | #bbox_outside_weights = bbox_outside_weights \
255 | # .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
256 | #assert bbox_outside_weights.shape[2] == height
257 | #assert bbox_outside_weights.shape[3] == width
258 | #top[3].reshape(*bbox_outside_weights.shape)
259 | #top[3].data[...] = bbox_outside_weights
260 |
261 | return labels, bbox_targets, bbox_inside_weights
262 |
263 | def backward(self, top, propagate_down, bottom):
264 | """This layer does not propagate gradients."""
265 | pass
266 |
267 | def reshape(self, bottom, top):
268 | """Reshaping happens during the call to forward."""
269 | pass
270 |
271 |
272 | def _unmap(data, count, inds, fill=0):
273 | """ Unmap a subset of item (data) back to the original set of items (of
274 | size count) """
275 | if len(data.shape) == 1:
276 | ret = np.empty((count, ), dtype=np.float32)
277 | ret.fill(fill)
278 | ret[inds] = data
279 | else:
280 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
281 | ret.fill(fill)
282 | ret[inds, :] = data
283 | return ret
284 |
285 |
286 | def _compute_targets(ex_rois, gt_rois):
287 | """Compute bounding-box regression targets for an image."""
288 |
289 | assert ex_rois.shape[0] == gt_rois.shape[0]
290 | assert ex_rois.shape[1] == 4
291 | assert gt_rois.shape[1] == 5
292 |
293 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
294 |
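295 | if __name__ == '__main__':
296 |     # Minimal sketch (hypothetical, not part of the original module) of _unmap:
297 |     # scatter the values of 2 "inside" anchors back into the full set of 5.
298 |     print(_unmap(np.array([1., 2.]), 5, np.array([1, 3]), fill=-1))
299 |     # -> [-1.  1. -1.  2. -1.]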
--------------------------------------------------------------------------------
/utils/caffe_layers/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Fast R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick
6 | # --------------------------------------------------------
7 |
8 | import numpy as np
9 |
10 | def bbox_transform(ex_rois, gt_rois):
11 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
12 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
13 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
14 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
15 |
16 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
17 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
18 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
19 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
20 |
21 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
22 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
23 | targets_dw = np.log(gt_widths / ex_widths)
24 | targets_dh = np.log(gt_heights / ex_heights)
25 |
26 | targets = np.vstack(
27 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
28 | return targets
29 |
30 | def bbox_transform_inv(boxes, deltas):
31 | if boxes.shape[0] == 0:
32 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
33 |
34 | boxes = boxes.astype(deltas.dtype, copy=False)
35 |
36 | widths = boxes[:, 2] - boxes[:, 0] + 1.0
37 | heights = boxes[:, 3] - boxes[:, 1] + 1.0
38 | ctr_x = boxes[:, 0] + 0.5 * widths
39 | ctr_y = boxes[:, 1] + 0.5 * heights
40 |
41 | dx = deltas[:, 0::4]
42 | dy = deltas[:, 1::4]
43 | dw = deltas[:, 2::4]
44 | dh = deltas[:, 3::4]
45 |
46 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
47 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
48 | pred_w = np.exp(dw) * widths[:, np.newaxis]
49 | pred_h = np.exp(dh) * heights[:, np.newaxis]
50 |
51 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
52 | # x1
53 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
54 | # y1
55 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
56 | # x2
57 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
58 | # y2
59 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
60 |
61 | return pred_boxes
62 |
63 | def clip_boxes(boxes, im_shape):
64 | """
65 | Clip boxes to image boundaries.
66 | """
67 |
68 | # x1 >= 0
69 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0)
70 | # y1 >= 0
71 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0)
72 | # x2 < im_shape[1]
73 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0)
74 | # y2 < im_shape[0]
75 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0)
76 | return boxes
77 |
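78 | if __name__ == '__main__':
79 |     # Minimal sketch (hypothetical, not part of the original module): regression
80 |     # targets for an example box vs. a same-sized ground-truth box shifted by
81 |     # (2, 3). Equal sizes give dw = dh = log(1) = 0, and the center offsets are
82 |     # (dx, dy) = (2/10, 3/20) = (0.2, 0.15).
83 |     ex = np.array([[0., 0., 9., 19.]])    # 10x20 box (inclusive coordinates)
84 |     gt = np.array([[2., 3., 11., 22.]])   # same size, shifted by (2, 3)
85 |     print(bbox_transform(ex, gt))         # [[0.2  0.15 0.   0.  ]]
86 |     print(clip_boxes(np.array([[-5., 10., 900., 400.]]), (480, 640)))  # [[0. 10. 639. 400.]]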
--------------------------------------------------------------------------------
/utils/caffe_layers/proposal_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | #import caffe
9 | import numpy as np
10 | import yaml
11 | from utils.default_config import cfg
12 | from utils.rpn.generate_anchors import generate_anchors
13 | from utils.caffe_layers.bbox_transform import bbox_transform_inv, clip_boxes
14 | from utils.nms.nms_wrapper import nms
15 |
16 | DEBUG = False
17 |
18 | class ProposalLayer: #(caffe.Layer):
19 | """
20 | Outputs object detection proposals by applying estimated bounding-box
21 | transformations to a set of regular boxes (called "anchors").
22 | """
23 |
24 | def set_param_str(self, param_str):
25 | self.param_str_ = param_str
26 |
27 | def setup(self, bottom, top):
28 | # parse the layer parameter string, which must be valid YAML
29 | layer_params = yaml.load(self.param_str_)
30 |
31 | self._feat_stride = layer_params['feat_stride']
32 | anchor_scales = layer_params.get('scales', (8, 16, 32))
33 | self._anchors = generate_anchors(scales=np.array(anchor_scales))
34 | self._num_anchors = self._anchors.shape[0]
35 | self.phase = "TEST"
36 |
37 | #if DEBUG:
38 | #print 'feat_stride: {}'.format(self._feat_stride)
39 | #print 'anchors:'
40 | #print self._anchors
41 |
42 | # rois blob: holds R regions of interest, each is a 5-tuple
43 | # (n, x1, y1, x2, y2) specifying an image batch index n and a
44 | # rectangle (x1, y1, x2, y2)
45 | #top[0].reshape(1, 5)
46 |
47 | # scores blob: holds scores for R regions of interest
48 | #if len(top) > 1:
49 | # top[1].reshape(1, 1, 1, 1)
50 |
51 | def forward(self, bottom, top):
52 | # Algorithm:
53 | #
54 | # for each (H, W) location i
55 | # generate A anchor boxes centered on cell i
56 | # apply predicted bbox deltas at cell i to each of the A anchors
57 | # clip predicted boxes to image
58 | # remove predicted boxes with either height or width < threshold
59 | # sort all (proposal, score) pairs by score from highest to lowest
60 | # take top pre_nms_topN proposals before NMS
61 | # apply NMS with threshold 0.7 to remaining proposals
62 | # take after_nms_topN proposals after NMS
63 | # return the top proposals (-> RoIs top, scores top)
64 |
65 | assert bottom[0].shape[0] == 1, \
66 | 'Only single item batches are supported'
67 |
68 | cfg_key = str(self.phase) # either 'TRAIN' or 'TEST'
69 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N
70 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N
71 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH
72 | min_size = cfg[cfg_key].RPN_MIN_SIZE
73 |
74 | # the first set of _num_anchors channels are bg probs
75 | # the second set are the fg probs, which we want
76 | scores = bottom[0][:, self._num_anchors:, :, :]
77 | bbox_deltas = bottom[1]
78 | im_info = bottom[2][0, :]
79 |
80 | #if DEBUG:
81 | # print 'im_size: ({}, {})'.format(im_info[0], im_info[1])
82 | # print 'scale: {}'.format(im_info[2])
83 |
84 | # 1. Generate proposals from bbox deltas and shifted anchors
85 | height, width = scores.shape[-2:]
86 |
87 | #if DEBUG:
88 | # print 'score map size: {}'.format(scores.shape)
89 |
90 | # Enumerate all shifts
91 | shift_x = np.arange(0, width) * self._feat_stride
92 | shift_y = np.arange(0, height) * self._feat_stride
93 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
94 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
95 | shift_x.ravel(), shift_y.ravel())).transpose()
96 |
97 | # Enumerate all shifted anchors:
98 | #
99 | # add A anchors (1, A, 4) to
100 | # cell K shifts (K, 1, 4) to get
101 | # shift anchors (K, A, 4)
102 | # reshape to (K*A, 4) shifted anchors
103 | A = self._num_anchors
104 | K = shifts.shape[0]
105 | anchors = self._anchors.reshape((1, A, 4)) + \
106 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))
107 | anchors = anchors.reshape((K * A, 4))
108 |
109 | # Transpose and reshape predicted bbox transformations to get them
110 | # into the same order as the anchors:
111 | #
112 | # bbox deltas will be (1, 4 * A, H, W) format
113 | # transpose to (1, H, W, 4 * A)
114 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
115 | # in slowest to fastest order
116 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
117 |
118 | # Same story for the scores:
119 | #
120 | # scores are (1, A, H, W) format
121 | # transpose to (1, H, W, A)
122 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
123 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
124 |
125 | # Convert anchors into proposals via bbox transformations
126 | proposals = bbox_transform_inv(anchors, bbox_deltas)
127 |
128 | # 2. clip predicted boxes to image
129 | proposals = clip_boxes(proposals, im_info[:2])
130 |
131 | # 3. remove predicted boxes with either height or width < threshold
132 | # (NOTE: convert min_size to input image scale stored in im_info[2])
133 | keep = _filter_boxes(proposals, min_size * im_info[2])
134 | proposals = proposals[keep, :]
135 | scores = scores[keep]
136 |
137 | # 4. sort all (proposal, score) pairs by score from highest to lowest
138 | # 5. take top pre_nms_topN (e.g. 6000)
139 | order = scores.ravel().argsort()[::-1]
140 | if pre_nms_topN > 0:
141 | order = order[:pre_nms_topN]
142 | proposals = proposals[order, :]
143 | scores = scores[order]
144 |
145 | # 6. apply nms (e.g. threshold = 0.7)
146 | # 7. take after_nms_topN (e.g. 300)
147 | # 8. return the top proposals (-> RoIs top)
148 | keep = nms(np.hstack((proposals, scores)), nms_thresh)
149 | if post_nms_topN > 0:
150 | keep = keep[:post_nms_topN]
151 | proposals = proposals[keep, :]
152 | scores = scores[keep]
153 |
154 | # Output rois blob
155 | # Our RPN implementation only supports a single input image, so all
156 | # batch inds are 0
157 | batch_inds = np.zeros((proposals.shape[0], 1), dtype=np.float32)
158 | blob = np.hstack((batch_inds, proposals.astype(np.float32, copy=False)))
159 |
160 | return blob
161 | #top[0].reshape(*(blob.shape))
162 | #top[0].data[...] = blob
163 |
164 | # [Optional] output scores blob
165 | #if len(top) > 1:
166 | # top[1].reshape(*(scores.shape))
167 | # top[1].data[...] = scores
168 |
169 | def backward(self, top, propagate_down, bottom):
170 | """This layer does not propagate gradients."""
171 | pass
172 |
173 | def reshape(self, bottom, top):
174 | """Reshaping happens during the call to forward."""
175 | pass
176 |
177 | def _filter_boxes(boxes, min_size):
178 | """Remove all boxes with any side smaller than min_size."""
179 | ws = boxes[:, 2] - boxes[:, 0] + 1
180 | hs = boxes[:, 3] - boxes[:, 1] + 1
181 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
182 | return keep
183 |
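184 | if __name__ == '__main__':
185 |     # Minimal sketch (hypothetical, not part of the original module) of
186 |     # _filter_boxes: only boxes with both sides >= min_size survive.
187 |     boxes = np.array([[0., 0., 4., 4.], [0., 0., 99., 49.]])  # 5x5 and 100x50
188 |     print(_filter_boxes(boxes, 16))  # -> [1]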
--------------------------------------------------------------------------------
/utils/caffe_layers/proposal_target_layer.py:
--------------------------------------------------------------------------------
1 | # --------------------------------------------------------
2 | # Faster R-CNN
3 | # Copyright (c) 2015 Microsoft
4 | # Licensed under The MIT License [see LICENSE for details]
5 | # Written by Ross Girshick and Sean Bell
6 | # --------------------------------------------------------
7 |
8 | #import caffe
9 | import yaml
10 | import numpy as np
11 | import numpy.random as npr
12 | from utils.default_config import cfg
13 | from utils.rpn.bbox_transform import bbox_transform
14 | from utils.cython_modules.cython_bbox import bbox_overlaps
15 |
16 | DEBUG = False
17 |
18 | class ProposalTargetLayer(): #caffe.Layer):
19 | """
20 | Assign object detection proposals to ground-truth targets. Produces proposal
21 | classification labels and bounding-box regression targets.
22 | """
23 |
24 | def set_param_str(self, param_str):
25 | self.param_str_ = param_str
26 |
27 |     def set_deterministic_mode(self, mode = True):
28 |         self._deterministic_mode = mode
29 |
30 | def setup(self, bottom, top):
31 | layer_params = yaml.load(self.param_str_)
32 | self._num_classes = layer_params['num_classes']
33 |         self._deterministic_mode = False
34 |
35 | # sampled rois (0, x1, y1, x2, y2)
36 | #top[0].reshape(1, 5)
37 | # labels
38 | #top[1].reshape(1, 1)
39 | # bbox_targets
40 | #top[2].reshape(1, self._num_classes * 4)
41 | # bbox_inside_weights
42 | #top[3].reshape(1, self._num_classes * 4)
43 | # bbox_outside_weights
44 | #top[4].reshape(1, self._num_classes * 4)
45 |
46 | def forward(self, bottom, top):
47 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
48 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
49 | all_rois = bottom[0] #.data
50 | # GT boxes (x1, y1, x2, y2, label)
51 | # TODO(rbg): it's annoying that sometimes I have extra info before
52 | # and other times after box coordinates -- normalize to one format
53 | gt_boxes = bottom[1] #.data
54 |
55 | # Include ground-truth boxes in the set of candidate rois
56 | zeros = np.zeros((gt_boxes.shape[0], 1), dtype=gt_boxes.dtype)
57 | all_rois = np.vstack(
58 | (all_rois, np.hstack((zeros, gt_boxes[:, :-1])))
59 | )
60 |
61 | # Sanity check: single batch only
62 | assert np.all(all_rois[:, 0] == 0), \
63 | 'Only single item batches are supported'
64 |
65 | #num_images = 1
66 | #rois_per_image = int(cfg.TRAIN.BATCH_SIZE / num_images)
67 | rois_per_image = cfg.TRAIN.BATCH_SIZE
68 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image).astype(int)
69 |
70 | # Sample rois with classification labels and bounding box regression
71 | # targets
72 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
73 | all_rois, gt_boxes, fg_rois_per_image,
74 | rois_per_image, self._num_classes,
75 |         deterministic=self._deterministic_mode)
76 |
77 | if DEBUG:
78 | print('num fg: {}'.format((labels > 0).sum()))
79 | print('num bg: {}'.format((labels == 0).sum()))
80 | self._count += 1
81 | self._fg_num += (labels > 0).sum()
82 | self._bg_num += (labels == 0).sum()
83 | print('num fg avg: {}'.format(self._fg_num / self._count))
84 | print('num bg avg: {}'.format(self._bg_num / self._count))
85 | print('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)))
86 |
87 | return rois, labels, bbox_targets, bbox_inside_weights
88 |
89 | # sampled rois
90 | #top[0].reshape(*rois.shape)
91 | #top[0].data[...] = rois
92 |
93 | # classification labels
94 | #top[1].reshape(*labels.shape)
95 | #top[1].data[...] = labels
96 |
97 | # bbox_targets
98 | #top[2].reshape(*bbox_targets.shape)
99 | #top[2].data[...] = bbox_targets
100 |
101 | # bbox_inside_weights
102 | #top[3].reshape(*bbox_inside_weights.shape)
103 | #top[3].data[...] = bbox_inside_weights
104 |
105 | # bbox_outside_weights
106 | #top[4].reshape(*bbox_inside_weights.shape)
107 | #top[4].data[...] = np.array(bbox_inside_weights > 0).astype(np.float32)
108 |
109 | def backward(self, top, propagate_down, bottom):
110 | """This layer does not propagate gradients."""
111 | pass
112 |
113 | def reshape(self, bottom, top):
114 | """Reshaping happens during the call to forward."""
115 | pass
116 |
117 |
118 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
119 | """Bounding-box regression targets (bbox_target_data) are stored in a
120 | compact form N x (class, tx, ty, tw, th)
121 |
122 | This function expands those targets into the 4-of-4*K representation used
123 | by the network (i.e. only one class has non-zero targets).
124 |
125 | Returns:
126 | bbox_target (ndarray): N x 4K blob of regression targets
127 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
128 | """
129 |
130 | clss = bbox_target_data[:, 0].astype(int)
131 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
132 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
133 | inds = np.where(clss > 0)[0]
134 | for ind in inds:
135 | cls = clss[ind]
136 | start = 4 * cls
137 | end = start + 4
138 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
139 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
140 | return bbox_targets, bbox_inside_weights
141 |
142 |
143 | def _compute_targets(ex_rois, gt_rois, labels):
144 | """Compute bounding-box regression targets for an image."""
145 |
146 | assert ex_rois.shape[0] == gt_rois.shape[0]
147 | assert ex_rois.shape[1] == 4
148 | assert gt_rois.shape[1] == 4
149 |
150 | targets = bbox_transform(ex_rois, gt_rois)
151 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
152 | # Optionally normalize targets by a precomputed mean and stdev
153 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
154 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
155 | return np.hstack(
156 | (labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
157 |
158 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False):
159 | """Generate a random sample of RoIs comprising foreground and background
160 | examples.
161 | """
162 | # overlaps: (rois x gt_boxes)
163 | overlaps = bbox_overlaps(
164 | np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float),
165 | np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float))
166 | gt_assignment = overlaps.argmax(axis=1)
167 | max_overlaps = overlaps.max(axis=1)
168 | labels = gt_boxes[gt_assignment, 4]
169 |
170 | # Select foreground RoIs as those with >= FG_THRESH overlap
171 | fg_inds = np.where(max_overlaps >= cfg.TRAIN.FG_THRESH)[0]
172 | # Guard against the case when an image has fewer than fg_rois_per_image
173 | # foreground RoIs
174 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
175 |
176 | # Sample foreground regions without replacement
177 | if fg_inds.size > 0:
178 | if deterministic:
179 | fg_inds = fg_inds[:fg_rois_per_this_image]
180 | else:
181 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
182 |
183 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
184 | bg_inds = np.where((max_overlaps < cfg.TRAIN.BG_THRESH_HI) &
185 | (max_overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
186 | # Compute number of background RoIs to take from this image (guarding
187 | # against there being fewer than desired)
188 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
189 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
190 | # Sample background regions without replacement
191 | if bg_inds.size > 0:
192 | if deterministic:
193 | bg_inds = bg_inds[:bg_rois_per_this_image]
194 | else:
195 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
196 |
197 | # The indices that we're selecting (both fg and bg)
198 | keep_inds = np.append(fg_inds, bg_inds)
199 | # Select sampled values from various arrays:
200 | labels = labels[keep_inds]
201 | # Clamp labels for the background RoIs to 0
202 | labels[fg_rois_per_this_image:] = 0
203 | rois = all_rois[keep_inds]
204 |
205 | bbox_target_data = _compute_targets(
206 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
207 |
208 | bbox_targets, bbox_inside_weights = \
209 | _get_bbox_regression_labels(bbox_target_data, num_classes)
210 |
211 | return labels, rois, bbox_targets, bbox_inside_weights
212 |
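213 | if __name__ == '__main__':
214 |     # Minimal sketch (hypothetical, not part of the original module) of
215 |     # _get_bbox_regression_labels: one roi of class 1 and one background roi,
216 |     # expanded to the 4*K layout (here K = 3 classes incl. background).
217 |     data = np.array([[1., .1, .2, .3, .4], [0., 0., 0., 0., 0.]])
218 |     targets, weights = _get_bbox_regression_labels(data, 3)
219 |     print(targets.shape)    # (2, 12); only columns 4:8 of row 0 are non-zero
220 |     print(targets[0, 4:8])  # [0.1 0.2 0.3 0.4]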
--------------------------------------------------------------------------------
/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cp35-win_amd64.pyd
--------------------------------------------------------------------------------
/utils/cython_modules/cpu_nms.cpython-34m.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cpu_nms.cpython-34m.so
--------------------------------------------------------------------------------
/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cp35-win_amd64.pyd
--------------------------------------------------------------------------------
/utils/cython_modules/cython_bbox.cpython-34m.so:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/karolzak/cntk-python-web-service-on-azure/3aecc606d3dbe8af6d140405949b448db97ef139/utils/cython_modules/cython_bbox.cpython-34m.so
--------------------------------------------------------------------------------
/utils/default_config.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import os
8 | import os.path as osp
9 | import numpy as np
10 | # `pip install easydict` if you don't have it
11 | from easydict import EasyDict as edict
12 |
13 | __C = edict()
14 | cfg = __C
15 |
16 | #
17 | # CNTK parameters
18 | #
19 |
20 | __C.CNTK = edict()
21 |
22 |
23 | __C.CNTK.CONV_BIAS_INIT = 0.0
24 | __C.CNTK.SIGMA_RPN_L1 = 3.0
25 |
26 | __C.CNTK.IMAGE_WIDTH = 850
27 | __C.CNTK.IMAGE_HEIGHT = 850
28 |
29 | __C.CNTK.RESULTS_NMS_THRESHOLD = 0.3 # see also: __C.TEST.NMS = 0.3
30 | __C.CNTK.RESULTS_NMS_CONF_THRESHOLD = 0.0
31 | __C.CNTK.RESULTS_BGR_PLOT_THRESHOLD = 0.1
32 |
33 | __C.CNTK.DRAW_NEGATIVE_ROIS = False
34 | __C.CNTK.DRAW_UNREGRESSED_ROIS = False
35 |
36 | #
37 | # Training options
38 | #
39 |
40 | __C.TRAIN = edict()
41 |
42 | # Minibatch size (number of regions of interest [ROIs])
43 | __C.TRAIN.BATCH_SIZE = 128
44 |
45 | # Fraction of minibatch that is labeled foreground (i.e. class > 0)
46 | __C.TRAIN.FG_FRACTION = 0.25
47 |
48 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH)
49 | __C.TRAIN.FG_THRESH = 0.5
50 |
51 | # Overlap threshold for a ROI to be considered background (class = 0 if
52 | # overlap in [LO, HI))
53 | __C.TRAIN.BG_THRESH_HI = 0.5
54 | __C.TRAIN.BG_THRESH_LO = 0.0
55 |
56 | # Use horizontally-flipped images during training?
57 | __C.TRAIN.USE_FLIPPED = True
58 |
59 | # Train bounding-box regressors
60 | __C.TRAIN.BBOX_REG = True
61 |
62 | # Overlap required between a ROI and ground-truth box in order for that ROI to
63 | # be used as a bounding-box regression training example
64 | __C.TRAIN.BBOX_THRESH = 0.5
65 |
66 | # Normalize the targets (subtract empirical mean, divide by empirical stddev)
67 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True
68 | # Deprecated (inside weights)
69 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
70 | # Normalize the targets using "precomputed" (or made up) means and stdevs
71 | # (BBOX_NORMALIZE_TARGETS must also be True)
72 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True
73 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0)
74 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2)
75 |
76 | # Train using these proposals
77 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search'
78 |
79 | # IOU >= thresh: positive example
80 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
81 | # IOU < thresh: negative example
82 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
83 | # If an anchor satisfies both the positive and negative conditions, set it to negative
84 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False
85 | # Max number of foreground examples
86 | __C.TRAIN.RPN_FG_FRACTION = 0.5
87 | # Total number of examples
88 | __C.TRAIN.RPN_BATCHSIZE = 256
89 | # NMS threshold used on RPN proposals
90 | __C.TRAIN.RPN_NMS_THRESH = 0.7
91 | # Number of top scoring boxes to keep before applying NMS to RPN proposals
92 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
93 | # Number of top scoring boxes to keep after applying NMS to RPN proposals
94 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000
95 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
96 | __C.TRAIN.RPN_MIN_SIZE = 16
97 | # Deprecated (outside weights)
98 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
99 | # Give the positive RPN examples weight of p * 1 / {num positives}
100 | # and give negatives a weight of (1 - p)
101 | # Set to -1.0 to use uniform example weighting
102 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
103 |
104 |
105 | #
106 | # Testing options
107 | #
108 |
109 | __C.TEST = edict()
110 |
111 | # Overlap threshold used for non-maximum suppression (suppress boxes with
112 | # IoU >= this threshold)
113 | __C.TEST.NMS = 0.3
114 |
115 | # Test using bounding-box regressors
116 | __C.TEST.BBOX_REG = True
117 |
118 | # Propose boxes
119 | __C.TEST.HAS_RPN = False
120 |
121 | # Test using these proposals
122 | __C.TEST.PROPOSAL_METHOD = 'selective_search'
123 |
124 | ## NMS threshold used on RPN proposals
125 | __C.TEST.RPN_NMS_THRESH = 0.7
126 | ## Number of top scoring boxes to keep before applying NMS to RPN proposals
127 | __C.TEST.RPN_PRE_NMS_TOP_N = 6000
128 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals
129 | __C.TEST.RPN_POST_NMS_TOP_N = 300
130 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
131 | __C.TEST.RPN_MIN_SIZE = 16
132 |
133 |
134 | #
135 | # MISC
136 | #
137 |
138 | # The mapping from image coordinates to feature map coordinates might cause
139 | # some boxes that are distinct in image space to become identical in feature
140 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
141 | # for identifying duplicate boxes.
142 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
143 | __C.DEDUP_BOXES = 1./16.
144 |
145 | # Pixel mean values (BGR order) as a (1, 1, 3) array
146 | # We use the same pixel mean for all networks even though it's not exactly what
147 | # they were trained with
148 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]])
149 |
150 | # For reproducibility
151 | __C.RNG_SEED = 3
152 |
153 | # A small number that's used many times
154 | __C.EPS = 1e-14
155 |
156 | # Use GPU implementation of non-maximum suppression
157 | __C.USE_GPU_NMS = True
158 |
159 | # Default GPU device id
160 | __C.GPU_ID = 0
161 |
162 |
163 | def _merge_a_into_b(a, b):
164 | """Merge config dictionary a into config dictionary b, clobbering the
165 | options in b whenever they are also specified in a.
166 | """
167 | if type(a) is not edict:
168 | return
169 |
170 |     for k, v in a.items():
171 |         # a must specify keys that are in b
172 |         if k not in b:
173 | raise KeyError('{} is not a valid config key'.format(k))
174 |
175 | # the types must match, too
176 | old_type = type(b[k])
177 | if old_type is not type(v):
178 | if isinstance(b[k], np.ndarray):
179 | v = np.array(v, dtype=b[k].dtype)
180 | else:
181 | raise ValueError(('Type mismatch ({} vs. {}) '
182 | 'for config key: {}').format(type(b[k]),
183 | type(v), k))
184 |
185 | # recursively merge dicts
186 | if type(v) is edict:
187 | try:
188 | _merge_a_into_b(a[k], b[k])
189 | except:
190 | print('Error under config key: {}'.format(k))
191 | raise
192 | else:
193 | b[k] = v
194 |
195 | def cfg_from_file(filename):
196 | """Load a config file and merge it into the default options."""
197 | import yaml
198 | with open(filename, 'r') as f:
199 | yaml_cfg = edict(yaml.load(f))
200 |
201 | _merge_a_into_b(yaml_cfg, __C)
202 |
203 | def cfg_from_list(cfg_list):
204 | """Set config keys via list (e.g., from command line)."""
205 | from ast import literal_eval
206 | assert len(cfg_list) % 2 == 0
207 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]):
208 | key_list = k.split('.')
209 | d = __C
210 | for subkey in key_list[:-1]:
211 |             assert subkey in d
212 | d = d[subkey]
213 | subkey = key_list[-1]
214 |         assert subkey in d
215 | try:
216 | value = literal_eval(v)
217 | except:
218 | # handle the case when v is a string literal
219 | value = v
220 | assert type(value) == type(d[subkey]), \
221 | 'type {} does not match original type {}'.format(
222 | type(value), type(d[subkey]))
223 | d[subkey] = value
224 |
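225 | # Usage sketch (hypothetical, not part of the original module): merging a YAML
226 | # file of overrides into the defaults, e.g. from a training script:
227 | #
228 | #   from utils.default_config import cfg, cfg_from_file
229 | #   cfg_from_file('overrides.yml')   # e.g. contains "TRAIN:\n  BATCH_SIZE: 64"
230 | #   print(cfg.TRAIN.BATCH_SIZE)      # 64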
--------------------------------------------------------------------------------
/utils/map/map_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 | from utils.nms.nms_wrapper import apply_nms_to_test_set_results
9 |
10 | def evaluate_detections(all_boxes, all_gt_infos, classes, use_07_metric=False, apply_nms=True, nms_threshold=0.5, conf_threshold=0.0):
11 | '''
12 | Computes per-class average precision.
13 |
14 | Args:
15 |         all_boxes: detection results, shaped e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
16 |         all_gt_infos: a dictionary that contains all ground truth annotations in the following form:
17 |                       {'class_A': [{'bbox': array([[ 376., 210., 456., 288., 10.]], dtype=float32), 'det': [False], 'difficult': [False]}, ... ],
18 |                        'class_B': [ ... ], ... }
19 |         classes: a list of class names, e.g. ['__background__', 'avocado', 'orange', 'butter']
20 |         use_07_metric: whether to use VOC07's 11 point AP computation (default False)
21 |         apply_nms: whether to apply non-maximum suppression before computing average precision values
22 | nms_threshold: the threshold for discarding overlapping ROIs in nms
23 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded
24 |
25 | Returns:
26 | aps - average precision value per class in a dictionary {classname: ap}
27 | '''
28 |
29 |     if apply_nms:
30 | print ("Number of rois before non-maximum suppression: %d" % sum([len(all_boxes[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
31 | nms_dets,_ = apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold)
32 | print ("Number of rois after non-maximum suppression: %d" % sum([len(nms_dets[i][j]) for i in range(len(all_boxes)) for j in range(len(all_boxes[0]))]))
33 | else:
34 | print ("Skipping non-maximum suppression")
35 | nms_dets = all_boxes
36 |
37 | aps = {}
38 | for classIndex, className in enumerate(classes):
39 | if className != '__background__':
40 | rec, prec, ap = _evaluate_detections(classIndex, nms_dets, all_gt_infos[className], use_07_metric=use_07_metric)
41 | aps[className] = ap
42 |
43 | return aps
44 |
45 | def _evaluate_detections(classIndex, all_boxes, gtInfos, overlapThreshold=0.5, use_07_metric=False):
46 | '''
47 | Top level function that does the PASCAL VOC evaluation.
48 | '''
49 |
50 | # parse detections for this class
51 | # shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
52 | num_images = len(all_boxes[0])
53 | detBboxes = []
54 | detImgIndices = []
55 | detConfidences = []
56 | for imgIndex in range(num_images):
57 | dets = all_boxes[classIndex][imgIndex]
58 |         if len(dets) != 0:
59 | for k in range(dets.shape[0]):
60 | detImgIndices.append(imgIndex)
61 | detConfidences.append(dets[k, -1])
62 | # the VOCdevkit expects 1-based indices
63 | detBboxes.append([dets[k, 0] + 1, dets[k, 1] + 1, dets[k, 2] + 1, dets[k, 3] + 1])
64 | detBboxes = np.array(detBboxes)
65 | detConfidences = np.array(detConfidences)
66 |
67 | # compute precision / recall / ap
68 | rec, prec, ap = _voc_computePrecisionRecallAp(
69 | class_recs=gtInfos,
70 | confidence=detConfidences,
71 | image_ids=detImgIndices,
72 | BB=detBboxes,
73 | ovthresh=overlapThreshold,
74 | use_07_metric=use_07_metric)
75 | return rec, prec, ap
76 |
77 | def computeAveragePrecision(recalls, precisions, use_07_metric=False):
78 | '''
79 | Computes VOC AP given precision and recall.
80 | '''
81 | if use_07_metric:
82 | # 11 point metric
83 | ap = 0.
84 | for t in np.arange(0., 1.1, 0.1):
85 | if np.sum(recalls >= t) == 0:
86 | p = 0
87 | else:
88 | p = np.max(precisions[recalls >= t])
89 | ap = ap + p / 11.
90 | else:
91 | # correct AP calculation
92 | # first append sentinel values at the end
93 | mrecalls = np.concatenate(([0.], recalls, [1.]))
94 | mprecisions = np.concatenate(([0.], precisions, [0.]))
95 |
96 | # compute the precision envelope
97 | for i in range(mprecisions.size - 1, 0, -1):
98 | mprecisions[i - 1] = np.maximum(mprecisions[i - 1], mprecisions[i])
99 |
100 | # to calculate area under PR curve, look for points
101 | # where X axis (recall) changes value
102 | i = np.where(mrecalls[1:] != mrecalls[:-1])[0]
103 |
104 | # and sum (\Delta recall) * prec
105 | ap = np.sum((mrecalls[i + 1] - mrecalls[i]) * mprecisions[i + 1])
106 | return ap
107 |
108 | def _voc_computePrecisionRecallAp(class_recs, confidence, image_ids, BB, ovthresh=0.5, use_07_metric=False):
109 | '''
110 |     Computes precision, recall, and average precision.
111 | '''
112 | if len(BB) == 0:
113 | return 0.0, 0.0, 0.0
114 |
115 | # sort by confidence
116 | sorted_ind = np.argsort(-confidence)
117 |
118 | BB = BB[sorted_ind, :]
119 | image_ids = [image_ids[x] for x in sorted_ind]
120 |
121 | # go down dets and mark TPs and FPs
122 | nd = len(image_ids)
123 | tp = np.zeros(nd)
124 | fp = np.zeros(nd)
125 | for d in range(nd):
126 | R = class_recs[image_ids[d]]
127 | bb = BB[d, :].astype(float)
128 | ovmax = -np.inf
129 | BBGT = R['bbox'].astype(float)
130 |
131 | if BBGT.size > 0:
132 | # compute overlaps
133 | ixmin = np.maximum(BBGT[:, 0], bb[0])
134 | iymin = np.maximum(BBGT[:, 1], bb[1])
135 | ixmax = np.minimum(BBGT[:, 2], bb[2])
136 | iymax = np.minimum(BBGT[:, 3], bb[3])
137 | iw = np.maximum(ixmax - ixmin + 1., 0.)
138 | ih = np.maximum(iymax - iymin + 1., 0.)
139 | inters = iw * ih
140 |
141 | # union
142 | uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
143 | (BBGT[:, 2] - BBGT[:, 0] + 1.) *
144 | (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)
145 |
146 | overlaps = inters / uni
147 | ovmax = np.max(overlaps)
148 | jmax = np.argmax(overlaps)
149 |
150 | if ovmax > ovthresh:
151 | if not R['difficult'][jmax]:
152 | if not R['det'][jmax]:
153 | tp[d] = 1.
154 | R['det'][jmax] = 1
155 | else:
156 | fp[d] = 1.
157 | else:
158 | fp[d] = 1.
159 |
160 | # compute precision recall
161 | npos = sum([len(cr['bbox']) for cr in class_recs])
162 | fp = np.cumsum(fp)
163 | tp = np.cumsum(tp)
164 | rec = tp / float(npos)
165 | # avoid divide by zero in case the first detection matches a difficult ground truth
166 | prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
167 | ap = computeAveragePrecision(rec, prec, use_07_metric)
168 | return rec, prec, ap
169 |
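170 | # Illustrative sketch (hypothetical numbers; runnable when this module's imports
171 | # resolve): AP for a toy ranked detection list with three detections, of which
172 | # the first and third are true positives among two ground-truth boxes.
173 | if __name__ == '__main__':
174 |     tp = np.cumsum([1., 0., 1.])   # running true positives down the ranking
175 |     fp = np.cumsum([0., 1., 0.])   # running false positives
176 |     rec = tp / 2.                  # two ground-truth boxes in total
177 |     prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
178 |     print('11-point AP: %.4f' % computeAveragePrecision(rec, prec, use_07_metric=True))
179 |     print('exact AP:    %.4f' % computeAveragePrecision(rec, prec, use_07_metric=False))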
--------------------------------------------------------------------------------
/utils/nms/nms_wrapper.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 | from utils.cython_modules.cpu_nms import cpu_nms
9 | try:
10 | from utils.cython_modules.gpu_nms import gpu_nms
11 | gpu_nms_available = True
12 | except ImportError:
13 | gpu_nms_available = False
14 |
15 | try:
16 | from config import cfg
17 | except ImportError:
18 | from utils.default_config import cfg
19 |
20 |
21 |
22 | def nms(dets, thresh, force_cpu=False):
23 | '''
24 | Dispatches the call to either CPU or GPU NMS implementations
25 | '''
26 | if dets.shape[0] == 0:
27 | return []
28 | if gpu_nms_available and cfg.USE_GPU_NMS and not force_cpu:
29 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID)
30 | else:
31 | return cpu_nms(dets, thresh)
32 |
33 | def apply_nms_to_single_image_results(coords, labels, scores, nms_threshold=0.5, conf_threshold=0.0):
34 | '''
35 | Applies nms to the results for a single image.
36 |
37 | Args:
38 | coords: (x_min, y_min, x_max, y_max) coordinates for n rois. shape = (n, 4)
39 | labels: the predicted label per roi. shape = (n, 1)
40 | scores: the predicted score per roi. shape = (n, 1)
41 | nms_threshold: the threshold for discarding overlapping ROIs in nms
42 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded
43 |
44 | Returns:
45 | nmsKeepIndices - the indices of the ROIs to keep after nms
46 | '''
47 |
48 | # generate input for nms
49 | allIndices = []
50 | nmsRects = [[[]] for _ in range(max(labels) + 1)]
51 | coordsWithScores = np.hstack((coords, np.array([scores]).T))
52 | for i in range(max(labels) + 1):
53 | indices = np.where(np.array(labels) == i)[0]
54 | nmsRects[i][0] = coordsWithScores[indices,:]
55 | allIndices.append(indices)
56 |
57 | # call nms
58 | _, nmsKeepIndicesList = apply_nms_to_test_set_results(nmsRects, nms_threshold, conf_threshold)
59 |
60 | # map back to original roi indices
61 | nmsKeepIndices = []
62 | for i in range(max(labels) + 1):
63 | for keepIndex in nmsKeepIndicesList[i][0]:
64 |             nmsKeepIndices.append(allIndices[i][keepIndex])
65 |     assert (len(nmsKeepIndices) == len(set(nmsKeepIndices))) # check that no roi index was added more than once
66 | return nmsKeepIndices
67 |
68 | def apply_nms_to_test_set_results(all_boxes, nms_threshold, conf_threshold):
69 | '''
70 | Applies nms to the results of multiple images.
71 |
72 | Args:
73 | all_boxes: shape of all_boxes: e.g. 21 classes x 4952 images x 58 rois x 5 coords+score
74 | nms_threshold: the threshold for discarding overlapping ROIs in nms
75 | conf_threshold: a minimum value for the score of an ROI. ROIs with lower score will be discarded
76 |
77 | Returns:
78 | nms_boxes - the reduced set of rois after nms
79 | nmsKeepIndices - the indices of the ROIs to keep after nms
80 | '''
81 |
82 | num_classes = len(all_boxes)
83 | num_images = len(all_boxes[0])
84 | nms_boxes = [[[] for _ in range(num_images)]
85 | for _ in range(num_classes)]
86 | nms_keepIndices = [[[] for _ in range(num_images)]
87 | for _ in range(num_classes)]
88 | for cls_ind in range(num_classes):
89 | for im_ind in range(num_images):
90 | dets = all_boxes[cls_ind][im_ind]
91 |             if len(dets) == 0:
92 | continue
93 | keep = nms(dets.astype(np.float32), nms_threshold)
94 |
95 | # also filter out low confidences
96 | if conf_threshold > 0:
97 |
98 | keep_conf_idx = np.where(dets[:, -1] > conf_threshold)
99 | keep = list(set(keep_conf_idx[0]).intersection(keep))
100 |
101 | if len(keep) == 0:
102 | continue
103 | nms_boxes[cls_ind][im_ind] = dets[keep, :].copy()
104 | nms_keepIndices[cls_ind][im_ind] = keep
105 | return nms_boxes, nms_keepIndices
106 |
107 |
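108 | # Illustrative sketch (hypothetical boxes; running it assumes the compiled
109 | # cython cpu_nms module is available): two heavily overlapping boxes and one
110 | # distinct box. With an IoU threshold of 0.5, nms keeps the higher-scoring box
111 | # of the overlapping pair plus the distinct box.
112 | if __name__ == '__main__':
113 |     dets = np.array([[10, 10, 60, 60, 0.9],
114 |                      [12, 12, 62, 62, 0.8],
115 |                      [200, 200, 260, 260, 0.7]], dtype=np.float32)
116 |     print(nms(dets, 0.5, force_cpu=True))   # expected: [0, 2]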
--------------------------------------------------------------------------------
/utils/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | python_files = *_tests.py
3 |
--------------------------------------------------------------------------------
/utils/rpn/anchor_target_layer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import os
8 | from cntk import output_variable
9 | from cntk.ops.functions import UserFunction
10 | import yaml
11 | import numpy as np
12 | import numpy.random as npr
13 | from utils.rpn.generate_anchors import generate_anchors
14 | from utils.rpn.bbox_transform import bbox_transform
15 | from utils.cython_modules.cython_bbox import bbox_overlaps
16 |
17 | try:
18 | from config import cfg
19 | except ImportError:
20 | from utils.default_config import cfg
21 |
22 | DEBUG = False
23 |
24 | class AnchorTargetLayer(UserFunction):
25 | '''
26 | Assign anchors to ground-truth targets. Produces anchor classification
27 | labels and bounding-box regression targets.
28 | '''
29 |
30 | def __init__(self, arg1, arg2, arg3, name='AnchorTargetLayer', param_str=None, cfm_shape=None, deterministic=False):
31 | super(AnchorTargetLayer, self).__init__([arg1, arg2, arg3], name=name)
32 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
33 |
34 | # parse the layer parameter string, which must be valid YAML
35 |         layer_params = yaml.safe_load(self.param_str_)
36 | anchor_scales = layer_params.get('scales', (8, 16, 32))
37 | self._anchors = generate_anchors(scales=np.array(anchor_scales))
38 | self._num_anchors = self._anchors.shape[0]
39 | self._feat_stride = layer_params['feat_stride']
40 | self._cfm_shape = cfm_shape
41 |         self._deterministic_mode = deterministic
42 |
43 | if DEBUG:
44 | print ('anchors:')
45 | print (self._anchors)
46 | print ('anchor shapes:')
47 | print (np.hstack((
48 | self._anchors[:, 2::4] - self._anchors[:, 0::4],
49 | self._anchors[:, 3::4] - self._anchors[:, 1::4],
50 | )))
51 | self._counts = cfg.EPS
52 | self._sums = np.zeros((1, 4))
53 | self._squared_sums = np.zeros((1, 4))
54 | self._fg_sum = 0
55 | self._bg_sum = 0
56 | self._count = 0
57 |
58 | # allow boxes to sit over the edge by a small amount
59 |         self._allowed_border = 0  # layer_params.get('allowed_border', 0)
60 |
61 | def infer_outputs(self):
62 |         # This is a necessary workaround: after cloning, the cloned inputs are just placeholders without the proper shape
63 | if self._cfm_shape is None:
64 | self._cfm_shape = self.inputs[0].shape
65 | height, width = self._cfm_shape[-2:]
66 |
67 | if DEBUG:
68 | print('AnchorTargetLayer: height', height, 'width', width)
69 |
70 | A = self._num_anchors
71 | # labels
72 | labelShape = (1, A, height, width)
73 | # Comment: this layer uses encoded labels, while in CNTK we mostly use one hot labels
74 | # bbox_targets
75 | bbox_target_shape = (1, A * 4, height, width)
76 | # bbox_inside_weights
77 | bbox_inside_weights_shape = (1, A * 4, height, width)
78 |
79 | return [output_variable(labelShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
80 | name="objectness_target", needs_gradient=False),
81 | output_variable(bbox_target_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
82 | name="rpn_bbox_target", needs_gradient=False),
83 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
84 | name="rpn_bbox_inside_w", needs_gradient=False),]
85 |
86 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None):
87 | # Algorithm:
88 | #
89 | # for each (H, W) location i
90 | # generate 9 anchor boxes centered on cell i
91 | # apply predicted bbox deltas at cell i to each of the 9 anchors
92 | # filter out-of-image anchors
93 | # measure GT overlap
94 |
95 | bottom = arguments
96 |
97 | # map of shape (..., H, W)
98 | height, width = bottom[0].shape[-2:]
99 | # GT boxes (x1, y1, x2, y2, label)
100 | gt_boxes = bottom[1][0,:]
101 | # im_info
102 | im_info = bottom[2][0]
103 |
104 | # remove zero padded ground truth boxes
105 | keep = np.where(
106 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) &
107 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0)
108 | )
109 | gt_boxes = gt_boxes[keep]
110 |
111 | if DEBUG:
112 | print ('')
113 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
114 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
115 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
116 | print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
117 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5]))
118 | print ('height, width: ({}, {})'.format(height, width))
119 | print ('rpn: gt_boxes.shape', gt_boxes.shape)
120 | #print ('rpn: gt_boxes', gt_boxes)
121 |
122 | # 1. Generate proposals from bbox deltas and shifted anchors
123 | shift_x = np.arange(0, width) * self._feat_stride
124 | shift_y = np.arange(0, height) * self._feat_stride
125 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
126 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
127 | shift_x.ravel(), shift_y.ravel())).transpose()
128 | # add A anchors (1, A, 4) to
129 | # cell K shifts (K, 1, 4) to get
130 | # shift anchors (K, A, 4)
131 | # reshape to (K*A, 4) shifted anchors
132 | A = self._num_anchors
133 | K = shifts.shape[0]
134 | all_anchors = (self._anchors.reshape((1, A, 4)) +
135 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)))
136 | all_anchors = all_anchors.reshape((K * A, 4))
137 | total_anchors = int(K * A)
138 |
139 | # only keep anchors inside the image
140 | padded_wh = im_info[0:2]
141 | scaled_wh = im_info[2:4]
142 | xy_offset = (padded_wh - scaled_wh) / 2
143 | xy_min = xy_offset
144 | xy_max = xy_offset + scaled_wh
145 |
146 | inds_inside = np.where(
147 | (all_anchors[:, 0] >= xy_min[0] - self._allowed_border) &
148 | (all_anchors[:, 1] >= xy_min[1] - self._allowed_border) &
149 | (all_anchors[:, 2] < xy_max[0] + self._allowed_border) & # width
150 | (all_anchors[:, 3] < xy_max[1] + self._allowed_border) # height
151 | )[0]
152 |
153 | if DEBUG:
154 | print ('total_anchors', total_anchors)
155 | print ('inds_inside', len(inds_inside))
156 |
157 | # keep only inside anchors
158 | anchors = all_anchors[inds_inside, :]
159 | if DEBUG:
160 | print ('anchors.shape', anchors.shape)
161 | print('gt_boxes.shape', gt_boxes.shape)
162 |
163 | # label: 1 is positive, 0 is negative, -1 is dont care
164 | labels = np.empty((len(inds_inside), ), dtype=np.float32)
165 | labels.fill(-1)
166 |
167 | # overlaps between the anchors and the gt boxes
168 | # overlaps (ex, gt)
169 | overlaps = bbox_overlaps(
170 |             np.ascontiguousarray(anchors, dtype=np.float64),
171 |             np.ascontiguousarray(gt_boxes, dtype=np.float64))
172 | argmax_overlaps = overlaps.argmax(axis=1)
173 | max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
174 | gt_argmax_overlaps = overlaps.argmax(axis=0)
175 | gt_max_overlaps = overlaps[gt_argmax_overlaps,
176 | np.arange(overlaps.shape[1])]
177 | gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
178 |
179 | if not cfg["TRAIN"].RPN_CLOBBER_POSITIVES:
180 | # assign bg labels first so that positive labels can clobber them
181 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0
182 |
183 | # fg label: for each gt, anchor with highest overlap
184 | labels[gt_argmax_overlaps] = 1
185 |
186 | # fg label: above threshold IOU
187 | labels[max_overlaps >= cfg["TRAIN"].RPN_POSITIVE_OVERLAP] = 1
188 |
189 | if cfg["TRAIN"].RPN_CLOBBER_POSITIVES:
190 | # assign bg labels last so that negative labels can clobber positives
191 | labels[max_overlaps < cfg["TRAIN"].RPN_NEGATIVE_OVERLAP] = 0
192 |
193 | # subsample positive labels if we have too many
194 | num_fg = int(cfg["TRAIN"].RPN_FG_FRACTION * cfg["TRAIN"].RPN_BATCHSIZE)
195 | fg_inds = np.where(labels == 1)[0]
196 | if len(fg_inds) > num_fg:
197 |             if self._deterministic_mode:
198 | disable_inds = fg_inds[:(len(fg_inds) - num_fg)]
199 | else:
200 | disable_inds = npr.choice(fg_inds, size=(len(fg_inds) - num_fg), replace=False)
201 | labels[disable_inds] = -1
202 |
203 | # subsample negative labels if we have too many
204 | num_bg = cfg["TRAIN"].RPN_BATCHSIZE - np.sum(labels == 1)
205 | bg_inds = np.where(labels == 0)[0]
206 | if len(bg_inds) > num_bg:
207 |             if self._deterministic_mode:
208 | disable_inds = bg_inds[:(len(bg_inds) - num_bg)]
209 | else:
210 | disable_inds = npr.choice(bg_inds, size=(len(bg_inds) - num_bg), replace=False)
211 | labels[disable_inds] = -1
212 |
213 | bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
214 | bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
215 |
216 | bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
217 | bbox_inside_weights[labels == 1, :] = np.array((1.0, 1.0, 1.0, 1.0))
218 |
219 | if DEBUG:
220 | self._sums += bbox_targets[labels == 1, :].sum(axis=0)
221 | self._squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
222 | self._counts += np.sum(labels == 1)
223 | means = self._sums / self._counts
224 | stds = np.sqrt(self._squared_sums / self._counts - means ** 2)
225 | print ('means:')
226 | print (means)
227 | print ('stdevs:')
228 | print (stds)
229 |
230 | # map up to original set of anchors
231 | labels = _unmap(labels, total_anchors, inds_inside, fill=-1)
232 | bbox_targets = _unmap(bbox_targets, total_anchors, inds_inside, fill=0)
233 | bbox_inside_weights = _unmap(bbox_inside_weights, total_anchors, inds_inside, fill=0)
234 |
235 | if DEBUG:
236 | print ('rpn: max max_overlap', np.max(max_overlaps))
237 | print ('rpn: num_positive', np.sum(labels == 1))
238 | print ('rpn: num_negative', np.sum(labels == 0))
239 | self._fg_sum += np.sum(labels == 1)
240 | self._bg_sum += np.sum(labels == 0)
241 | self._count += 1
242 | print ('rpn: num_positive avg', self._fg_sum / self._count)
243 | print ('rpn: num_negative avg', self._bg_sum / self._count)
244 |
245 | # labels
246 | labels = labels.reshape((1, height, width, A)).transpose(0, 3, 1, 2)
247 | outputs[self.outputs[0]] = np.ascontiguousarray(labels)
248 |
249 | # bbox_targets
250 | bbox_targets = bbox_targets.reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
251 | outputs[self.outputs[1]] = np.ascontiguousarray(bbox_targets)
252 |
253 | # bbox_inside_weights
254 | bbox_inside_weights = bbox_inside_weights \
255 | .reshape((1, height, width, A * 4)).transpose(0, 3, 1, 2)
256 | assert bbox_inside_weights.shape[2] == height
257 | assert bbox_inside_weights.shape[3] == width
258 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_inside_weights)
259 |
260 | # No state needs to be passed to backward() so we just pass None
261 | return None
262 |
263 | def backward(self, state, root_gradients, variables):
264 | """This layer does not propagate gradients."""
265 | pass
266 |
267 | def clone(self, cloned_inputs):
268 | return AnchorTargetLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_, cfm_shape=self._cfm_shape)
269 |
270 | def serialize(self):
271 | internal_state = {}
272 | internal_state['param_str'] = self.param_str_
273 | return internal_state
274 |
275 | @staticmethod
276 | def deserialize(inputs, name, state):
277 | param_str = state['param_str']
278 | return AnchorTargetLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str)
279 |
280 |
281 | def _unmap(data, count, inds, fill=0):
282 |     """ Unmap a subset of items (data) back to the original set of items (of size count) """
283 | if len(data.shape) == 1:
284 | ret = np.empty((count, ), dtype=np.float32)
285 | ret.fill(fill)
286 | ret[inds] = data
287 | else:
288 | ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
289 | ret.fill(fill)
290 | ret[inds, :] = data
291 | return ret
292 |
293 |
294 | def _compute_targets(ex_rois, gt_rois):
295 | """Compute bounding-box regression targets for an image."""
296 |
297 | assert ex_rois.shape[0] == gt_rois.shape[0]
298 | assert ex_rois.shape[1] == 4
299 | assert gt_rois.shape[1] == 5
300 |
301 | return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
302 |
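303 | # Illustrative sketch (toy sizes; running it assumes CNTK and the compiled
304 | # cython modules imported at the top of this file are available): enumerate the
305 | # full anchor grid exactly as forward() does. A 3x4 feature map with stride 16
306 | # and 9 base anchors yields 3*4*9 = 108 candidate anchors.
307 | if __name__ == '__main__':
308 |     anchors = generate_anchors(scales=np.array([8, 16, 32]))   # (9, 4)
309 |     height, width, feat_stride = 3, 4, 16
310 |     shift_x, shift_y = np.meshgrid(np.arange(0, width) * feat_stride,
311 |                                    np.arange(0, height) * feat_stride)
312 |     shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
313 |                         shift_x.ravel(), shift_y.ravel())).transpose()
314 |     all_anchors = (anchors.reshape((1, 9, 4)) +
315 |                    shifts.reshape((1, -1, 4)).transpose((1, 0, 2))).reshape((-1, 4))
316 |     print(all_anchors.shape)   # (108, 4)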
--------------------------------------------------------------------------------
/utils/rpn/bbox_transform.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 |
9 | # compute example and gt center, width and height
10 | # and return the optimal target deltas
11 | def bbox_transform(ex_rois, gt_rois):
12 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
13 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
14 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
15 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
16 |
17 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
18 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
19 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths
20 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights
21 |
22 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths
23 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights
24 | targets_dw = np.log(gt_widths / ex_widths)
25 | targets_dh = np.log(gt_heights / ex_heights)
26 |
27 | targets = np.vstack(
28 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose()
29 | return targets
30 |
31 | # gets
32 | # - boxes (n, 4) as [x_low, y_low, x_high, y_high]
33 | # - deltas (n, 4) as [dx, dy, dw, dh]
34 | # returns
35 | # - pred_boxes (n, 4) as [x_low, y_low, x_high, y_high]
36 | # where
37 | # pred_ctr_x = dx * widths + ctr_x
38 | # --> pred_x_low = pred_ctr_x - 0.5 * pred_w
39 | # and
40 | # pred_w = np.exp(dw) * widths
41 | def bbox_transform_inv(boxes, deltas):
42 | if boxes.shape[0] == 0:
43 |         # no boxes: return an empty array with a matching dtype
44 | return np.zeros((0, deltas.shape[1]), dtype=deltas.dtype)
45 |
46 | boxes = boxes.astype(deltas.dtype, copy=False)
47 |
48 | widths = boxes[:, 2] - boxes[:, 0] + 1.0
49 | heights = boxes[:, 3] - boxes[:, 1] + 1.0
50 | ctr_x = boxes[:, 0] + 0.5 * widths
51 | ctr_y = boxes[:, 1] + 0.5 * heights
52 |
53 | # avoid overflow in exp
54 | dx = np.clip(deltas[:, 0::4], None, 10)
55 | dy = np.clip(deltas[:, 1::4], None, 10)
56 | dw = np.clip(deltas[:, 2::4], None, 10)
57 | dh = np.clip(deltas[:, 3::4], None, 10)
58 |
59 | pred_ctr_x = dx * widths[:, np.newaxis] + ctr_x[:, np.newaxis]
60 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis]
61 | pred_w = np.exp(dw) * widths[:, np.newaxis]
62 | pred_h = np.exp(dh) * heights[:, np.newaxis]
63 |
64 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype)
65 | # x1
66 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w
67 | # y1
68 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h
69 | # x2
70 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w
71 | # y2
72 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h
73 |
74 | return pred_boxes
75 |
76 | def clip_boxes(boxes, im_info):
77 | '''
78 | Clip boxes to image boundaries.
79 | :param boxes: boxes
80 | :param im_info: (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
81 | e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
82 | '''
83 |
84 |     im_info.shape = (6,)
85 | padded_wh = im_info[0:2]
86 | scaled_wh = im_info[2:4]
87 | xy_offset = (padded_wh - scaled_wh) / 2
88 | xy_min = xy_offset
89 | xy_max = xy_offset + scaled_wh
90 |
91 | # x_min <= x1 <= x_max
92 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], xy_max[0] - 1), xy_min[0])
93 | # y_min <= y1 <= y_max
94 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], xy_max[1] - 1), xy_min[1])
95 | # x_min <= x2 <= x_max
96 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], xy_max[0] - 1), xy_min[0])
97 | # y_min <= y2 <= y_max
98 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], xy_max[1] - 1), xy_min[1])
99 | return boxes
100 |
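101 | # Illustrative sketch (hypothetical boxes): decoding the deltas produced by
102 | # bbox_transform with bbox_transform_inv. x1/y1 round-trip exactly; x2/y2 come
103 | # back shifted by one pixel because the forward transform defines width/height
104 | # with a +1 while the inverse reconstructs corners as center +/- half the size.
105 | if __name__ == '__main__':
106 |     ex = np.array([[10., 10., 50., 60.]])
107 |     gt = np.array([[12., 8., 56., 70.]])
108 |     deltas = bbox_transform(ex, gt)        # (dx, dy, dw, dh)
109 |     recovered = bbox_transform_inv(ex, deltas)
110 |     print(np.allclose(recovered, gt + np.array([0., 0., 1., 1.])))   # True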
--------------------------------------------------------------------------------
/utils/rpn/cntk_smoothL1_loss.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 | import cntk as C
9 |
10 | def SmoothL1Loss(sigma, bbox_pred, bbox_targets, bbox_inside_weights, bbox_outside_weights):
11 | """
12 | From https://github.com/smallcorgi/Faster-RCNN_TF/blob/master/lib/fast_rcnn/train.py
13 |
14 | ResultLoss = outside_weights * SmoothL1(inside_weights * (bbox_pred - bbox_targets))
15 |     SmoothL1(x) = 0.5 * (sigma * x)^2,  if |x| < 1 / sigma^2
16 |                   |x| - 0.5 / sigma^2,  otherwise
17 | """
18 | sigma2 = sigma * sigma
19 |
20 | inside_mul_abs = C.abs(C.element_times(bbox_inside_weights, C.minus(bbox_pred, bbox_targets)))
21 |
22 | smooth_l1_sign = C.less(inside_mul_abs, 1.0 / sigma2)
23 | smooth_l1_option1 = C.element_times(C.element_times(inside_mul_abs, inside_mul_abs), 0.5 * sigma2)
24 | smooth_l1_option2 = C.minus(inside_mul_abs, 0.5 / sigma2)
25 | smooth_l1_result = C.plus(C.element_times(smooth_l1_option1, smooth_l1_sign),
26 | C.element_times(smooth_l1_option2, C.minus(1.0, smooth_l1_sign)))
27 |
28 | return C.element_times(bbox_outside_weights, smooth_l1_result)
29 |
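30 | # Illustrative numpy reference of the piecewise formula above (a hand-check
31 | # sketch; training uses the CNTK graph version returned by SmoothL1Loss):
32 | def _smooth_l1_reference(x, sigma=1.0):
33 |     sigma2 = sigma * sigma
34 |     ax = np.abs(x)
35 |     # quadratic branch near zero, linear branch elsewhere
36 |     return np.where(ax < 1.0 / sigma2,
37 |                     0.5 * sigma2 * ax * ax,
38 |                     ax - 0.5 / sigma2)
39 |
40 | if __name__ == '__main__':
41 |     print(_smooth_l1_reference(np.array([-2., -0.5, 0., 0.5, 2.])))
42 |     # -> [1.5   0.125 0.    0.125 1.5  ]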
--------------------------------------------------------------------------------
/utils/rpn/generate_anchors.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 |
9 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
10 | scales=2**np.arange(3, 6)):
11 | """
12 | Generate anchor (reference) windows by enumerating aspect ratios X
13 | scales wrt a reference (0, 0, 15, 15) window.
14 | """
15 |
16 | base_anchor = np.array([1, 1, base_size, base_size]) - 1
17 | ratio_anchors = _ratio_enum(base_anchor, ratios)
18 | anchors = np.vstack([_scale_enum(ratio_anchors[i, :], scales)
19 |                          for i in range(ratio_anchors.shape[0])])
20 | return anchors
21 |
22 | def _whctrs(anchor):
23 | """
24 | Return width, height, x center, and y center for an anchor (window).
25 | """
26 |
27 | w = anchor[2] - anchor[0] + 1
28 | h = anchor[3] - anchor[1] + 1
29 | x_ctr = anchor[0] + 0.5 * (w - 1)
30 | y_ctr = anchor[1] + 0.5 * (h - 1)
31 | return w, h, x_ctr, y_ctr
32 |
33 | def _mkanchors(ws, hs, x_ctr, y_ctr):
34 | """
35 | Given a vector of widths (ws) and heights (hs) around a center
36 | (x_ctr, y_ctr), output a set of anchors (windows).
37 | """
38 |
39 | ws = ws[:, np.newaxis]
40 | hs = hs[:, np.newaxis]
41 | anchors = np.hstack((x_ctr - 0.5 * (ws - 1),
42 | y_ctr - 0.5 * (hs - 1),
43 | x_ctr + 0.5 * (ws - 1),
44 | y_ctr + 0.5 * (hs - 1)))
45 | return anchors
46 |
47 | def _ratio_enum(anchor, ratios):
48 | """
49 | Enumerate a set of anchors for each aspect ratio wrt an anchor.
50 | """
51 |
52 | w, h, x_ctr, y_ctr = _whctrs(anchor)
53 | size = w * h
54 | size_ratios = size / ratios
55 | ws = np.round(np.sqrt(size_ratios))
56 | hs = np.round(ws * ratios)
57 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
58 | return anchors
59 |
60 | def _scale_enum(anchor, scales):
61 | """
62 | Enumerate a set of anchors for each scale wrt an anchor.
63 | """
64 |
65 | w, h, x_ctr, y_ctr = _whctrs(anchor)
66 | ws = w * scales
67 | hs = h * scales
68 | anchors = _mkanchors(ws, hs, x_ctr, y_ctr)
69 | return anchors
70 |
71 | if __name__ == '__main__':
72 | import time
73 | t = time.time()
74 | a = generate_anchors()
75 | print (time.time() - t)
76 | print (a)
77 | from IPython import embed; embed()
78 |
--------------------------------------------------------------------------------
/utils/rpn/proposal_layer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | from cntk import output_variable, FreeDimension
8 | from cntk.ops.functions import UserFunction
9 | import numpy as np
10 | import yaml
11 | from utils.rpn.generate_anchors import generate_anchors
12 | from utils.rpn.bbox_transform import bbox_transform_inv, clip_boxes
13 | from utils.nms.nms_wrapper import nms
14 |
15 | try:
16 | from config import cfg
17 | except ImportError:
18 | from utils.default_config import cfg
19 |
20 | DEBUG = False
21 |
22 | class ProposalLayer(UserFunction):
23 | '''
24 | Outputs object detection proposals by applying estimated bounding-box
25 | transformations to a set of regular boxes (called "anchors").
26 | '''
27 |
28 | def __init__(self, arg1, arg2, arg3, name='ProposalLayer', param_str=None):
29 | super(ProposalLayer, self).__init__([arg1, arg2, arg3], name=name)
30 | self.param_str_ = param_str if param_str is not None else "'feat_stride': 16\n'scales':\n - 8 \n - 16 \n - 32"
31 |
32 | # parse the layer parameter string, which must be valid YAML
33 |         layer_params = yaml.safe_load(self.param_str_)
34 | self._feat_stride = layer_params['feat_stride']
35 | anchor_scales = layer_params.get('scales', (8, 16, 32))
36 | self._anchors = generate_anchors(scales=np.array(anchor_scales))
37 | self._num_anchors = self._anchors.shape[0]
38 |
39 | if DEBUG:
40 | print ('feat_stride: {}'.format(self._feat_stride))
41 | print ('anchors:')
42 | print (self._anchors)
43 |
44 | def infer_outputs(self):
45 | # rois blob: holds R regions of interest, each is a 5-tuple
46 | # (n, x1, y1, x2, y2) specifying an image batch index n and a
47 | # rectangle (x1, y1, x2, y2)
48 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python
49 | proposalShape = (FreeDimension, 4)
50 |
51 | return [output_variable(proposalShape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
52 | name="rpn_rois_raw", needs_gradient=False)]
53 |
54 | def forward(self, arguments, device=None, outputs_to_retain=None):
55 | # Algorithm:
56 | #
57 | # for each (H, W) location i
58 | # generate A anchor boxes centered on cell i
59 | # apply predicted bbox deltas at cell i to each of the A anchors
60 | # clip predicted boxes to image
61 | # remove predicted boxes with either height or width < threshold
62 | # sort all (proposal, score) pairs by score from highest to lowest
63 | # take top pre_nms_topN proposals before NMS
64 | # apply NMS with threshold 0.7 to remaining proposals
65 | # take after_nms_topN proposals after NMS
66 | # return the top proposals (-> RoIs top, scores top)
67 |
68 | # use potentially different number of proposals for training vs evaluation
69 | if len(outputs_to_retain) == 0:
70 |             # evaluation mode (no outputs to retain)
71 | pre_nms_topN = cfg["TEST"].RPN_PRE_NMS_TOP_N
72 | post_nms_topN = cfg["TEST"].RPN_POST_NMS_TOP_N
73 | nms_thresh = cfg["TEST"].RPN_NMS_THRESH
74 | min_size = cfg["TEST"].RPN_MIN_SIZE
75 | else:
76 | pre_nms_topN = cfg["TRAIN"].RPN_PRE_NMS_TOP_N
77 | post_nms_topN = cfg["TRAIN"].RPN_POST_NMS_TOP_N
78 | nms_thresh = cfg["TRAIN"].RPN_NMS_THRESH
79 | min_size = cfg["TRAIN"].RPN_MIN_SIZE
80 |
81 | bottom = arguments
82 | assert bottom[0].shape[0] == 1, \
83 | 'Only single item batches are supported'
84 |
85 | # the first set of _num_anchors channels are bg probs
86 | # the second set are the fg probs, which we want
87 | scores = bottom[0][:, self._num_anchors:, :, :]
88 | bbox_deltas = bottom[1]
89 | im_info = bottom[2][0]
90 |
91 | if DEBUG:
92 | # im_info = (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
93 | # e.g.(1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
94 | print ('im_size: ({}, {})'.format(im_info[0], im_info[1]))
95 | print ('scaled im_size: ({}, {})'.format(im_info[2], im_info[3]))
96 | print ('original im_size: ({}, {})'.format(im_info[4], im_info[5]))
97 |
98 | # 1. Generate proposals from bbox deltas and shifted anchors
99 | height, width = scores.shape[-2:]
100 |
101 | if DEBUG:
102 | print ('score map size: {}'.format(scores.shape))
103 |
104 | # Enumerate all shifts
105 | shift_x = np.arange(0, width) * self._feat_stride
106 | shift_y = np.arange(0, height) * self._feat_stride
107 | shift_x, shift_y = np.meshgrid(shift_x, shift_y)
108 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
109 | shift_x.ravel(), shift_y.ravel())).transpose()
110 |
111 | # Enumerate all shifted anchors:
112 | #
113 | # add A anchors (1, A, 4) to
114 | # cell K shifts (K, 1, 4) to get
115 | # shift anchors (K, A, 4)
116 | # reshape to (K*A, 4) shifted anchors
117 | A = self._num_anchors
118 | K = shifts.shape[0]
119 | anchors = self._anchors.reshape((1, A, 4)) + \
120 | shifts.reshape((1, K, 4)).transpose((1, 0, 2))
121 | anchors = anchors.reshape((K * A, 4))
122 |
123 | # Transpose and reshape predicted bbox transformations to get them
124 | # into the same order as the anchors:
125 | #
126 | # bbox deltas will be (1, 4 * A, H, W) format
127 | # transpose to (1, H, W, 4 * A)
128 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
129 | # in slowest to fastest order
130 | bbox_deltas = bbox_deltas.transpose((0, 2, 3, 1)).reshape((-1, 4))
131 |
132 | # Same story for the scores:
133 | #
134 | # scores are (1, A, H, W) format
135 | # transpose to (1, H, W, A)
136 | # reshape to (1 * H * W * A, 1) where rows are ordered by (h, w, a)
137 | scores = scores.transpose((0, 2, 3, 1)).reshape((-1, 1))
138 |
139 | # Convert anchors into proposals via bbox transformations
140 | proposals = bbox_transform_inv(anchors, bbox_deltas)
141 |
142 | # 2. clip predicted boxes to image
143 | proposals = clip_boxes(proposals, im_info)
144 |
145 | # 3. remove predicted boxes with either height or width < threshold
146 | # (NOTE: convert min_size to input image scale. Original size = im_info[4:6], scaled size = im_info[2:4])
147 | cntk_image_scale = im_info[2] / im_info[4]
148 | keep = _filter_boxes(proposals, min_size * cntk_image_scale)
149 | proposals = proposals[keep, :]
150 | scores = scores[keep]
151 |
152 | # 4. sort all (proposal, score) pairs by score from highest to lowest
153 | # 5. take top pre_nms_topN (e.g. 6000)
154 | order = scores.ravel().argsort()[::-1]
155 | if pre_nms_topN > 0:
156 | order = order[:pre_nms_topN]
157 | proposals = proposals[order, :]
158 | scores = scores[order]
159 |
160 | # 6. apply nms (e.g. threshold = 0.7)
161 | # 7. take after_nms_topN (e.g. 300)
162 | # 8. return the top proposals (-> RoIs top)
163 | keep = nms(np.hstack((proposals, scores)), nms_thresh)
164 | if post_nms_topN > 0:
165 | keep = keep[:post_nms_topN]
166 | proposals = proposals[keep, :]
167 | scores = scores[keep]
168 |
169 | # pad with zeros if too few rois were found
170 | num_found_proposals = proposals.shape[0]
171 | if num_found_proposals < post_nms_topN:
172 | if DEBUG:
173 | print("Only {} proposals generated in ProposalLayer".format(num_found_proposals))
174 | proposals_padded = np.zeros(((post_nms_topN,) + proposals.shape[1:]), dtype=np.float32)
175 | proposals_padded[:num_found_proposals, :] = proposals
176 | proposals = proposals_padded
177 |
178 | # Output rois blob
179 | # Our RPN implementation only supports a single input image, so all
180 | # batch inds are 0
181 | # for CNTK: add batch axis to output shape
182 | proposals.shape = (1,) + proposals.shape
183 |
184 | return None, proposals
185 |
186 | def backward(self, state, root_gradients, variables):
187 | """This layer does not propagate gradients."""
188 | pass
189 |
190 | def clone(self, cloned_inputs):
191 | return ProposalLayer(cloned_inputs[0], cloned_inputs[1], cloned_inputs[2], param_str=self.param_str_)
192 |
193 | def serialize(self):
194 | internal_state = {}
195 | internal_state['param_str'] = self.param_str_
196 |
197 | return internal_state
198 |
199 | @staticmethod
200 | def deserialize(inputs, name, state):
201 | param_str = state['param_str']
202 |
203 | return ProposalLayer(inputs[0], inputs[1], inputs[2], name=name, param_str=param_str)
204 |
205 |
206 | def _filter_boxes(boxes, min_size):
207 | """Remove all boxes with any side smaller than min_size."""
208 | ws = boxes[:, 2] - boxes[:, 0] + 1
209 | hs = boxes[:, 3] - boxes[:, 1] + 1
210 |     if np.any(np.isnan(ws)):
211 |         print('Warning: NaN box width encountered in _filter_boxes')
212 | keep = np.where((ws >= min_size) & (hs >= min_size))[0]
213 | return keep
214 |
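215 | # Illustrative sketch (hypothetical values; running it assumes CNTK and the nms
216 | # module imported at the top of this file are available): steps 3-5 of forward()
217 | # above -- drop too-small boxes, then keep the top-N proposals by score.
218 | if __name__ == '__main__':
219 |     proposals = np.array([[0., 0., 100., 100.],
220 |                           [10., 10., 12., 12.],    # smaller than min_size
221 |                           [50., 50., 200., 220.]])
222 |     scores = np.array([[0.3], [0.9], [0.8]])
223 |     keep = _filter_boxes(proposals, min_size=16)
224 |     proposals, scores = proposals[keep, :], scores[keep]
225 |     order = scores.ravel().argsort()[::-1][:2]     # top pre_nms_topN (here 2)
226 |     print(proposals[order, :])   # highest-scoring surviving boxes first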
--------------------------------------------------------------------------------
/utils/rpn/proposal_target_layer.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | from cntk import output_variable, FreeDimension
8 | from cntk.ops.functions import UserFunction
9 | import yaml
10 | import numpy as np
11 | import numpy.random as npr
12 | from utils.rpn.bbox_transform import bbox_transform
13 | from utils.cython_modules.cython_bbox import bbox_overlaps
14 |
15 | try:
16 | from config import cfg
17 | except ImportError:
18 | from utils.default_config import cfg
19 |
20 | DEBUG = False
21 |
22 | class ProposalTargetLayer(UserFunction):
23 | '''
24 | Assign object detection proposals to ground-truth targets. Produces proposal
25 | classification labels and bounding-box regression targets.
26 | '''
27 |
28 | def __init__(self, arg1, arg2, name='ProposalTargetLayer', param_str=None, deterministic=False):
29 | super(ProposalTargetLayer, self).__init__([arg1, arg2], name=name)
30 | self.param_str_ = param_str if param_str is not None else "'num_classes': 2"
31 |
32 | # parse the layer parameter string, which must be valid YAML
33 |         layer_params = yaml.safe_load(self.param_str_)
34 | self._num_classes = layer_params['num_classes']
35 |         self._deterministic_mode = deterministic
36 |
37 | self._count = 0
38 | self._fg_num = 0
39 | self._bg_num = 0
40 |
41 | def infer_outputs(self):
42 | # sampled rois (0, x1, y1, x2, y2)
43 | # for CNTK the proposal shape is [4 x roisPerImage], and mirrored in Python
44 | rois_shape = (FreeDimension, 4)
45 | labels_shape = (FreeDimension, self._num_classes)
46 | bbox_targets_shape = (FreeDimension, self._num_classes * 4)
47 | bbox_inside_weights_shape = (FreeDimension, self._num_classes * 4)
48 |
49 | return [output_variable(rois_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
50 | name="rpn_target_rois_raw", needs_gradient=False),
51 | output_variable(labels_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
52 | name="label_targets_raw", needs_gradient=False),
53 | output_variable(bbox_targets_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
54 | name="bbox_targets_raw", needs_gradient=False),
55 | output_variable(bbox_inside_weights_shape, self.inputs[0].dtype, self.inputs[0].dynamic_axes,
56 | name="bbox_inside_w_raw", needs_gradient=False)]
57 |
58 | def forward(self, arguments, outputs, device=None, outputs_to_retain=None):
59 | bottom = arguments
60 |
61 | # Proposal ROIs (0, x1, y1, x2, y2) coming from RPN
62 | # (i.e., rpn.proposal_layer.ProposalLayer), or any other source
63 | all_rois = bottom[0][0,:]
64 | # remove zero padded proposals
65 | keep0 = np.where(
66 | ((all_rois[:, 2] - all_rois[:, 0]) > 0) &
67 | ((all_rois[:, 3] - all_rois[:, 1]) > 0)
68 | )
69 | all_rois = all_rois[keep0]
70 |
71 | # GT boxes (x1, y1, x2, y2, label)
72 | # TODO(rbg): it's annoying that sometimes I have extra info before
73 | # and other times after box coordinates -- normalize to one format
74 | gt_boxes = bottom[1][0,:]
75 | # remove zero padded ground truth boxes
76 | keep1 = np.where(
77 | ((gt_boxes[:,2] - gt_boxes[:,0]) > 0) &
78 | ((gt_boxes[:,3] - gt_boxes[:,1]) > 0)
79 | )
80 | gt_boxes = gt_boxes[keep1]
81 |
82 | assert gt_boxes.shape[0] > 0, \
83 | "No ground truth boxes provided"
84 |
85 | # Include ground-truth boxes in the set of candidate rois
86 | # for CNTK: add batch index axis with all zeros to both inputs
87 | all_rois = np.vstack((all_rois, gt_boxes[:, :-1]))
88 | zeros = np.zeros((all_rois.shape[0], 1), dtype=all_rois.dtype)
89 | all_rois = np.hstack((zeros, all_rois))
90 |
91 | # Sanity check: single batch only
92 | assert np.all(all_rois[:, 0] == 0), \
93 | 'Only single item batches are supported'
94 |
95 | rois_per_image = cfg.TRAIN.BATCH_SIZE
96 | fg_rois_per_image = np.round(cfg["TRAIN"].FG_FRACTION * rois_per_image).astype(int)
97 |
98 | # Sample rois with classification labels and bounding box regression
99 | # targets
100 | labels, rois, bbox_targets, bbox_inside_weights = _sample_rois(
101 | all_rois, gt_boxes, fg_rois_per_image,
102 | rois_per_image, self._num_classes,
103 |             deterministic=self._deterministic_mode)
104 |
105 | if DEBUG:
106 | print ('num rois: {}'.format(rois_per_image))
107 | print ('num fg: {}'.format((labels > 0).sum()))
108 | print ('num bg: {}'.format((labels == 0).sum()))
109 | self._count += 1
110 | self._fg_num += (labels > 0).sum()
111 | self._bg_num += (labels == 0).sum()
112 | print ('num fg avg: {}'.format(self._fg_num / self._count))
113 | print ('num bg avg: {}'.format(self._bg_num / self._count))
114 | print ('ratio: {:.3f}'.format(float(self._fg_num) / float(self._bg_num)))
115 |
116 | # pad with zeros if too few rois were found
117 | num_found_rois = rois.shape[0]
118 | if num_found_rois < rois_per_image:
119 | rois_padded = np.zeros((rois_per_image, rois.shape[1]), dtype=np.float32)
120 | rois_padded[:num_found_rois, :] = rois
121 | rois = rois_padded
122 |
123 |             labels_padded = np.zeros((rois_per_image,), dtype=np.float32)
124 | labels_padded[:num_found_rois] = labels
125 | labels = labels_padded
126 |
127 | bbox_targets_padded = np.zeros((rois_per_image, bbox_targets.shape[1]), dtype=np.float32)
128 | bbox_targets_padded[:num_found_rois, :] = bbox_targets
129 | bbox_targets = bbox_targets_padded
130 |
131 | bbox_inside_weights_padded = np.zeros((rois_per_image, bbox_inside_weights.shape[1]), dtype=np.float32)
132 | bbox_inside_weights_padded[:num_found_rois, :] = bbox_inside_weights
133 | bbox_inside_weights = bbox_inside_weights_padded
134 |
135 | # for CNTK: get rid of batch ind zeros and add batch axis
136 | rois = rois[:,1:]
137 |
138 | # sampled rois
139 | rois.shape = (1,) + rois.shape
140 | outputs[self.outputs[0]] = np.ascontiguousarray(rois)
141 |
142 | # classification labels
143 | labels_as_int = [i.item() for i in labels.astype(int)]
144 | labels_dense = np.eye(self._num_classes, dtype=np.float32)[labels_as_int]
145 | labels_dense.shape = (1,) + labels_dense.shape # batch axis
146 | outputs[self.outputs[1]] = labels_dense
147 |
148 | # bbox_targets
149 | bbox_targets.shape = (1,) + bbox_targets.shape # batch axis
150 | outputs[self.outputs[2]] = np.ascontiguousarray(bbox_targets)
151 |
152 | # bbox_inside_weights
153 | bbox_inside_weights.shape = (1,) + bbox_inside_weights.shape # batch axis
154 | outputs[self.outputs[3]] = np.ascontiguousarray(bbox_inside_weights)
155 |
156 | def backward(self, state, root_gradients, variables):
157 | """This layer does not propagate gradients."""
158 | pass
159 |
160 | def clone(self, cloned_inputs):
161 | return ProposalTargetLayer(cloned_inputs[0], cloned_inputs[1], param_str=self.param_str_)
162 |
163 | def serialize(self):
164 | internal_state = {}
165 | internal_state['param_str'] = self.param_str_
166 | return internal_state
167 |
168 | @staticmethod
169 | def deserialize(inputs, name, state):
170 | param_str = state['param_str']
171 | return ProposalTargetLayer(inputs[0], inputs[1], name=name, param_str=param_str)
172 |
173 |
174 | def _get_bbox_regression_labels(bbox_target_data, num_classes):
175 | """Bounding-box regression targets (bbox_target_data) are stored in a
176 | compact form N x (class, tx, ty, tw, th)
177 |
178 | This function expands those targets into the 4-of-4*K representation used
179 | by the network (i.e. only one class has non-zero targets).
180 |
181 | Returns:
182 | bbox_target (ndarray): N x 4K blob of regression targets
183 | bbox_inside_weights (ndarray): N x 4K blob of loss weights
184 | """
185 |
186 | clss = bbox_target_data[:, 0].astype(int)
187 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
188 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
189 | inds = np.where(clss > 0)[0]
190 | for ind in inds:
191 | cls = clss[ind]
192 | start = 4 * cls
193 | end = start + 4
194 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
195 | bbox_inside_weights[ind, start:end] = [1.0, 1.0, 1.0, 1.0]
196 | return bbox_targets, bbox_inside_weights
197 |
198 |
199 | def _compute_targets(ex_rois, gt_rois, labels):
200 | """Compute bounding-box regression targets for an image."""
201 |
202 | assert ex_rois.shape[0] == gt_rois.shape[0]
203 | assert ex_rois.shape[1] == 4
204 | assert gt_rois.shape[1] == 4
205 |
206 | targets = bbox_transform(ex_rois, gt_rois)
207 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
208 | # Optionally normalize targets by a precomputed mean and stdev
209 | targets = ((targets - np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS))
210 | / np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS))
211 |
212 | return np.hstack((labels[:, np.newaxis], targets)).astype(np.float32, copy=False)
213 |
214 | def _sample_rois(all_rois, gt_boxes, fg_rois_per_image, rois_per_image, num_classes, deterministic=False):
215 | """Generate a random sample of RoIs comprising foreground and background
216 | examples.
217 | """
218 | # overlaps: (rois x gt_boxes)
219 | overlaps = bbox_overlaps(
220 |         np.ascontiguousarray(all_rois[:, 1:5], dtype=np.float64),
221 |         np.ascontiguousarray(gt_boxes[:, :4], dtype=np.float64))
222 | gt_assignment = overlaps.argmax(axis=1)
223 | max_overlaps = overlaps.max(axis=1)
224 | labels = gt_boxes[gt_assignment, 4]
225 |
226 | # Select foreground RoIs as those with >= FG_THRESH overlap
227 | fg_inds = np.where(max_overlaps >= cfg["TRAIN"].FG_THRESH)[0]
228 | # Guard against the case when an image has fewer than fg_rois_per_image
229 | # foreground RoIs
230 | fg_rois_per_this_image = min(fg_rois_per_image, fg_inds.size)
231 |
232 | # Sample foreground regions without replacement
233 | if fg_inds.size > 0:
234 | if deterministic:
235 | fg_inds = fg_inds[:fg_rois_per_this_image]
236 | else:
237 | fg_inds = npr.choice(fg_inds, size=fg_rois_per_this_image, replace=False)
238 |
239 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
240 | bg_inds = np.where((max_overlaps < cfg["TRAIN"].BG_THRESH_HI) &
241 | (max_overlaps >= cfg["TRAIN"].BG_THRESH_LO))[0]
242 | # Compute number of background RoIs to take from this image (guarding
243 | # against there being fewer than desired)
244 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
245 | bg_rois_per_this_image = min(bg_rois_per_this_image, bg_inds.size)
246 | # Sample background regions without replacement
247 | if bg_inds.size > 0:
248 | if deterministic:
249 | bg_inds = bg_inds[:bg_rois_per_this_image]
250 | else:
251 | bg_inds = npr.choice(bg_inds, size=bg_rois_per_this_image, replace=False)
252 |
253 | # The indices that we're selecting (both fg and bg)
254 | keep_inds = np.append(fg_inds, bg_inds)
255 | # Select sampled values from various arrays:
256 | labels = labels[keep_inds]
257 | # Clamp labels for the background RoIs to 0
258 | labels[fg_rois_per_this_image:] = 0
259 | rois = all_rois[keep_inds]
260 |
261 | bbox_target_data = _compute_targets(
262 | rois[:, 1:5], gt_boxes[gt_assignment[keep_inds], :4], labels)
263 |
264 | bbox_targets, bbox_inside_weights = \
265 | _get_bbox_regression_labels(bbox_target_data, num_classes)
266 |
267 | return labels, rois, bbox_targets, bbox_inside_weights
268 |
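269 | # Illustrative sketch (hypothetical values; running it assumes this module's
270 | # imports resolve): expanding compact N x (class, tx, ty, tw, th) targets into
271 | # the 4*K layout used by the network. With num_classes=3, a class-2 roi gets
272 | # its deltas written to columns 8:12, and only those columns are weighted.
273 | if __name__ == '__main__':
274 |     compact = np.array([[2., 0.1, 0.2, 0.3, 0.4],    # foreground roi, class 2
275 |                         [0., 0.0, 0.0, 0.0, 0.0]])   # background roi
276 |     targets, weights = _get_bbox_regression_labels(compact, num_classes=3)
277 |     print(targets.shape)       # (2, 12)
278 |     print(targets[0, 8:12])    # [0.1 0.2 0.3 0.4]
279 |     print(weights[0, 8:12])    # [1. 1. 1. 1.]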
--------------------------------------------------------------------------------
/utils/rpn/rpn_helpers.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import numpy as np
8 | import cntk
9 | from cntk import reduce_sum
10 | from cntk import user_function, relu, softmax, slice, splice, reshape, element_times, plus, minus, alias, classification_error
11 | from cntk.initializer import glorot_uniform, normal
12 | from cntk.layers import Convolution
13 | from cntk.losses import cross_entropy_with_softmax
14 | from utils.rpn.anchor_target_layer import AnchorTargetLayer
15 | from utils.rpn.proposal_layer import ProposalLayer
16 | from utils.rpn.proposal_target_layer import ProposalTargetLayer
17 | from utils.rpn.cntk_smoothL1_loss import SmoothL1Loss
18 | try:
19 | from config import cfg
20 | except ImportError:
21 | from utils.default_config import cfg
22 |
23 | # Please keep in sync with Readme.md
24 | def create_rpn(conv_out, scaled_gt_boxes, im_info, add_loss_functions=True,
25 | proposal_layer_param_string=None, conv_bias_init=0.0):
26 | '''
27 | Creates a region proposal network for object detection as proposed in the "Faster R-CNN" paper:
28 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
29 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
30 |
31 | Outputs object detection proposals by applying estimated bounding-box
32 | transformations to a set of regular boxes (called "anchors").
33 |
34 | Args:
35 | conv_out: The convolutional feature map, i.e. the output of the conv layers from the pretrained classification network
36 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
37 | im_info: A CNTK variable or constant containing
38 | (pad_width, pad_height, scaled_image_width, scaled_image_height, orig_img_width, orig_img_height)
39 | e.g. (1000, 1000, 1000, 600, 500, 300) for an original image of 600x300 that is scaled and padded to 1000x1000
40 |         add_loss_functions: If set to True, rpn_losses will be returned; otherwise None is returned for the losses
41 | proposal_layer_param_string: A yaml parameter string that is passed to the proposal layer.
42 |
43 | Returns:
44 | rpn_rois - the proposed ROIs
45 | rpn_losses - the losses (SmoothL1 loss for bbox regression plus cross entropy for objectness)
46 | '''
47 |
48 | # RPN network
49 | # init = 'normal', initValueScale = 0.01, initBias = 0.1
50 | num_channels = cfg["CNTK"].RPN_NUM_CHANNELS
51 |     rpn_conv_3x3 = Convolution((3, 3), num_channels, activation=relu, pad=True, strides=1,
52 |                                init=normal(scale=0.01), init_bias=conv_bias_init)(conv_out)
53 |     rpn_cls_score = Convolution((1, 1), 18, activation=None, name="rpn_cls_score",
54 |                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 2(bg/fg) * 9(anchors)
55 |     rpn_bbox_pred = Convolution((1, 1), 36, activation=None, name="rpn_bbox_pred",
56 |                                init=normal(scale=0.01), init_bias=conv_bias_init)(rpn_conv_3x3) # 4(coords) * 9(anchors)
57 |
58 | # apply softmax to get (bg, fg) probabilities and reshape predictions back to grid of (18, H, W)
59 | num_predictions = int(rpn_cls_score.shape[0] / 2)
60 | rpn_cls_score_rshp = reshape(rpn_cls_score, (2, num_predictions, rpn_cls_score.shape[1], rpn_cls_score.shape[2]), name="rpn_cls_score_rshp")
61 | p_rpn_cls_score_rshp = cntk.placeholder()
62 | rpn_cls_sm = softmax(p_rpn_cls_score_rshp, axis=0)
63 | rpn_cls_prob = cntk.as_block(rpn_cls_sm, [(p_rpn_cls_score_rshp, rpn_cls_score_rshp)], 'Softmax', 'rpn_cls_prob')
64 | rpn_cls_prob_reshape = reshape(rpn_cls_prob, rpn_cls_score.shape, name="rpn_cls_prob_reshape")
65 |
66 | # proposal layer
67 | rpn_rois_raw = user_function(ProposalLayer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, param_str=proposal_layer_param_string))
68 | rpn_rois = alias(rpn_rois_raw, name='rpn_rois')
69 |
70 | rpn_losses = None
71 |     if add_loss_functions:
72 | # RPN targets
73 |         # Note: rpn_cls_score is passed to AnchorTargetLayer only to obtain the width and height of the conv feature map
74 | atl = user_function(AnchorTargetLayer(rpn_cls_score, scaled_gt_boxes, im_info, param_str=proposal_layer_param_string))
75 | rpn_labels = atl.outputs[0]
76 | rpn_bbox_targets = atl.outputs[1]
77 | rpn_bbox_inside_weights = atl.outputs[2]
78 |
79 | # classification loss
80 | p_rpn_labels = cntk.placeholder()
81 | p_rpn_cls_score_rshp = cntk.placeholder()
82 |
83 | keeps = cntk.greater_equal(p_rpn_labels, 0.0)
84 | fg_labels = element_times(p_rpn_labels, keeps, name="fg_targets")
85 | bg_labels = minus(1, fg_labels, name="bg_targets")
86 | rpn_labels_ignore = splice(bg_labels, fg_labels, axis=0)
87 | rpn_ce = cross_entropy_with_softmax(p_rpn_cls_score_rshp, rpn_labels_ignore, axis=0)
88 | rpn_loss_cls = element_times(rpn_ce, keeps)
89 |
90 | # The terms that are accounted for in the cls loss are those that have a label >= 0
91 | cls_num_terms = reduce_sum(keeps)
92 | cls_normalization_factor = 1.0 / cls_num_terms
93 | normalized_rpn_cls_loss = reduce_sum(rpn_loss_cls) * cls_normalization_factor
94 |
95 | reduced_rpn_loss_cls = cntk.as_block(normalized_rpn_cls_loss,
96 | [(p_rpn_labels, rpn_labels), (p_rpn_cls_score_rshp, rpn_cls_score_rshp)],
97 | 'CE_with_ignore', 'norm_rpn_cls_loss')
98 |
99 | # regression loss
100 | p_rpn_bbox_pred = cntk.placeholder()
101 | p_rpn_bbox_targets = cntk.placeholder()
102 | p_rpn_bbox_inside_weights = cntk.placeholder()
103 | rpn_loss_bbox = SmoothL1Loss(cfg["CNTK"].SIGMA_RPN_L1, p_rpn_bbox_pred, p_rpn_bbox_targets, p_rpn_bbox_inside_weights, 1.0)
104 | # The bbox loss is normalized by the rpn batch size
105 | bbox_normalization_factor = 1.0 / cfg["TRAIN"].RPN_BATCHSIZE
106 | normalized_rpn_bbox_loss = reduce_sum(rpn_loss_bbox) * bbox_normalization_factor
107 |
108 | reduced_rpn_loss_bbox = cntk.as_block(normalized_rpn_bbox_loss,
109 | [(p_rpn_bbox_pred, rpn_bbox_pred), (p_rpn_bbox_targets, rpn_bbox_targets),
110 | (p_rpn_bbox_inside_weights, rpn_bbox_inside_weights)],
111 | 'SmoothL1Loss', 'norm_rpn_bbox_loss')
112 |
113 | rpn_losses = plus(reduced_rpn_loss_cls, reduced_rpn_loss_bbox, name="rpn_losses")
114 |
115 | return rpn_rois, rpn_losses
116 |
117 | def create_proposal_target_layer(rpn_rois, scaled_gt_boxes, num_classes):
118 | '''
119 | Creates a proposal target layer that is used for training an object detection network as proposed in the "Faster R-CNN" paper:
120 | Shaoqing Ren and Kaiming He and Ross Girshick and Jian Sun:
121 | "Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks"
122 |
123 | Assigns object detection proposals to ground-truth targets.
124 | Produces proposal classification labels and bounding-box regression targets.
125 | It also adds gt_boxes to candidates and samples fg and bg rois for training.
126 |
127 | Args:
128 | rpn_rois: The proposed ROIs, e.g. from a region proposal network
129 | scaled_gt_boxes: The ground truth boxes as (x1, y1, x2, y2, label). Coordinates are absolute pixels wrt. the input image.
130 | num_classes: The number of classes in the data set
131 |
132 | Returns:
133 | rpn_target_rois - a set of rois containing the ground truth and a number of sampled fg and bg ROIs
134 | label_targets - the target labels for the rois
135 | bbox_targets - the regression coefficient targets for the rois
136 | bbox_inside_weights - the weights for the regression loss
137 | '''
138 |
139 | ptl_param_string = "'num_classes': {}".format(num_classes)
140 | ptl = user_function(ProposalTargetLayer(rpn_rois, scaled_gt_boxes, param_str=ptl_param_string))
141 |
142 | # use an alias if you need to access the outputs, e.g., when cloning a trained network
143 | rois = alias(ptl.outputs[0], name='rpn_target_rois')
144 | label_targets = ptl.outputs[1]
145 | bbox_targets = ptl.outputs[2]
146 | bbox_inside_weights = ptl.outputs[3]
147 |
148 | return rois, label_targets, bbox_targets, bbox_inside_weights
149 |
150 |
151 |
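152 | # Illustrative wiring sketch (hypothetical shapes, kept commented out because it
153 | # builds a full CNTK graph; it mirrors the inputs used in utils/unit_tests.py).
154 | # A real model would pass the conv output of a pretrained network as conv_out.
155 | #
156 | # from cntk.ops import input_variable
157 | # conv_out = input_variable((512, 61, 61))   # conv feature map
158 | # gt_boxes = input_variable((50, 5))         # (x1, y1, x2, y2, label)
159 | # im_info = input_variable((6,))
160 | # rpn_rois, rpn_losses = create_rpn(conv_out, gt_boxes, im_info)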
--------------------------------------------------------------------------------
/utils/unit_tests.py:
--------------------------------------------------------------------------------
1 | # Copyright (c) Microsoft. All rights reserved.
2 |
3 | # Licensed under the MIT license. See LICENSE.md file in the project root
4 | # for full license information.
5 | # ==============================================================================
6 |
7 | import os, sys
8 | abs_path = os.path.dirname(os.path.abspath(__file__))
9 | sys.path.append(os.path.join(abs_path))
10 | sys.path.append(os.path.join(abs_path, ".."))
11 |
12 | import pytest
13 | import numpy as np
14 | import cntk
15 | from cntk import user_function
16 | from cntk.ops import input_variable
17 | from rpn.proposal_layer import ProposalLayer as CntkProposalLayer
18 | from rpn.proposal_target_layer import ProposalTargetLayer as CntkProposalTargetLayer
19 | from rpn.anchor_target_layer import AnchorTargetLayer as CntkAnchorTargetLayer
20 | from caffe_layers.proposal_layer import ProposalLayer as CaffeProposalLayer
21 | from caffe_layers.proposal_target_layer import ProposalTargetLayer as CaffeProposalTargetLayer
22 | from caffe_layers.anchor_target_layer import AnchorTargetLayer as CaffeAnchorTargetLayer
23 |
24 | def test_proposal_layer():
25 | cls_prob_shape_cntk = (18,61,61)
26 | cls_prob_shape_caffe = (18,61,61)
27 | rpn_bbox_shape = (36, 61, 61)
28 | dims_info_shape = (6,)
29 | im_info = [1000, 1000, 1]
30 |
31 | # Create input tensors with values
32 | cls_prob = np.random.random_sample(cls_prob_shape_cntk).astype(np.float32)
33 | rpn_bbox_pred = np.random.random_sample(rpn_bbox_shape).astype(np.float32)
34 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)
35 |
36 | # Create CNTK layer and call forward
37 | cls_prob_var = input_variable(cls_prob_shape_cntk)
38 | rpn_bbox_var = input_variable(rpn_bbox_shape)
39 | dims_info_var = input_variable(dims_info_shape)
40 |
41 | cntk_layer = user_function(CntkProposalLayer(cls_prob_var, rpn_bbox_var, dims_info_var))
42 | state, cntk_output = cntk_layer.forward({cls_prob_var: [cls_prob], rpn_bbox_var: [rpn_bbox_pred], dims_info_var: dims_input})
43 | cntk_proposals = cntk_output[next(iter(cntk_output))][0]
44 |
45 | # Create Caffe layer and call forward
46 | cls_prob_caffe = cls_prob.reshape(cls_prob_shape_caffe)
47 |     bottom = [np.array([cls_prob_caffe]), np.array([rpn_bbox_pred]), np.array([im_info])]
48 |     top = None  # unused: the Caffe layer returns its outputs directly for unit testing
49 |
50 | param_str = "'feat_stride': 16"
51 | caffe_layer = CaffeProposalLayer()
52 | caffe_layer.set_param_str(param_str)
53 | caffe_layer.setup(bottom, top)
54 | caffe_output = caffe_layer.forward(bottom, top)
55 | caffe_proposals = caffe_output[:,1:]
56 |
57 | # assert that results are exactly the same
58 | assert cntk_proposals.shape == caffe_proposals.shape
59 | assert np.allclose(cntk_proposals, caffe_proposals, rtol=0.0, atol=0.0)
60 | print("Verified ProposalLayer")
61 |
62 | def test_proposal_target_layer():
63 | num_rois = 400
64 | all_rois_shape_cntk = (num_rois,4)
65 | num_gt_boxes = 50
66 | gt_boxes_shape_cntk = (num_gt_boxes,5)
67 |
68 | # Create input tensors with values
69 | x1y1 = np.random.random_sample((num_rois, 2)) * 500
70 | wh = np.random.random_sample((num_rois, 2)) * 400
71 | x2y2 = x1y1 + wh + 50
72 | all_rois = np.hstack((x1y1, x2y2)).astype(np.float32)
73 |
74 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
75 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400
76 | x2y2 = x1y1 + wh + 50
77 |     label = np.random.random_sample((num_gt_boxes, 1))
78 |     label = label * 17.0  # float class ids in [0, 17)
79 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)
80 |
81 | # Create CNTK layer and call forward
82 | all_rois_var = input_variable(all_rois_shape_cntk)
83 | gt_boxes_var = input_variable(gt_boxes_shape_cntk)
84 |
85 | cntk_layer = user_function(CntkProposalTargetLayer(all_rois_var, gt_boxes_var, param_str="'num_classes': 17", deterministic=True))
86 | state, cntk_output = cntk_layer.forward({all_rois_var: [all_rois], gt_boxes_var: [gt_boxes]})
87 |
88 | roi_key = [k for k in cntk_output if 'rpn_target_rois_raw' in str(k)][0]
89 | labels_key = [k for k in cntk_output if 'label_targets_raw' in str(k)][0]
90 | bbox_key = [k for k in cntk_output if 'bbox_targets_raw' in str(k)][0]
91 | bbox_w_key = [k for k in cntk_output if 'bbox_inside_w_raw' in str(k)][0]
92 |
93 | cntk_rois = cntk_output[roi_key][0]
94 | cntk_labels_one_hot = cntk_output[labels_key][0]
95 | cntk_bbox_targets = cntk_output[bbox_key][0]
96 | cntk_bbox_inside_weights = cntk_output[bbox_w_key][0]
97 |
98 | cntk_labels = np.argmax(cntk_labels_one_hot, axis=1)
99 |
100 | # Create Caffe layer and call forward
101 | zeros = np.zeros((all_rois.shape[0], 1), dtype=gt_boxes.dtype)
102 | all_rois_caffe = np.hstack((zeros, all_rois))
103 |
104 |     bottom = [np.array(all_rois_caffe), np.array(gt_boxes)]
105 |     top = None  # unused: the Caffe layer returns its outputs directly for unit testing
106 |
107 | param_str = "'num_classes': 17"
108 | caffe_layer = CaffeProposalTargetLayer()
109 | caffe_layer.set_param_str(param_str)
110 | caffe_layer.setup(bottom, top)
111 | caffe_layer.set_deterministic_mode()
112 |
113 | caffe_rois, caffe_labels, caffe_bbox_targets, caffe_bbox_inside_weights = caffe_layer.forward(bottom, top)
114 | caffe_rois = caffe_rois[:,1:]
115 |
116 |     num_caffe_rois = caffe_rois.shape[0]  # compare only as many ROIs as the Caffe layer returned
117 | cntk_rois = cntk_rois[:num_caffe_rois,:]
118 | cntk_labels = cntk_labels[:num_caffe_rois]
119 | cntk_bbox_targets = cntk_bbox_targets[:num_caffe_rois,:]
120 | cntk_bbox_inside_weights = cntk_bbox_inside_weights[:num_caffe_rois,:]
121 |
122 | # assert that results are exactly the same
123 | assert cntk_rois.shape == caffe_rois.shape
124 | assert cntk_labels.shape == caffe_labels.shape
125 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
126 | assert cntk_bbox_inside_weights.shape == caffe_bbox_inside_weights.shape
127 |
128 |     caffe_labels = [int(x) for x in caffe_labels]  # cast to int to match the argmax'ed CNTK labels
129 |
130 | assert np.allclose(cntk_rois, caffe_rois, rtol=0.0, atol=0.0)
131 | assert np.allclose(cntk_labels, caffe_labels, rtol=0.0, atol=0.0)
132 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0)
133 | assert np.allclose(cntk_bbox_inside_weights, caffe_bbox_inside_weights, rtol=0.0, atol=0.0)
134 | print("Verified ProposalTargetLayer")
135 |
136 | def test_anchor_target_layer():
137 |     rpn_cls_score_shape_cntk = (1, 18, 61, 61)  # batch x (2 scores x 9 anchors) x H x W
138 | num_gt_boxes = 50
139 | gt_boxes_shape_cntk = (num_gt_boxes,5)
140 | dims_info_shape = (6,)
141 | im_info = [1000, 1000, 1]
142 |
143 | # Create input tensors with values
144 | rpn_cls_score_dummy = np.random.random_sample(rpn_cls_score_shape_cntk).astype(np.float32)
145 | dims_input = np.array([1000, 1000, 1000, 1000, 1000, 1000]).astype(np.float32)
146 |
147 | x1y1 = np.random.random_sample((num_gt_boxes, 2)) * 500
148 | wh = np.random.random_sample((num_gt_boxes, 2)) * 400
149 | x2y2 = x1y1 + wh + 50
150 |     label = np.random.random_sample((num_gt_boxes, 1))
151 |     label = label * 17.0  # float class ids in [0, 17)
152 | gt_boxes = np.hstack((x1y1, x2y2, label)).astype(np.float32)
153 |
154 | # Create CNTK layer and call forward
155 | rpn_cls_score_var = input_variable(rpn_cls_score_shape_cntk)
156 | gt_boxes_var = input_variable(gt_boxes_shape_cntk)
157 | dims_info_var = input_variable(dims_info_shape)
158 |
159 | cntk_layer = user_function(CntkAnchorTargetLayer(rpn_cls_score_var, gt_boxes_var, dims_info_var, deterministic=True))
160 | state, cntk_output = cntk_layer.forward({rpn_cls_score_var: [rpn_cls_score_dummy], gt_boxes_var: [gt_boxes], dims_info_var: dims_input})
161 |
162 | obj_key = [k for k in cntk_output if 'objectness_target' in str(k)][0]
163 | bbt_key = [k for k in cntk_output if 'rpn_bbox_target' in str(k)][0]
164 | bbw_key = [k for k in cntk_output if 'rpn_bbox_inside_w' in str(k)][0]
165 |
166 | cntk_objectness_target = cntk_output[obj_key][0]
167 | cntk_bbox_targets = cntk_output[bbt_key][0]
168 | cntk_bbox_inside_w = cntk_output[bbw_key][0]
169 |
170 | # Create Caffe layer and call forward
171 |     bottom = [np.array(rpn_cls_score_dummy), np.array(gt_boxes), np.array(im_info)]
172 |     top = None  # unused: the Caffe layer returns its outputs directly for unit testing
173 |
174 | param_str = "'feat_stride': 16"
175 | caffe_layer = CaffeAnchorTargetLayer()
176 | caffe_layer.set_param_str(param_str)
177 | caffe_layer.setup(bottom, top)
178 | caffe_layer.set_deterministic_mode()
179 |
180 | caffe_objectness_target, caffe_bbox_targets, caffe_bbox_inside_w = caffe_layer.forward(bottom, top)
181 |
182 | # assert that results are exactly the same
183 | assert cntk_objectness_target.shape == caffe_objectness_target.shape
184 | assert cntk_bbox_targets.shape == caffe_bbox_targets.shape
185 | assert cntk_bbox_inside_w.shape == caffe_bbox_inside_w.shape
186 |
187 | assert np.allclose(cntk_objectness_target, caffe_objectness_target, rtol=0.0, atol=0.0)
188 | assert np.allclose(cntk_bbox_targets, caffe_bbox_targets, rtol=0.0, atol=0.0)
189 | assert np.allclose(cntk_bbox_inside_w, caffe_bbox_inside_w, rtol=0.0, atol=0.0)
190 | print("Verified AnchorTargetLayer")
191 |
192 | if __name__ == '__main__':
193 | test_proposal_layer()
194 | test_proposal_target_layer()
195 | test_anchor_target_layer()
196 |
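197 | # Notes on running these tests:
198 | # - They can be run directly (python unit_tests.py) or through pytest, which
199 | #   picks them up via the pytest.ini in this folder:  pytest unit_tests.py
200 | # - The 'caffe_layers' imports above refer to the adapted Python copies in
201 | #   utils/caffe_layers (they expose set_param_str / set_deterministic_mode),
202 | #   so the comparison should run without a Caffe installation.
203 | # - Both implementations are switched into deterministic mode so that fg/bg
204 | #   sampling is reproducible and the outputs can be compared with zero
205 | #   tolerance (rtol=0.0, atol=0.0 in np.allclose, i.e. exact equality).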
--------------------------------------------------------------------------------
/web.config:
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------