├── .gitignore ├── IP ├── Bank Cheque │ ├── 1.jpg │ ├── 2.jpg │ ├── 3.jpg │ ├── bank_details_extraction.ipynb │ ├── bit.png │ ├── cheque_details_extraction.py │ ├── diff.png │ ├── intermediat_gradx.png │ ├── output.png │ ├── ref.png │ ├── template_acc.jpg │ └── test.jpg ├── Contour Detection │ ├── ContourImageDetectedForCursive.png │ ├── Inkedlicense_con_LI.jpg │ ├── contour_detect.ipynb │ ├── croppedCursive.png │ ├── cursive.jpg │ ├── cursive_processing.ipynb │ ├── license.jpg │ ├── license2.jpg │ ├── license_con.jpg │ ├── license_econ.jpg │ ├── output_2ndapproach.jpg │ ├── pan_card_contours.jpg │ ├── pan_card_dcontours.jpg │ └── pancard.jpg └── Face Detection │ ├── face_detection.py │ └── haarcascade_frontalface_default.xml ├── LICENSE ├── Preprocessing.ipynb ├── README.md ├── api ├── .vscode │ └── settings.json ├── UPLOAD_FOLDER │ └── .gitkeep ├── cheque_details_extraction.py ├── ctpn │ ├── __init__.py │ ├── demo.py │ ├── demo_pb.py │ ├── generate_pb.py │ ├── text.yml │ └── train_net.py ├── data │ ├── VOCdevkit2007 │ └── ctpn.pb ├── db.py ├── face_matching.py ├── haarcascade_frontalface_default.xml ├── lib │ ├── __init__.py │ ├── datasets │ │ ├── __init__.py │ │ ├── factory.py │ │ ├── imdb.py │ │ └── pascal_voc.py │ ├── fast_rcnn │ │ ├── __init__.py │ │ ├── bbox_transform.py │ │ ├── config.py │ │ ├── nms_wrapper.py │ │ ├── test.py │ │ └── train.py │ ├── networks │ │ ├── VGGnet_test.py │ │ ├── VGGnet_train.py │ │ ├── __init__.py │ │ ├── factory.py │ │ └── network.py │ ├── prepare_training_data │ │ ├── ToVoc.py │ │ └── split_label.py │ ├── roi_data_layer │ │ ├── __init__.py │ │ ├── layer.py │ │ ├── minibatch.py │ │ └── roidb.py │ ├── rpn_msr │ │ ├── __init__.py │ │ ├── anchor_target_layer_tf.py │ │ ├── generate_anchors.py │ │ └── proposal_layer_tf.py │ ├── text_connector │ │ ├── __init__.py │ │ ├── detectors.py │ │ ├── other.py │ │ ├── text_connect_cfg.py │ │ ├── text_proposal_connector.py │ │ ├── text_proposal_connector_oriented.py │ │ └── text_proposal_graph_builder.py │ └── utils │ │ ├── __init__.py │ │ ├── bbox.c │ │ ├── bbox.pyx │ │ ├── blob.py │ │ ├── boxes_grid.py │ │ ├── cython_nms.c │ │ ├── cython_nms.pyx │ │ ├── gpu_nms.c │ │ ├── gpu_nms.cpp │ │ ├── gpu_nms.hpp │ │ ├── gpu_nms.pyx │ │ ├── make.sh │ │ ├── nms_kernel.cu │ │ ├── setup.py │ │ └── timer.py ├── model │ ├── model.h5 │ └── model.json ├── outputs.txt ├── processing.py ├── ref.png ├── server.py └── templates │ ├── aadhar_template.png │ ├── index.html │ ├── license_template.jpg │ ├── pancard_template.jpg │ ├── template_acc.jpg │ └── template_ifsc.png ├── image_utils.py ├── inference.py ├── tile.jpg ├── train.py └── utils ├── dataset.py ├── image_aug.py ├── nn_block.py └── unet.py /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Created by https://www.gitignore.io/api/python,android,reactnative,androidstudio,jupyternotebook,visualstudiocode,jupyternotebooks 3 | # Edit at https://www.gitignore.io/?templates=python,android,reactnative,androidstudio,jupyternotebook,visualstudiocode,jupyternotebooks 4 | **/UPLOAD_FOLDER/*/ 5 | **/ignore/ 6 | 7 | ### Android ### 8 | # Built application files 9 | *.apk 10 | *.ap_ 11 | *.aab 12 | 13 | # Files for the ART/Dalvik VM 14 | *.dex 15 | 16 | # Java class files 17 | *.class 18 | 19 | # Generated files 20 | bin/ 21 | gen/ 22 | out/ 23 | 24 | # Gradle files 25 | .gradle/ 26 | build/ 27 | 28 | # Local configuration file (sdk path, etc) 29 | local.properties 30 | 31 | # Proguard folder generated by Eclipse 32 | proguard/ 33 | 34 | # Log 
Files 35 | *.log 36 | 37 | # Android Studio Navigation editor temp files 38 | .navigation/ 39 | 40 | # Android Studio captures folder 41 | captures/ 42 | 43 | # IntelliJ 44 | *.iml 45 | .idea/workspace.xml 46 | .idea/tasks.xml 47 | .idea/gradle.xml 48 | .idea/assetWizardSettings.xml 49 | .idea/dictionaries 50 | .idea/libraries 51 | .idea/caches 52 | 53 | # Keystore files 54 | # Uncomment the following lines if you do not want to check your keystore files in. 55 | #*.jks 56 | #*.keystore 57 | 58 | # External native build folder generated in Android Studio 2.2 and later 59 | .externalNativeBuild 60 | 61 | # Google Services (e.g. APIs or Firebase) 62 | google-services.json 63 | 64 | # Freeline 65 | freeline.py 66 | freeline/ 67 | freeline_project_description.json 68 | 69 | # fastlane 70 | fastlane/report.xml 71 | fastlane/Preview.html 72 | fastlane/screenshots 73 | fastlane/test_output 74 | fastlane/readme.md 75 | 76 | ### Android Patch ### 77 | gen-external-apklibs 78 | 79 | ### AndroidStudio ### 80 | # Covers files to be ignored for android development using Android Studio. 81 | 82 | # Built application files 83 | 84 | # Files for the ART/Dalvik VM 85 | 86 | # Java class files 87 | 88 | # Generated files 89 | 90 | # Gradle files 91 | .gradle 92 | 93 | # Signing files 94 | .signing/ 95 | 96 | # Local configuration file (sdk path, etc) 97 | 98 | # Proguard folder generated by Eclipse 99 | 100 | # Log Files 101 | 102 | # Android Studio 103 | /*/build/ 104 | /*/local.properties 105 | /*/out 106 | /*/*/build 107 | /*/*/production 108 | *.ipr 109 | *~ 110 | *.swp 111 | 112 | # Android Patch 113 | 114 | # External native build folder generated in Android Studio 2.2 and later 115 | 116 | # NDK 117 | obj/ 118 | 119 | # IntelliJ IDEA 120 | *.iws 121 | /out/ 122 | 123 | # User-specific configurations 124 | .idea/caches/ 125 | .idea/libraries/ 126 | .idea/shelf/ 127 | .idea/.name 128 | .idea/compiler.xml 129 | .idea/copyright/profiles_settings.xml 130 | .idea/encodings.xml 131 | .idea/misc.xml 132 | .idea/modules.xml 133 | .idea/scopes/scope_settings.xml 134 | .idea/vcs.xml 135 | .idea/jsLibraryMappings.xml 136 | .idea/datasources.xml 137 | .idea/dataSources.ids 138 | .idea/sqlDataSources.xml 139 | .idea/dynamic.xml 140 | .idea/uiDesigner.xml 141 | 142 | # OS-specific files 143 | .DS_Store 144 | .DS_Store? 
145 | ._* 146 | .Spotlight-V100 147 | .Trashes 148 | ehthumbs.db 149 | Thumbs.db 150 | 151 | # Legacy Eclipse project files 152 | .classpath 153 | .project 154 | .cproject 155 | .settings/ 156 | 157 | # Mobile Tools for Java (J2ME) 158 | .mtj.tmp/ 159 | 160 | # Package Files # 161 | *.war 162 | *.ear 163 | 164 | # virtual machine crash logs (Reference: http://www.java.com/en/download/help/error_hotspot.xml) 165 | hs_err_pid* 166 | 167 | ## Plugin-specific files: 168 | 169 | # mpeltonen/sbt-idea plugin 170 | .idea_modules/ 171 | 172 | # JIRA plugin 173 | atlassian-ide-plugin.xml 174 | 175 | # Mongo Explorer plugin 176 | .idea/mongoSettings.xml 177 | 178 | # Crashlytics plugin (for Android Studio and IntelliJ) 179 | com_crashlytics_export_strings.xml 180 | crashlytics.properties 181 | crashlytics-build.properties 182 | fabric.properties 183 | 184 | ### AndroidStudio Patch ### 185 | 186 | !/gradle/wrapper/gradle-wrapper.jar 187 | 188 | ### JupyterNotebook ### 189 | .ipynb_checkpoints 190 | */.ipynb_checkpoints/* 191 | 192 | # Remove previous ipynb_checkpoints 193 | # git rm -r .ipynb_checkpoints/ 194 | # 195 | 196 | ### JupyterNotebooks ### 197 | # gitignore template for Jupyter Notebooks 198 | # website: http://jupyter.org/ 199 | 200 | 201 | # Remove previous ipynb_checkpoints 202 | # git rm -r .ipynb_checkpoints/ 203 | # 204 | 205 | ### Python ### 206 | # Byte-compiled / optimized / DLL files 207 | __pycache__/ 208 | *.py[cod] 209 | *$py.class 210 | 211 | # C extensions 212 | *.so 213 | 214 | # Distribution / packaging 215 | .Python 216 | develop-eggs/ 217 | dist/ 218 | downloads/ 219 | eggs/ 220 | .eggs/ 221 | lib64/ 222 | parts/ 223 | sdist/ 224 | var/ 225 | wheels/ 226 | share/python-wheels/ 227 | *.egg-info/ 228 | .installed.cfg 229 | *.egg 230 | MANIFEST 231 | 232 | # PyInstaller 233 | # Usually these files are written by a python script from a template 234 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
235 | *.manifest 236 | *.spec 237 | 238 | # Installer logs 239 | pip-log.txt 240 | pip-delete-this-directory.txt 241 | 242 | # Unit test / coverage reports 243 | htmlcov/ 244 | .tox/ 245 | .nox/ 246 | .coverage 247 | .coverage.* 248 | .cache 249 | nosetests.xml 250 | coverage.xml 251 | *.cover 252 | .hypothesis/ 253 | .pytest_cache/ 254 | 255 | # Translations 256 | *.mo 257 | *.pot 258 | 259 | # Django stuff: 260 | local_settings.py 261 | db.sqlite3 262 | 263 | # Flask stuff: 264 | instance/ 265 | .webassets-cache 266 | 267 | # Scrapy stuff: 268 | .scrapy 269 | 270 | # Sphinx documentation 271 | docs/_build/ 272 | 273 | # PyBuilder 274 | target/ 275 | 276 | # Jupyter Notebook 277 | 278 | # IPython 279 | profile_default/ 280 | ipython_config.py 281 | 282 | # pyenv 283 | .python-version 284 | 285 | # celery beat schedule file 286 | celerybeat-schedule 287 | 288 | # SageMath parsed files 289 | *.sage.py 290 | 291 | # Environments 292 | .env 293 | .venv 294 | env/ 295 | venv/ 296 | ENV/ 297 | env.bak/ 298 | venv.bak/ 299 | 300 | # Spyder project settings 301 | .spyderproject 302 | .spyproject 303 | 304 | # Rope project settings 305 | .ropeproject 306 | 307 | # mkdocs documentation 308 | /site 309 | 310 | # mypy 311 | .mypy_cache/ 312 | .dmypy.json 313 | dmypy.json 314 | 315 | # Pyre type checker 316 | .pyre/ 317 | 318 | ### Python Patch ### 319 | .venv/ 320 | 321 | ### ReactNative ### 322 | # React Native Stack Base 323 | 324 | .expo 325 | __generated__ 326 | 327 | ### ReactNative.Xcode Stack ### 328 | # Xcode 329 | # 330 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 331 | 332 | ## User settings 333 | xcuserdata/ 334 | 335 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) 336 | *.xcscmblueprint 337 | *.xccheckout 338 | 339 | ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) 340 | DerivedData/ 341 | *.moved-aside 342 | *.pbxuser 343 | !default.pbxuser 344 | *.mode1v3 345 | !default.mode1v3 346 | *.mode2v3 347 | !default.mode2v3 348 | *.perspectivev3 349 | !default.perspectivev3 350 | 351 | ### ReactNative.Linux Stack ### 352 | 353 | # temporary files which can be created if a process still has a handle open of a deleted file 354 | .fuse_hidden* 355 | 356 | # KDE directory preferences 357 | .directory 358 | 359 | # Linux trash folder which might appear on any partition or disk 360 | .Trash-* 361 | 362 | # .nfs files are created when an open file is removed but is still being accessed 363 | .nfs* 364 | 365 | ### ReactNative.Android Stack ### 366 | # Built application files 367 | 368 | # Files for the ART/Dalvik VM 369 | 370 | # Java class files 371 | 372 | # Generated files 373 | 374 | # Gradle files 375 | 376 | # Local configuration file (sdk path, etc) 377 | 378 | # Proguard folder generated by Eclipse 379 | 380 | # Log Files 381 | 382 | # Android Studio Navigation editor temp files 383 | 384 | # Android Studio captures folder 385 | 386 | # IntelliJ 387 | 388 | # Keystore files 389 | # Uncomment the following lines if you do not want to check your keystore files in. 390 | #*.jks 391 | #*.keystore 392 | 393 | # External native build folder generated in Android Studio 2.2 and later 394 | 395 | # Google Services (e.g. 
APIs or Firebase) 396 | 397 | # Freeline 398 | 399 | # fastlane 400 | 401 | ### ReactNative.macOS Stack ### 402 | # General 403 | .AppleDouble 404 | .LSOverride 405 | 406 | # Icon must end with two \r 407 | Icon 408 | 409 | 410 | # Thumbnails 411 | 412 | # Files that might appear in the root of a volume 413 | .DocumentRevisions-V100 414 | .fseventsd 415 | .TemporaryItems 416 | .VolumeIcon.icns 417 | .com.apple.timemachine.donotpresent 418 | 419 | # Directories potentially created on remote AFP share 420 | .AppleDB 421 | .AppleDesktop 422 | Network Trash Folder 423 | Temporary Items 424 | .apdisk 425 | 426 | ### ReactNative.Node Stack ### 427 | # Logs 428 | logs 429 | npm-debug.log* 430 | yarn-debug.log* 431 | yarn-error.log* 432 | 433 | # Runtime data 434 | pids 435 | *.pid 436 | *.seed 437 | *.pid.lock 438 | 439 | # Directory for instrumented libs generated by jscoverage/JSCover 440 | lib-cov 441 | 442 | # Coverage directory used by tools like istanbul 443 | coverage 444 | 445 | # nyc test coverage 446 | .nyc_output 447 | 448 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 449 | .grunt 450 | 451 | # Bower dependency directory (https://bower.io/) 452 | bower_components 453 | 454 | # node-waf configuration 455 | .lock-wscript 456 | 457 | # Compiled binary addons (https://nodejs.org/api/addons.html) 458 | build/Release 459 | 460 | # Dependency directories 461 | node_modules/ 462 | jspm_packages/ 463 | 464 | # TypeScript v1 declaration files 465 | typings/ 466 | 467 | # Optional npm cache directory 468 | .npm 469 | 470 | # Optional eslint cache 471 | .eslintcache 472 | 473 | # Optional REPL history 474 | .node_repl_history 475 | 476 | # Output of 'npm pack' 477 | *.tgz 478 | 479 | # Yarn Integrity file 480 | .yarn-integrity 481 | 482 | # dotenv environment variables file 483 | .env.test 484 | 485 | # parcel-bundler cache (https://parceljs.org/) 486 | 487 | # next.js build output 488 | .next 489 | 490 | # nuxt.js build output 491 | .nuxt 492 | 493 | # vuepress build output 494 | .vuepress/dist 495 | 496 | # Serverless directories 497 | .serverless/ 498 | 499 | # FuseBox cache 500 | .fusebox/ 501 | 502 | # DynamoDB Local files 503 | .dynamodb/ 504 | 505 | ### ReactNative.Buck Stack ### 506 | buck-out/ 507 | .buckconfig.local 508 | .buckd/ 509 | .buckversion 510 | .fakebuckversion 511 | 512 | ### ReactNative.Gradle Stack ### 513 | /build/ 514 | 515 | # Ignore Gradle GUI config 516 | gradle-app.setting 517 | 518 | # Avoid ignoring Gradle wrapper jar file (.jar files are usually ignored) 519 | !gradle-wrapper.jar 520 | 521 | # Cache of project 522 | .gradletasknamecache 523 | 524 | # # Work around https://youtrack.jetbrains.com/issue/IDEA-116898 525 | # gradle/wrapper/gradle-wrapper.properties 526 | 527 | ### VisualStudioCode ### 528 | .vscode/* 529 | !.vscode/settings.json 530 | !.vscode/tasks.json 531 | !.vscode/launch.json 532 | !.vscode/extensions.json 533 | 534 | ### VisualStudioCode Patch ### 535 | # Ignore all local history of files 536 | .history 537 | 538 | # End of https://www.gitignore.io/api/python,android,reactnative,androidstudio,jupyternotebook,visualstudiocode,jupyternotebooks -------------------------------------------------------------------------------- /IP/Bank Cheque/1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/1.jpg 
-------------------------------------------------------------------------------- /IP/Bank Cheque/2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/2.jpg -------------------------------------------------------------------------------- /IP/Bank Cheque/3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/3.jpg -------------------------------------------------------------------------------- /IP/Bank Cheque/bit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/bit.png -------------------------------------------------------------------------------- /IP/Bank Cheque/diff.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/diff.png -------------------------------------------------------------------------------- /IP/Bank Cheque/intermediat_gradx.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/intermediat_gradx.png -------------------------------------------------------------------------------- /IP/Bank Cheque/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/output.png -------------------------------------------------------------------------------- /IP/Bank Cheque/ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/ref.png -------------------------------------------------------------------------------- /IP/Bank Cheque/template_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/template_acc.jpg -------------------------------------------------------------------------------- /IP/Bank Cheque/test.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Bank Cheque/test.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/ContourImageDetectedForCursive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/ContourImageDetectedForCursive.png -------------------------------------------------------------------------------- /IP/Contour Detection/Inkedlicense_con_LI.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/Inkedlicense_con_LI.jpg -------------------------------------------------------------------------------- 
/IP/Contour Detection/contour_detect.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 8, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import cv2\n", 10 | "import numpy as np\n", 11 | "\n", 12 | "image = cv2.imread('cursive.jpg')\n", 13 | "blurred = cv2.pyrMeanShiftFiltering(image, 81, 101)\n", 14 | "gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)\n", 15 | "_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)\n", 16 | "\n", 17 | "_, contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)\n", 18 | "\n", 19 | "for i in range(len(contours)):\n", 20 | " cv2.drawContours(image, contours, i, (0,255,0), 5)\n", 21 | " cv2.namedWindow('Display', cv2.WINDOW_NORMAL)\n", 22 | " cv2.imshow('Display', image)\n", 23 | " cv2.waitKey(0)\n", 24 | "\n", 25 | "cv2.destroyAllWindows()" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 16, 31 | "metadata": {}, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "239.0\n", 38 | "209.5\n", 39 | "35.5\n", 40 | "245.0\n", 41 | "287.5\n", 42 | "2318.5\n", 43 | "21.5\n", 44 | "21.5\n", 45 | "89.0\n", 46 | "42.5\n", 47 | "43.0\n", 48 | "125.0\n", 49 | "866.5\n", 50 | "203.0\n", 51 | "120.5\n", 52 | "182.0\n", 53 | "1288.5\n", 54 | "567.5\n", 55 | "878.0\n", 56 | "977.0\n", 57 | "370.5\n", 58 | "389.5\n", 59 | "60.5\n", 60 | "96.5\n", 61 | "122.5\n", 62 | "27.0\n", 63 | "203.5\n", 64 | "236.0\n", 65 | "203.5\n", 66 | "200.5\n", 67 | "40.0\n", 68 | "2369.0\n", 69 | "2682.5\n", 70 | "104.5\n", 71 | "111.0\n", 72 | "244.5\n", 73 | "246.5\n", 74 | "34.5\n", 75 | "35.5\n", 76 | "192.5\n", 77 | "1354.0\n", 78 | "2424.0\n", 79 | "21.5\n", 80 | "4.0\n", 81 | "1275.5\n", 82 | "140.5\n", 83 | "74.5\n", 84 | "236.0\n", 85 | "238.0\n", 86 | "47.0\n", 87 | "21.0\n", 88 | "45.5\n", 89 | "983.0\n", 90 | "42.0\n", 91 | "21.0\n", 92 | "2982.5\n", 93 | "1710.5\n", 94 | "2018.0\n", 95 | "183141.0\n" 96 | ] 97 | } 98 | ], 99 | "source": [ 100 | "import cv2\n", 101 | "import numpy as np\n", 102 | "\n", 103 | "image = cv2.imread('cursive.jpg')\n", 104 | "image = cv2.resize(image, (image.shape[0]*2, image.shape[1]))\n", 105 | "blurred = cv2.pyrMeanShiftFiltering(image, 31, 71)\n", 106 | "gray = cv2.cvtColor(blurred, cv2.COLOR_BGR2GRAY)\n", 107 | "_, thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY+cv2.THRESH_OTSU)\n", 108 | "\n", 109 | "_, contours, _ = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_NONE)\n", 110 | "\n", 111 | "for c in contours:\n", 112 | " rect = cv2.boundingRect(c)\n", 113 | "# if rect[2] < 5 or rect[3] < 5: continue\n", 114 | " print(cv2.contourArea(c))\n", 115 | " x, y, w, h = rect\n", 116 | " cv2.rectangle(image,(x,y),(x+w,y+h),(0,255,0),2)\n", 117 | "\n", 118 | "cv2.imshow(\"Show\",image)\n", 119 | "cv2.waitKey() \n", 120 | "cv2.destroyAllWindows()" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": null, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [] 129 | }, 130 | { 131 | "cell_type": "code", 132 | "execution_count": null, 133 | "metadata": {}, 134 | "outputs": [], 135 | "source": [] 136 | } 137 | ], 138 | "metadata": { 139 | "kernelspec": { 140 | "display_name": "Python 3", 141 | "language": "python", 142 | "name": "python3" 143 | }, 144 | "language_info": { 145 | "codemirror_mode": { 146 | "name": "ipython", 147 | "version": 3 148 | }, 149 | "file_extension": ".py", 150 | 
"mimetype": "text/x-python", 151 | "name": "python", 152 | "nbconvert_exporter": "python", 153 | "pygments_lexer": "ipython3", 154 | "version": "3.6.6" 155 | } 156 | }, 157 | "nbformat": 4, 158 | "nbformat_minor": 2 159 | } 160 | -------------------------------------------------------------------------------- /IP/Contour Detection/croppedCursive.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/croppedCursive.png -------------------------------------------------------------------------------- /IP/Contour Detection/cursive.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/cursive.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/license.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/license.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/license2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/license2.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/license_con.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/license_con.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/license_econ.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/license_econ.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/output_2ndapproach.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/output_2ndapproach.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/pan_card_contours.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/pan_card_contours.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/pan_card_dcontours.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/pan_card_dcontours.jpg -------------------------------------------------------------------------------- /IP/Contour Detection/pancard.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/IP/Contour Detection/pancard.jpg 
-------------------------------------------------------------------------------- /IP/Face Detection/face_detection.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import numpy as np 3 | 4 | def get_photo(image): 5 | ''' 6 | Image Should be 1920 x 1080 pixels 7 | ''' 8 | scale_factor = 1.1 9 | min_neighbors = 3 10 | min_size = (250, 250) 11 | flags = cv2.CASCADE_SCALE_IMAGE 12 | 13 | face_cascade = cv2.CascadeClassifier('haarcascade_frontalface_default.xml') 14 | image = cv2.imread(image) 15 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 16 | 17 | faces = face_cascade.detectMultiScale(image, scaleFactor = scale_factor, minNeighbors = min_neighbors, 18 | minSize = min_size, flags = flags) 19 | x, y, w, h = faces[0] 20 | face = image[y-50:y+h+40, x-10:x+w+10] 21 | 22 | return face 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Praneet 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Docify 2 | Deep Learning based Flask api to extract details from Indian ID cards like Aadhar Card, PAN Card and Driving Licence. 
3 | 4 | #### Tech 5 | Docify uses a number of open source projects to work properly: 6 | 7 | * [Tesseract](https://github.com/tesseract-ocr/tesseract) - Tesseract Open Source OCR Engine 8 | * [Text-Detection-CTPN](https://github.com/eragonruan/text-detection-ctpn/tree/master) - Text detection mainly based on ctpn model in tensorflow 9 | * [Python3.6](https://www.python.org) - duh 10 | 11 | # Installation 12 | #### Install Linux Dependencies 13 | ```sh 14 | $ sudo apt install cmake 15 | $ sudo apt install tesseract-ocr 16 | $ sudo apt install mongodb 17 | $ sudo apt install libsm6 libxext6 18 | $ sudo apt install supervisor 19 | $ sudo systemctl start mongodb 20 | ``` 21 | #### Download Tesseract Models [ENG+HIN+MAR] 22 | ```sh 23 | https://github.com/tesseract-ocr/tessdata_best 24 | https://github.com/BigPino67/Tesseract-MICR-OCR 25 | ``` 26 | 27 | #### Install Python Dependencies 28 | ```sh 29 | $ pip3 install opencv-python easydict flask face_recognition gunicorn tensorflow keras pytesseract dlib imutils opencv-contrib-python pymongo PyYAML scikit-image scikit-learn 30 | ``` 31 | #### Start Python API 32 | ```sh 33 | python3 server.py 34 | ``` 35 | -------------------------------------------------------------------------------- /api/.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.pythonPath": "/home/praneet/envs/deeplearning/bin/python" 3 | } -------------------------------------------------------------------------------- /api/UPLOAD_FOLDER/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/UPLOAD_FOLDER/.gitkeep -------------------------------------------------------------------------------- /api/cheque_details_extraction.py: -------------------------------------------------------------------------------- 1 | ## All necessary imports ## 2 | import cv2 3 | import re 4 | import imutils 5 | import numpy as np 6 | import pytesseract as pyt 7 | from imutils import contours 8 | from skimage.segmentation import clear_border 9 | 10 | 11 | ## New MICR Method ## 12 | def get_micrcode(image_name): 13 | try: 14 | image = cv2.imread(image_name, 0) 15 | image = cv2.resize(image, (1920,1080)) 16 | 17 | (h,w,) = image.shape[:2] 18 | delta = int(h - (h*0.17)) 19 | bottom = image[delta:h, 0:w] 20 | 21 | thresh = cv2.threshold(bottom, 100, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] 22 | 23 | text = pyt.image_to_string(thresh, lang='mcr', config='--oem 1 --psm 3') 24 | 25 | a, b, c, d = text.split()[:4] 26 | 27 | if len(b) > 10: 28 | b = b[0:9] 29 | b += 'a' 30 | return a + ' ' + b + ' ' + c + ' ' + d 31 | except: 32 | return 'MICR Not Found' 33 | ## New MICR End ## 34 | 35 | #### IFSC ##### 36 | def get_ifsc(image_path): 37 | 38 | def replace(text): 39 | # Remove some noise present in the text 40 | chars = "`*_{}[]()>#+-.!$:;?"
41 | for c in chars: 42 | text = text.replace(c, '') 43 | return text 44 | 45 | # Read image 46 | image = cv2.imread(image_path) 47 | image = cv2.resize(image, (1920,1080)) 48 | lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) 49 | luminance, a, b = cv2.split(lab) 50 | 51 | hist,bins = np.histogram(luminance,256,[0,256]) 52 | 53 | mean = int((np.argmax(hist) + np.argmin(hist)) / 2) 54 | 55 | luminance[luminance > mean] = 255 56 | luminance[luminance <= mean] = 0 57 | 58 | # Forward it to ocr to get all the text present in image 59 | text = pyt.image_to_string(luminance, config=('--oem 1 --psm 3')) 60 | 61 | # Find IFSC in text and find the IFSC Code using regex 62 | ifsc = text.find('IFSC') 63 | # Select the range where the real IFSC Code will be present 64 | text = text[ifsc: ifsc + 30] 65 | 66 | text = replace(text) 67 | try: 68 | text = re.findall(r'[A-Z0-9]{11}', text)[0] 69 | except: 70 | return 0 71 | return text 72 | 73 | def get_ifsc2(image_path): 74 | 75 | def replace(text): 76 | # Remove some noise present in the text 77 | chars = "`*_{}[]()>#+-.!$:;?" 78 | for c in chars: 79 | text = text.replace(c, '') 80 | return text 81 | 82 | # Read image 83 | image = cv2.imread(image_path) 84 | image = cv2.resize(image, (1920,1080)) 85 | gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) 86 | lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) 87 | luminance, a, b = cv2.split(lab) 88 | 89 | hist,bins = np.histogram(luminance,256,[0,256]) 90 | 91 | mean = int((np.argmax(hist) + np.argmin(hist)) / 2) 92 | 93 | luminance[luminance > mean] = 255 94 | luminance[luminance <= mean] = 0 95 | 96 | # Read template 97 | template = cv2.imread('templates/template_ifsc.png') 98 | template_gray = cv2.cvtColor(template, cv2.COLOR_BGR2GRAY) 99 | template_thresh = cv2.threshold(template_gray, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] 100 | 101 | diff = cv2.subtract(luminance, template_thresh) 102 | diff = cv2.bitwise_and(diff, gray_image) 103 | # Forward it to ocr to get all the text present in image 104 | text = pyt.image_to_string(diff, config=('--oem 1 --psm 3')) 105 | 106 | # Find IFSC in text and find the IFSC Code using regex 107 | 108 | # Select the range where the real IFSC Code will be present 109 | text = replace(text) 110 | try: 111 | text = re.findall(r'[A-Z0-9]{11}', text)[0] 112 | except: 113 | return 0 114 | return text 115 | 116 | def get_ifsc3(image): 117 | 118 | def replace(text): 119 | return text.replace('?', '7') 120 | 121 | img = cv2.imread(image) 122 | text = pyt.image_to_string(img, config=('--oem 1 --psm 3')) 123 | 124 | ifsc = text.find('IFSC') 125 | new_text = text[ifsc : ifsc + 30] 126 | new_text = replace(new_text) 127 | 128 | try: 129 | code = re.findall(r'[A-Z0-9]{11}', new_text)[0] 130 | except: 131 | return 0 132 | return code 133 | 134 | def ensemble_ifsc_output(cheque_img): 135 | ifsc1 = get_ifsc(cheque_img) 136 | ifsc2 = get_ifsc2(cheque_img) 137 | ifsc3 = get_ifsc3(cheque_img) 138 | ifsc = [ifsc1, ifsc2, ifsc3] 139 | 140 | if ifsc1 == 0 and ifsc2 == 0 and ifsc3 == 0: 141 | return 'IFSC Not Found' 142 | else: 143 | for code in ifsc: 144 | if code != 0: 145 | return code 146 | return 'IFSC Not Found' 147 | 148 | #### IFSC END ##### 149 | 150 | 151 | #### Account No #### 152 | def get_acc(image_path): 153 | # Read image 154 | image = cv2.imread(image_path) 155 | image = cv2.resize(image, (1920,1080)) 156 | lab = cv2.cvtColor(image, cv2.COLOR_BGR2LAB) 157 | luminance, a, b = cv2.split(lab) 158 | 159 | hist,bins = np.histogram(luminance,256,[0,256]) 160 | 161 | mean = 
int((np.argmax(hist) + np.argmin(hist)) / 2) 162 | 163 | luminance[luminance > mean] = 255 164 | luminance[luminance <= mean] = 0 165 | 166 | # Read template 167 | template = cv2.imread('templates/template_acc.jpg', 0) 168 | 169 | thresh = cv2.threshold(template, 150, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)[1] 170 | 171 | # Set difference 172 | diff = cv2.subtract(luminance, template) 173 | 174 | text = pyt.image_to_string(diff, config=('--oem 1 --psm 3')) 175 | 176 | if '-' in list(text): 177 | 178 | text = text.replace('-', '') 179 | 180 | try: 181 | acc_no = re.findall(r'[0-9]{9,18}',text)[0] 182 | except: 183 | text = pyt.image_to_string(luminance, config=('--oem 1 --psm 3')) 184 | if '-' in list(text): 185 | 186 | text = text.replace('-', '') 187 | try: 188 | acc_no = re.findall(r'[0-9]{9,18}',text)[0] 189 | except: 190 | return 0 191 | return acc_no 192 | 193 | def get_acc2(cheque_img): 194 | img = cv2.imread(cheque_img) 195 | 196 | text = pyt.image_to_string(img, config=('--oem 1 --psm 3')) 197 | 198 | if '-' in list(text): 199 | text = text.replace('-', '') 200 | try: 201 | text = re.findall(r'[0-9]{9,18}', text)[0] 202 | except: 203 | return 0 204 | return text 205 | 206 | 207 | def ensemble_acc_output(cheque_img): 208 | acc1 = get_acc(cheque_img) 209 | acc2 = get_acc2(cheque_img) 210 | acc = [acc1, acc2] 211 | 212 | 213 | if acc1 == 0 and acc2 == 0: 214 | return 'Account Number Not Found' 215 | else: 216 | for no in acc: 217 | if no != 0: 218 | return no 219 | return 'Account Number Not Found' 220 | #### Account No END #### -------------------------------------------------------------------------------- /api/ctpn/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /api/ctpn/demo.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import cv2 4 | import glob 5 | import os 6 | import shutil 7 | import sys 8 | 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | sys.path.append(os.getcwd()) 13 | from lib.networks.factory import get_network 14 | from lib.fast_rcnn.config import cfg, cfg_from_file 15 | from lib.fast_rcnn.test import test_ctpn 16 | from lib.utils.timer import Timer 17 | from lib.text_connector.detectors import TextDetector 18 | from lib.text_connector.text_connect_cfg import Config as TextLineCfg 19 | 20 | 21 | def resize_im(im, scale, max_scale=None): 22 | f = float(scale) / min(im.shape[0], im.shape[1]) 23 | if max_scale != None and f * max(im.shape[0], im.shape[1]) > max_scale: 24 | f = float(max_scale) / max(im.shape[0], im.shape[1]) 25 | return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f 26 | 27 | 28 | def draw_boxes(img, image_name, boxes, scale): 29 | base_name = image_name.split('/')[-1] 30 | with open('data/results/' + 'res_{}.txt'.format(base_name.split('.')[0]), 'w') as f: 31 | for box in boxes: 32 | if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: 33 | continue 34 | if box[8] >= 0.9: 35 | color = (0, 255, 0) 36 | elif box[8] >= 0.8: 37 | color = (255, 0, 0) 38 | cv2.line(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2) 39 | cv2.line(img, (int(box[0]), int(box[1])), (int(box[4]), int(box[5])), color, 2) 40 | cv2.line(img, (int(box[6]), int(box[7])), (int(box[2]), int(box[3])), color, 2) 41 | cv2.line(img, (int(box[4]), int(box[5])), (int(box[6]), 
int(box[7])), color, 2) 42 | 43 | min_x = min(int(box[0] / scale), int(box[2] / scale), int(box[4] / scale), int(box[6] / scale)) 44 | min_y = min(int(box[1] / scale), int(box[3] / scale), int(box[5] / scale), int(box[7] / scale)) 45 | max_x = max(int(box[0] / scale), int(box[2] / scale), int(box[4] / scale), int(box[6] / scale)) 46 | max_y = max(int(box[1] / scale), int(box[3] / scale), int(box[5] / scale), int(box[7] / scale)) 47 | 48 | line = ','.join([str(min_x), str(min_y), str(max_x), str(max_y)]) + '\r\n' 49 | f.write(line) 50 | 51 | img = cv2.resize(img, None, None, fx=1.0 / scale, fy=1.0 / scale, interpolation=cv2.INTER_LINEAR) 52 | cv2.imwrite(os.path.join("data/results", base_name), img) 53 | 54 | 55 | def ctpn(sess, net, image_name): 56 | timer = Timer() 57 | timer.tic() 58 | 59 | img = cv2.imread(image_name) 60 | img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE) 61 | scores, boxes = test_ctpn(sess, net, img) 62 | 63 | textdetector = TextDetector() 64 | boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2]) 65 | draw_boxes(img, image_name, boxes, scale) 66 | timer.toc() 67 | print(('Detection took {:.3f}s for ' 68 | '{:d} object proposals').format(timer.total_time, boxes.shape[0])) 69 | 70 | 71 | if __name__ == '__main__': 72 | if os.path.exists("data/results/"): 73 | shutil.rmtree("data/results/") 74 | os.makedirs("data/results/") 75 | 76 | cfg_from_file('ctpn/text.yml') 77 | 78 | # init session 79 | config = tf.ConfigProto(allow_soft_placement=True) 80 | sess = tf.Session(config=config) 81 | # load network 82 | net = get_network("VGGnet_test") 83 | # load model 84 | print(('Loading network {:s}... '.format("VGGnet_test")), end=' ') 85 | saver = tf.train.Saver() 86 | 87 | try: 88 | ckpt = tf.train.get_checkpoint_state(cfg.TEST.checkpoints_path) 89 | print('Restoring from {}...'.format(ckpt.model_checkpoint_path), end=' ') 90 | saver.restore(sess, ckpt.model_checkpoint_path) 91 | print('done') 92 | except: 93 | raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path) 94 | 95 | im = 128 * np.ones((300, 300, 3), dtype=np.uint8) 96 | for i in range(2): 97 | _, _ = test_ctpn(sess, net, im) 98 | 99 | im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ 100 | glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) 101 | 102 | for im_name in im_names: 103 | print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') 104 | print(('Demo for {:s}'.format(im_name))) 105 | ctpn(sess, net, im_name) 106 | -------------------------------------------------------------------------------- /api/ctpn/demo_pb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import glob 4 | import os 5 | import shutil 6 | import sys 7 | 8 | import cv2 9 | import numpy as np 10 | import tensorflow as tf 11 | 12 | sys.path.append(os.getcwd()) 13 | from lib.fast_rcnn.config import cfg, cfg_from_file 14 | from lib.fast_rcnn.test import _get_blobs 15 | from lib.text_connector.detectors import TextDetector 16 | from lib.text_connector.text_connect_cfg import Config as TextLineCfg 17 | from lib.rpn_msr.proposal_layer_tf import proposal_layer 18 | 19 | 20 | cfg_from_file('ctpn/text.yml') 21 | 22 | # init session 23 | config = tf.ConfigProto(allow_soft_placement=True) 24 | sess = tf.Session(config=config) 25 | with open('data/ctpn.pb', 'rb') as f: 26 | graph_def = tf.GraphDef() 27 | graph_def.ParseFromString(f.read()) 28 | sess.graph.as_default() 29 | 
tf.import_graph_def(graph_def, name='') 30 | sess.run(tf.global_variables_initializer()) 31 | 32 | input_img = sess.graph.get_tensor_by_name('Placeholder:0') 33 | output_cls_prob = sess.graph.get_tensor_by_name('Reshape_2:0') 34 | output_box_pred = sess.graph.get_tensor_by_name('rpn_bbox_pred/Reshape_1:0') 35 | 36 | textdetector = TextDetector() 37 | 38 | 39 | def resize_im(im, scale, max_scale=None): 40 | f = float(scale) / min(im.shape[0], im.shape[1]) 41 | if max_scale != None and f * max(im.shape[0], im.shape[1]) > max_scale: 42 | f = float(max_scale) / max(im.shape[0], im.shape[1]) 43 | return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f 44 | 45 | 46 | def draw_boxes(img, image_name, boxes, scale): 47 | # base_name = image_name.split('/')[-1] 48 | all_boxes = [] 49 | for box in boxes: 50 | if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: 51 | continue 52 | # if box[8] >= 0.9: 53 | # color = (0, 255, 0) 54 | # elif box[8] >= 0.8: 55 | # color = (255, 0, 0) 56 | # cv2.line(img, (int(box[0]), int(box[1])), (int(box[2]), int(box[3])), color, 2) 57 | # cv2.line(img, (int(box[0]), int(box[1])), (int(box[4]), int(box[5])), color, 2) 58 | # cv2.line(img, (int(box[6]), int(box[7])), (int(box[2]), int(box[3])), color, 2) 59 | # cv2.line(img, (int(box[4]), int(box[5])), (int(box[6]), int(box[7])), color, 2) 60 | 61 | min_x = min(int(box[0] / scale), int(box[2] / scale), int(box[4] / scale), int(box[6] / scale)) 62 | min_y = min(int(box[1] / scale), int(box[3] / scale), int(box[5] / scale), int(box[7] / scale)) 63 | max_x = max(int(box[0] / scale), int(box[2] / scale), int(box[4] / scale), int(box[6] / scale)) 64 | max_y = max(int(box[1] / scale), int(box[3] / scale), int(box[5] / scale), int(box[7] / scale)) 65 | 66 | all_boxes.append([min_x, min_y, max_x, max_y]) 67 | 68 | return all_boxes 69 | 70 | def get_coords(image_name): 71 | 72 | img = cv2.imread(image_name) 73 | img, scale = resize_im(img, scale=TextLineCfg.SCALE, max_scale=TextLineCfg.MAX_SCALE) 74 | blobs, im_scales = _get_blobs(img, None) 75 | if cfg.TEST.HAS_RPN: 76 | im_blob = blobs['data'] 77 | blobs['im_info'] = np.array( 78 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 79 | dtype=np.float32) 80 | cls_prob, box_pred = sess.run([output_cls_prob, output_box_pred], feed_dict={input_img: blobs['data']}) 81 | rois, _ = proposal_layer(cls_prob, box_pred, blobs['im_info'], 'TEST', anchor_scales=cfg.ANCHOR_SCALES) 82 | 83 | scores = rois[:, 0] 84 | boxes = rois[:, 1:5] / im_scales[0] 85 | # textdetector = TextDetector() 86 | boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2]) 87 | all_coords = draw_boxes(img, image_name, boxes, scale) 88 | return all_coords -------------------------------------------------------------------------------- /api/ctpn/generate_pb.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import os 4 | import sys 5 | 6 | import tensorflow as tf 7 | from tensorflow.python.framework.graph_util import convert_variables_to_constants 8 | 9 | sys.path.append(os.getcwd()) 10 | from lib.networks.factory import get_network 11 | from lib.fast_rcnn.config import cfg, cfg_from_file 12 | 13 | if __name__ == "__main__": 14 | cfg_from_file('ctpn/text.yml') 15 | 16 | config = tf.ConfigProto(allow_soft_placement=True) 17 | sess = tf.Session(config=config) 18 | net = get_network("VGGnet_test") 19 | print(('Loading network {:s}... 
'.format("VGGnet_test")), end=' ') 20 | saver = tf.train.Saver() 21 | try: 22 | ckpt = tf.train.get_checkpoint_state(cfg.TEST.checkpoints_path) 23 | print('Restoring from {}...'.format(ckpt.model_checkpoint_path), end=' ') 24 | saver.restore(sess, ckpt.model_checkpoint_path) 25 | print('done') 26 | except: 27 | raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path) 28 | print(' done.') 29 | 30 | print('all nodes are:\n') 31 | graph = tf.get_default_graph() 32 | input_graph_def = graph.as_graph_def() 33 | node_names = [node.name for node in input_graph_def.node] 34 | for x in node_names: 35 | print(x) 36 | output_node_names = 'Reshape_2,rpn_bbox_pred/Reshape_1' 37 | output_graph_def = convert_variables_to_constants(sess, input_graph_def, output_node_names.split(',')) 38 | output_graph = 'data/ctpn.pb' 39 | with tf.gfile.GFile(output_graph, 'wb') as f: 40 | f.write(output_graph_def.SerializeToString()) 41 | sess.close() 42 | -------------------------------------------------------------------------------- /api/ctpn/text.yml: -------------------------------------------------------------------------------- 1 | EXP_DIR: ctpn_end2end 2 | LOG_DIR: ctpn 3 | IS_MULTISCALE: False 4 | NET_NAME: VGGnet 5 | ANCHOR_SCALES: [16] 6 | NCLASSES: 2 7 | USE_GPU_NMS: True 8 | TRAIN: 9 | restore: 0 10 | max_steps: 50000 11 | SOLVER: Adam 12 | OHEM: False 13 | RPN_BATCHSIZE: 300 14 | BATCH_SIZE: 300 15 | LOG_IMAGE_ITERS: 100 16 | DISPLAY: 10 17 | SNAPSHOT_ITERS: 1000 18 | HAS_RPN: True 19 | LEARNING_RATE: 0.00001 20 | MOMENTUM: 0.9 21 | GAMMA: 0.1 22 | STEPSIZE: 30000 23 | IMS_PER_BATCH: 1 24 | BBOX_NORMALIZE_TARGETS_PRECOMPUTED: True 25 | RPN_POSITIVE_OVERLAP: 0.7 26 | PROPOSAL_METHOD: gt 27 | BG_THRESH_LO: 0.0 28 | PRECLUDE_HARD_SAMPLES: True 29 | BBOX_INSIDE_WEIGHTS: [0, 1, 0, 1] 30 | RPN_BBOX_INSIDE_WEIGHTS: [0, 1, 0, 1] 31 | RPN_POSITIVE_WEIGHT: -1.0 32 | FG_FRACTION: 0.3 33 | WEIGHT_DECAY: 0.0005 34 | TEST: 35 | HAS_RPN: True 36 | DETECT_MODE: H 37 | checkpoints_path: checkpoints/ 38 | # checkpoints_path: output/ctpn_end2end/voc_2007_trainval 39 | -------------------------------------------------------------------------------- /api/ctpn/train_net.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | import pprint 3 | import sys 4 | 5 | sys.path.append(os.getcwd()) 6 | from lib.fast_rcnn.train import get_training_roidb, train_net 7 | from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir 8 | from lib.datasets.factory import get_imdb 9 | from lib.networks.factory import get_network 10 | from lib.fast_rcnn.config import cfg 11 | 12 | if __name__ == '__main__': 13 | cfg_from_file('ctpn/text.yml') 14 | print('Using config:') 15 | pprint.pprint(cfg) 16 | imdb = get_imdb('voc_2007_trainval') 17 | print('Loaded dataset `{:s}` for training'.format(imdb.name)) 18 | roidb = get_training_roidb(imdb) 19 | 20 | output_dir = get_output_dir(imdb, None) 21 | log_dir = get_log_dir(imdb) 22 | print('Output will be saved to `{:s}`'.format(output_dir)) 23 | print('Logs will be saved to `{:s}`'.format(log_dir)) 24 | 25 | device_name = '/gpu:0' 26 | print(device_name) 27 | 28 | network = get_network('VGGnet_train') 29 | 30 | train_net(network, imdb, roidb, 31 | output_dir=output_dir, 32 | log_dir=log_dir, 33 | pretrained_model='data/pretrain/VGG_imagenet.npy', 34 | max_iters=int(cfg.TRAIN.max_steps), 35 | restore=bool(int(cfg.TRAIN.restore))) 36 | -------------------------------------------------------------------------------- 
/api/data/VOCdevkit2007: -------------------------------------------------------------------------------- 1 | /media/D/code/OCR/CTPN_LSTM/data/VOCdevkit -------------------------------------------------------------------------------- /api/data/ctpn.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/data/ctpn.pb -------------------------------------------------------------------------------- /api/db.py: -------------------------------------------------------------------------------- 1 | from pymongo import MongoClient 2 | 3 | def getConnection(): 4 | client = MongoClient('localhost:27017') 5 | return client 6 | 7 | def getDB(client): 8 | db = client.DocScanner 9 | return db 10 | 11 | def getCollection(collection_name, db): 12 | collection = db[collection_name] 13 | return collection 14 | 15 | def closeConnection(client): 16 | client.close() 17 | 18 | ##Database Helper functions 19 | def insert_data(collection, args_dict): 20 | client = getConnection() 21 | db = getDB(client) 22 | collection_name = getCollection(collection, db) 23 | ''' 24 | db_name -> string i.e name of the db 25 | args_dict -> a dictionary of entries in db 26 | ''' 27 | collection_name.insert_one(args_dict) 28 | 29 | closeConnection(client) 30 | 31 | def read_data(collection): 32 | client = getConnection() 33 | db = getDB(client) 34 | collection_name = getCollection(collection, db) 35 | ''' 36 | returns a cursor of objects 37 | which can be iterated and printed 38 | ''' 39 | cols = collection_name.find({}) 40 | closeConnection(client) 41 | return cols 42 | 43 | #Update in data base 44 | def update_data(collection, idno, updation): 45 | client = getConnection() 46 | db = getDB(client) 47 | collection_name = getCollection(collection, db) 48 | ''' 49 | db_name -> string 50 | idno -> id number of database entry in dict 51 | ''' 52 | collection_name.update_one(idno, updation) 53 | closeConnection(client) 54 | 55 | def delete_row(collection, idno): 56 | client = getConnection() 57 | db = getDB(client) 58 | collection_name = getCollection(collection, db) 59 | ''' 60 | Deletes the complete row 61 | idno must be a dict {idno:'anything'} 62 | ''' 63 | collection_name.delete_many(idno) 64 | closeConnection(client) -------------------------------------------------------------------------------- /api/face_matching.py: -------------------------------------------------------------------------------- 1 | import cv2 2 | import face_recognition 3 | import numpy as np 4 | 5 | def detect_faces(image_path): 6 | faceDetector = cv2.CascadeClassifier('haarcascade_frontalface_default.xml') 7 | image = cv2.imread(image_path) 8 | faces = faceDetector.detectMultiScale(image, scaleFactor=1.1, minNeighbors = 3, minSize = (250,250), flags = cv2.CASCADE_SCALE_IMAGE) 9 | 10 | try: 11 | x,y,w,h = faces[0] 12 | except: 13 | return 'No Face Found' 14 | face = image[y-50:y+h+40, x-10:x+w+10] 15 | return face 16 | 17 | def match_faces(id_card_image, ref_image): 18 | id_card = detect_faces(id_card_image) 19 | ref = detect_faces(ref_image) 20 | try: 21 | ref = cv2.resize(ref, (id_card.shape[1], id_card.shape[0])) 22 | 23 | id_card_encodings = face_recognition.face_encodings(id_card)[0] 24 | ref_encodings = face_recognition.face_encodings(ref)[0] 25 | 26 | result = face_recognition.compare_faces([id_card_encodings], ref_encodings)[0] 27 | percent = face_recognition.face_distance([id_card_encodings], ref_encodings)[0] 28 | percent = (1 - 
percent) * 100.00 29 | 30 | return result, percent 31 | except: 32 | return False, 0 -------------------------------------------------------------------------------- /api/lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/lib/__init__.py -------------------------------------------------------------------------------- /api/lib/datasets/__init__.py: -------------------------------------------------------------------------------- 1 | from .imdb import imdb 2 | from .pascal_voc import pascal_voc 3 | from . import factory 4 | 5 | -------------------------------------------------------------------------------- /api/lib/datasets/factory.py: -------------------------------------------------------------------------------- 1 | from .pascal_voc import pascal_voc 2 | __sets = {} 3 | def _selective_search_IJCV_top_k(split, year, top_k): 4 | imdb = pascal_voc(split, year) 5 | imdb.roidb_handler = imdb.selective_search_IJCV_roidb 6 | imdb.config['top_k'] = top_k 7 | return imdb 8 | # Set up voc__ using selective search "fast" mode 9 | for year in ['2007', '2012', '0712']: 10 | for split in ['train', 'val', 'trainval', 'test']: 11 | name = 'voc_{}_{}'.format(year, split) 12 | __sets[name] = (lambda split=split, year=year: 13 | pascal_voc(split, year)) 14 | 15 | def get_imdb(name): 16 | """Get an imdb (image database) by name.""" 17 | if name not in __sets: 18 | print((list_imdbs())) 19 | raise KeyError('Unknown dataset: {}'.format(name)) 20 | return __sets[name]() 21 | 22 | def list_imdbs(): 23 | """List all registered imdbs.""" 24 | return list(__sets.keys()) 25 | -------------------------------------------------------------------------------- /api/lib/datasets/imdb.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import PIL 4 | import numpy as np 5 | import scipy.sparse 6 | from lib.utils.bbox import bbox_overlaps 7 | from lib.fast_rcnn.config import cfg 8 | 9 | class imdb(object): 10 | 11 | def __init__(self, name): 12 | self._name = name 13 | self._num_classes = 0 14 | self._classes = [] 15 | self._image_index = [] 16 | self._obj_proposer = 'selective_search' 17 | self._roidb = None 18 | print(self.default_roidb) 19 | self._roidb_handler = self.default_roidb 20 | # Use this dict for storing dataset specific config options 21 | self.config = {} 22 | 23 | @property 24 | def name(self): 25 | return self._name 26 | 27 | @property 28 | def num_classes(self): 29 | return len(self._classes) 30 | 31 | @property 32 | def classes(self): 33 | return self._classes 34 | 35 | @property 36 | def image_index(self): 37 | return self._image_index 38 | 39 | @property 40 | def roidb_handler(self): 41 | return self._roidb_handler 42 | 43 | @roidb_handler.setter 44 | def roidb_handler(self, val): 45 | self._roidb_handler = val 46 | 47 | def set_proposal_method(self, method): 48 | method = eval('self.' 
+ method + '_roidb') 49 | self.roidb_handler = method 50 | 51 | @property 52 | def roidb(self): 53 | # A roidb is a list of dictionaries, each with the following keys: 54 | # boxes 55 | # gt_overlaps 56 | # gt_classes 57 | # flipped 58 | if self._roidb is not None: 59 | return self._roidb 60 | self._roidb = self.roidb_handler() 61 | return self._roidb 62 | 63 | @property 64 | def cache_path(self): 65 | cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) 66 | if not os.path.exists(cache_path): 67 | os.makedirs(cache_path) 68 | return cache_path 69 | 70 | @property 71 | def num_images(self): 72 | return len(self.image_index) 73 | 74 | def image_path_at(self, i): 75 | raise NotImplementedError 76 | 77 | def default_roidb(self): 78 | raise NotImplementedError 79 | 80 | def _get_widths(self): 81 | return [PIL.Image.open(self.image_path_at(i)).size[0] 82 | for i in range(self.num_images)] 83 | 84 | def append_flipped_images(self): 85 | num_images = self.num_images 86 | widths = self._get_widths() 87 | for i in range(num_images): 88 | boxes = self.roidb[i]['boxes'].copy() 89 | oldx1 = boxes[:, 0].copy() 90 | oldx2 = boxes[:, 2].copy() 91 | boxes[:, 0] = widths[i] - oldx2 - 1 92 | boxes[:, 2] = widths[i] - oldx1 - 1 93 | for b in range(len(boxes)): 94 | if boxes[b][2]< boxes[b][0]: 95 | boxes[b][0] = 0 96 | assert (boxes[:, 2] >= boxes[:, 0]).all() 97 | entry = {'boxes' : boxes, 98 | 'gt_overlaps' : self.roidb[i]['gt_overlaps'], 99 | 'gt_classes' : self.roidb[i]['gt_classes'], 100 | 'flipped' : True} 101 | 102 | if 'gt_ishard' in self.roidb[i] and 'dontcare_areas' in self.roidb[i]: 103 | entry['gt_ishard'] = self.roidb[i]['gt_ishard'].copy() 104 | dontcare_areas = self.roidb[i]['dontcare_areas'].copy() 105 | oldx1 = dontcare_areas[:, 0].copy() 106 | oldx2 = dontcare_areas[:, 2].copy() 107 | dontcare_areas[:, 0] = widths[i] - oldx2 - 1 108 | dontcare_areas[:, 2] = widths[i] - oldx1 - 1 109 | entry['dontcare_areas'] = dontcare_areas 110 | 111 | self.roidb.append(entry) 112 | 113 | self._image_index = self._image_index * 2 114 | 115 | 116 | def create_roidb_from_box_list(self, box_list, gt_roidb): 117 | assert len(box_list) == self.num_images, \ 118 | 'Number of boxes must match number of ground-truth images' 119 | roidb = [] 120 | for i in range(self.num_images): 121 | boxes = box_list[i] 122 | num_boxes = boxes.shape[0] 123 | overlaps = np.zeros((num_boxes, self.num_classes), dtype=np.float32) 124 | 125 | if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: 126 | gt_boxes = gt_roidb[i]['boxes'] 127 | gt_classes = gt_roidb[i]['gt_classes'] 128 | gt_overlaps = bbox_overlaps(boxes.astype(np.float), 129 | gt_boxes.astype(np.float)) 130 | argmaxes = gt_overlaps.argmax(axis=1) 131 | maxes = gt_overlaps.max(axis=1) 132 | I = np.where(maxes > 0)[0] 133 | overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] 134 | 135 | overlaps = scipy.sparse.csr_matrix(overlaps) 136 | roidb.append({ 137 | 'boxes' : boxes, 138 | 'gt_classes' : np.zeros((num_boxes,), dtype=np.int32), 139 | 'gt_overlaps' : overlaps, 140 | 'flipped' : False, 141 | 'seg_areas' : np.zeros((num_boxes,), dtype=np.float32), 142 | }) 143 | return roidb 144 | 145 | @staticmethod 146 | def merge_roidbs(a, b): 147 | assert len(a) == len(b) 148 | for i in range(len(a)): 149 | a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) 150 | a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], 151 | b[i]['gt_classes'])) 152 | a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], 153 | b[i]['gt_overlaps']]) 154 | a[i]['seg_areas'] = 
np.hstack((a[i]['seg_areas'], 155 | b[i]['seg_areas'])) 156 | return a 157 | 158 | -------------------------------------------------------------------------------- /api/lib/datasets/pascal_voc.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import os 3 | import numpy as np 4 | import scipy.sparse 5 | try: 6 | import cPickle as pickle 7 | except: 8 | import pickle 9 | import uuid 10 | import xml.etree.ElementTree as ET 11 | from .imdb import imdb 12 | from lib.fast_rcnn.config import cfg 13 | 14 | class pascal_voc(imdb): 15 | def __init__(self, image_set, year, devkit_path=None): 16 | imdb.__init__(self, 'voc_' + year + '_' + image_set) 17 | self._year = year 18 | self._image_set = image_set 19 | self._devkit_path = self._get_default_path() if devkit_path is None \ 20 | else devkit_path 21 | self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) 22 | self._classes = ('__background__', # always index 0 23 | 'text') 24 | 25 | self._class_to_ind = dict(list(zip(self.classes, list(range(self.num_classes))))) 26 | self._image_ext = '.jpg' 27 | self._image_index = self._load_image_set_index() 28 | # Default to roidb handler 29 | #self._roidb_handler = self.selective_search_roidb 30 | self._roidb_handler = self.gt_roidb 31 | self._salt = str(uuid.uuid4()) 32 | self._comp_id = 'comp4' 33 | 34 | # PASCAL specific config options 35 | self.config = {'cleanup' : True, 36 | 'use_salt' : True, 37 | 'use_diff' : False, 38 | 'matlab_eval' : False, 39 | 'rpn_file' : None, 40 | 'min_size' : 2} 41 | 42 | assert os.path.exists(self._devkit_path), \ 43 | 'VOCdevkit path does not exist: {}'.format(self._devkit_path) 44 | assert os.path.exists(self._data_path), \ 45 | 'Path does not exist: {}'.format(self._data_path) 46 | 47 | def image_path_at(self, i): 48 | """ 49 | Return the absolute path to image i in the image sequence. 50 | """ 51 | return self.image_path_from_index(self._image_index[i]) 52 | 53 | def image_path_from_index(self, index): 54 | """ 55 | Construct an image path from the image's "index" identifier. 56 | """ 57 | image_path = os.path.join(self._data_path, 'JPEGImages', 58 | index + self._image_ext) 59 | assert os.path.exists(image_path), \ 60 | 'Path does not exist: {}'.format(image_path) 61 | return image_path 62 | 63 | def _load_image_set_index(self): 64 | """ 65 | Load the indexes listed in this dataset's image set file. 66 | """ 67 | # Example path to image set file: 68 | # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt 69 | image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', 70 | self._image_set + '.txt') 71 | assert os.path.exists(image_set_file), \ 72 | 'Path does not exist: {}'.format(image_set_file) 73 | with open(image_set_file) as f: 74 | image_index = [x.strip() for x in f.readlines()] 75 | return image_index 76 | 77 | def _get_default_path(self): 78 | """ 79 | Return the default path where PASCAL VOC is expected to be installed. 80 | """ 81 | return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year) 82 | 83 | def gt_roidb(self): 84 | """ 85 | Return the database of ground-truth regions of interest. 86 | 87 | This function loads/saves from/to a cache file to speed up future calls. 
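    If the cache file exists, the pickled roidb is returned as-is; otherwise the
    VOC XML annotations are parsed via _load_pascal_annotation() for every image
    index and the result is written back to the cache.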
88 | """ 89 | cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') 90 | if os.path.exists(cache_file): 91 | with open(cache_file, 'rb') as fid: 92 | roidb = pickle.load(fid) 93 | print('{} gt roidb loaded from {}'.format(self.name, cache_file)) 94 | return roidb 95 | 96 | gt_roidb = [self._load_pascal_annotation(index) 97 | for index in self.image_index] 98 | with open(cache_file, 'wb') as fid: 99 | pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) 100 | print('wrote gt roidb to {}'.format(cache_file)) 101 | 102 | return gt_roidb 103 | 104 | def rpn_roidb(self): 105 | if int(self._year) == 2007 or self._image_set != 'test': 106 | gt_roidb = self.gt_roidb() 107 | rpn_roidb = self._load_rpn_roidb(gt_roidb) 108 | roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb) 109 | else: 110 | roidb = self._load_rpn_roidb(None) 111 | 112 | return roidb 113 | 114 | def _load_rpn_roidb(self, gt_roidb): 115 | filename = self.config['rpn_file'] 116 | print('loading {}'.format(filename)) 117 | assert os.path.exists(filename), \ 118 | 'rpn data not found at: {}'.format(filename) 119 | with open(filename, 'rb') as f: 120 | box_list = pickle.load(f) 121 | return self.create_roidb_from_box_list(box_list, gt_roidb) 122 | 123 | 124 | def _load_pascal_annotation(self, index): 125 | """ 126 | Load image and bounding boxes info from XML file in the PASCAL VOC 127 | format. 128 | """ 129 | filename = os.path.join(self._data_path, 'Annotations', index + '.xml') 130 | tree = ET.parse(filename) 131 | objs = tree.findall('object') 132 | num_objs = len(objs) 133 | 134 | boxes = np.zeros((num_objs, 4), dtype=np.uint16) 135 | gt_classes = np.zeros((num_objs), dtype=np.int32) 136 | overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) 137 | # "Seg" area for pascal is just the box area 138 | seg_areas = np.zeros((num_objs), dtype=np.float32) 139 | ishards = np.zeros((num_objs), dtype=np.int32) 140 | 141 | # Load object bounding boxes into a data frame. 
142 | for ix, obj in enumerate(objs): 143 | bbox = obj.find('bndbox') 144 | # Make pixel indexes 0-based 145 | x1 = float(bbox.find('xmin').text) 146 | y1 = float(bbox.find('ymin').text) 147 | x2 = float(bbox.find('xmax').text) 148 | y2 = float(bbox.find('ymax').text) 149 | diffc = obj.find('difficult') 150 | difficult = 0 if diffc == None else int(diffc.text) 151 | ishards[ix] = difficult 152 | 153 | cls = self._class_to_ind[obj.find('name').text.lower().strip()] 154 | boxes[ix, :] = [x1, y1, x2, y2] 155 | gt_classes[ix] = cls 156 | overlaps[ix, cls] = 1.0 157 | seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) 158 | 159 | overlaps = scipy.sparse.csr_matrix(overlaps) 160 | 161 | return {'boxes' : boxes, 162 | 'gt_classes': gt_classes, 163 | 'gt_ishard': ishards, 164 | 'gt_overlaps' : overlaps, 165 | 'flipped' : False, 166 | 'seg_areas' : seg_areas} 167 | 168 | def _get_comp_id(self): 169 | comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt'] 170 | else self._comp_id) 171 | return comp_id 172 | 173 | def _get_voc_results_file_template(self): 174 | filename = self._get_comp_id() + '_det_' + self._image_set + '_{:s}.txt' 175 | filedir = os.path.join(self._devkit_path, 'results', 'VOC' + self._year, 'Main') 176 | if not os.path.exists(filedir): 177 | os.makedirs(filedir) 178 | path = os.path.join(filedir, filename) 179 | return path 180 | 181 | def _write_voc_results_file(self, all_boxes): 182 | for cls_ind, cls in enumerate(self.classes): 183 | if cls == '__background__': 184 | continue 185 | print('Writing {} VOC results file'.format(cls)) 186 | filename = self._get_voc_results_file_template().format(cls) 187 | with open(filename, 'wt') as f: 188 | for im_ind, index in enumerate(self.image_index): 189 | dets = all_boxes[cls_ind][im_ind] 190 | if dets == []: 191 | continue 192 | # the VOCdevkit expects 1-based indices 193 | for k in range(dets.shape[0]): 194 | f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'. 195 | format(index, dets[k, -1], 196 | dets[k, 0] + 1, dets[k, 1] + 1, 197 | dets[k, 2] + 1, dets[k, 3] + 1)) 198 | 199 | 200 | if __name__ == '__main__': 201 | d = pascal_voc('trainval', '2007') 202 | res = d.roidb 203 | from IPython import embed; embed() 204 | -------------------------------------------------------------------------------- /api/lib/fast_rcnn/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/lib/fast_rcnn/__init__.py -------------------------------------------------------------------------------- /api/lib/fast_rcnn/bbox_transform.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def bbox_transform(ex_rois, gt_rois): 4 | """ 5 | computes the distance from ground-truth boxes to the given boxes, normed by their size 6 | :param ex_rois: n * 4 numpy array, given boxes 7 | :param gt_rois: n * 4 numpy array, ground-truth boxes 8 | :return: deltas: n * 4 numpy array, ground-truth boxes 9 | """ 10 | ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 11 | ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 12 | ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths 13 | ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights 14 | 15 | assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \ 16 | 'Invalid boxes found: {} {}'. 
\ 17 | format(ex_rois[np.argmin(ex_widths), :], ex_rois[np.argmin(ex_heights), :]) 18 | 19 | gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 20 | gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 21 | gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths 22 | gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights 23 | 24 | # warnings.catch_warnings() 25 | # warnings.filterwarnings('error') 26 | targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths 27 | targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights 28 | targets_dw = np.log(gt_widths / ex_widths) 29 | targets_dh = np.log(gt_heights / ex_heights) 30 | 31 | targets = np.vstack( 32 | (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() 33 | 34 | return targets 35 | 36 | def bbox_transform_inv(boxes, deltas): 37 | 38 | boxes = boxes.astype(deltas.dtype, copy=False) 39 | 40 | widths = boxes[:, 2] - boxes[:, 0] + 1.0 41 | heights = boxes[:, 3] - boxes[:, 1] + 1.0 42 | ctr_x = boxes[:, 0] + 0.5 * widths 43 | ctr_y = boxes[:, 1] + 0.5 * heights 44 | 45 | dx = deltas[:, 0::4] 46 | dy = deltas[:, 1::4] 47 | dw = deltas[:, 2::4] 48 | dh = deltas[:, 3::4] 49 | 50 | pred_ctr_x = ctr_x[:, np.newaxis] 51 | pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] 52 | pred_w = widths[:, np.newaxis] 53 | pred_h = np.exp(dh) * heights[:, np.newaxis] 54 | 55 | pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) 56 | # x1 57 | pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w 58 | # y1 59 | pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h 60 | # x2 61 | pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w 62 | # y2 63 | pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h 64 | 65 | return pred_boxes 66 | 67 | def clip_boxes(boxes, im_shape): 68 | """ 69 | Clip boxes to image boundaries. 70 | """ 71 | 72 | # x1 >= 0 73 | boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) 74 | # y1 >= 0 75 | boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) 76 | # x2 < im_shape[1] 77 | boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) 78 | # y2 < im_shape[0] 79 | boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) 80 | return boxes 81 | -------------------------------------------------------------------------------- /api/lib/fast_rcnn/config.py: -------------------------------------------------------------------------------- 1 | import os 2 | import os.path as osp 3 | import numpy as np 4 | from time import strftime, localtime 5 | from easydict import EasyDict as edict 6 | 7 | __C = edict() 8 | cfg = __C 9 | 10 | # Default GPU device id 11 | __C.GPU_ID = 0 12 | 13 | # Training options 14 | __C.IS_RPN = True 15 | __C.ANCHOR_SCALES = [16] 16 | __C.NCLASSES = 2 17 | __C.USE_GPU_NMS = True 18 | # multiscale training and testing 19 | __C.IS_MULTISCALE = False 20 | __C.IS_EXTRAPOLATING = True 21 | 22 | __C.REGION_PROPOSAL = 'RPN' 23 | 24 | __C.NET_NAME = 'VGGnet' 25 | __C.SUBCLS_NAME = 'voxel_exemplars' 26 | 27 | __C.TRAIN = edict() 28 | # Adam, Momentum, RMS 29 | __C.TRAIN.restore = 0 30 | __C.TRAIN.max_steps = 100000 31 | __C.TRAIN.SOLVER = 'Momentum' 32 | # learning rate 33 | __C.TRAIN.WEIGHT_DECAY = 0.0005 34 | __C.TRAIN.LEARNING_RATE = 0.001 35 | __C.TRAIN.MOMENTUM = 0.9 36 | __C.TRAIN.GAMMA = 0.1 37 | __C.TRAIN.STEPSIZE = 50000 38 | __C.TRAIN.DISPLAY = 10 39 | __C.TRAIN.LOG_IMAGE_ITERS = 100 40 | __C.TRAIN.OHEM = False 41 | __C.TRAIN.RANDOM_DOWNSAMPLE = False 42 | 43 | # Scales to compute real features 44 | __C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) 45 | __C.TRAIN.KERNEL_SIZE = 5 46 | 
__C.TRAIN.ASPECTS= (1,) 47 | __C.TRAIN.SCALES = (600,) 48 | 49 | # Max pixel size of the longest side of a scaled input image 50 | __C.TRAIN.MAX_SIZE = 1000 51 | 52 | # Images to use per minibatch 53 | __C.TRAIN.IMS_PER_BATCH = 2 54 | 55 | # Minibatch size (number of regions of interest [ROIs]) 56 | __C.TRAIN.BATCH_SIZE = 128 57 | 58 | # Fraction of minibatch that is labeled foreground (i.e. class > 0) 59 | __C.TRAIN.FG_FRACTION = 0.25 60 | 61 | # Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) 62 | __C.TRAIN.FG_THRESH = 0.5 63 | 64 | # Overlap threshold for a ROI to be considered background (class = 0 if 65 | # overlap in [LO, HI)) 66 | __C.TRAIN.BG_THRESH_HI = 0.5 67 | __C.TRAIN.BG_THRESH_LO = 0.1 68 | 69 | # Use horizontally-flipped images during training? 70 | __C.TRAIN.USE_FLIPPED = True 71 | 72 | # Train bounding-box regressors 73 | __C.TRAIN.BBOX_REG = True 74 | 75 | # Overlap required between a ROI and ground-truth box in order for that ROI to 76 | # be used as a bounding-box regression training example 77 | __C.TRAIN.BBOX_THRESH = 0.5 78 | 79 | # Iterations between snapshots 80 | __C.TRAIN.SNAPSHOT_ITERS = 5000 81 | 82 | # solver.prototxt specifies the snapshot path prefix, this adds an optional 83 | # infix to yield the path: [_]_iters_XYZ.caffemodel 84 | __C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn' 85 | __C.TRAIN.SNAPSHOT_INFIX = '' 86 | 87 | # Use a prefetch thread in roi_data_layer.layer 88 | # So far I haven't found this useful; likely more engineering work is required 89 | __C.TRAIN.USE_PREFETCH = False 90 | 91 | # Normalize the targets (subtract empirical mean, divide by empirical stddev) 92 | __C.TRAIN.BBOX_NORMALIZE_TARGETS = True 93 | # Deprecated (inside weights) 94 | # used for assigning weights for each coords (x1, y1, w, h) 95 | __C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 96 | # Normalize the targets using "precomputed" (or made up) means and stdevs 97 | # (BBOX_NORMALIZE_TARGETS must also be True) 98 | __C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True 99 | __C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) 100 | __C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) 101 | # faster rcnn dont use pre-generated rois by selective search 102 | # __C.TRAIN.BBOX_NORMALIZE_STDS = (1, 1, 1, 1) 103 | 104 | # Train using these proposals 105 | __C.TRAIN.PROPOSAL_METHOD = 'selective_search' 106 | 107 | # Make minibatches from images that have similar aspect ratios (i.e. both 108 | # tall and thin or both short and wide) in order to avoid wasting computation 109 | # on zero-padding. 
110 | __C.TRAIN.ASPECT_GROUPING = True 111 | # preclude rois intersected with dontcare areas above the value 112 | __C.TRAIN.DONTCARE_AREA_INTERSECTION_HI = 0.5 113 | __C.TRAIN.PRECLUDE_HARD_SAMPLES = True 114 | # Use RPN to detect objects 115 | __C.TRAIN.HAS_RPN = True 116 | # IOU >= thresh: positive example 117 | __C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 118 | # IOU < thresh: negative example 119 | __C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 120 | # If an anchor statisfied by positive and negative conditions set to negative 121 | __C.TRAIN.RPN_CLOBBER_POSITIVES = False 122 | # Max number of foreground examples 123 | __C.TRAIN.RPN_FG_FRACTION = 0.5 124 | # Total number of examples 125 | __C.TRAIN.RPN_BATCHSIZE = 256 126 | # NMS threshold used on RPN proposals 127 | __C.TRAIN.RPN_NMS_THRESH = 0.7 128 | # Number of top scoring boxes to keep before apply NMS to RPN proposals 129 | __C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 130 | # Number of top scoring boxes to keep after applying NMS to RPN proposals 131 | __C.TRAIN.RPN_POST_NMS_TOP_N = 2000 132 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 133 | __C.TRAIN.RPN_MIN_SIZE = 8 134 | # Deprecated (outside weights) 135 | __C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) 136 | # Give the positive RPN examples weight of p * 1 / {num positives} 137 | # and give negatives a weight of (1 - p) 138 | # Set to -1.0 to use uniform example weighting 139 | __C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 140 | # __C.TRAIN.RPN_POSITIVE_WEIGHT = 0.5 141 | 142 | 143 | # 144 | # Testing options 145 | # 146 | 147 | __C.TEST = edict() 148 | __C.TEST.checkpoints_path = "checkpoints/" 149 | __C.TEST.DETECT_MODE = "H"#H/O for horizontal/oriented mode 150 | # Scales to use during testing (can list multiple scales) 151 | # Each scale is the pixel size of an image's shortest side 152 | __C.TEST.SCALES = (600,) 153 | 154 | # Max pixel size of the longest side of a scaled input image 155 | __C.TEST.MAX_SIZE = 1000 156 | 157 | # Overlap threshold used for non-maximum suppression (suppress boxes with 158 | # IoU >= this threshold) 159 | __C.TEST.NMS = 0.3 160 | 161 | # Experimental: treat the (K+1) units in the cls_score layer as linear 162 | # predictors (trained, eg, with one-vs-rest SVMs). 163 | __C.TEST.SVM = False 164 | 165 | # Test using bounding-box regressors 166 | __C.TEST.BBOX_REG = True 167 | 168 | # Propose boxes 169 | __C.TEST.HAS_RPN = True 170 | 171 | # Test using these proposals 172 | __C.TEST.PROPOSAL_METHOD = 'selective_search' 173 | 174 | ## NMS threshold used on RPN proposals 175 | __C.TEST.RPN_NMS_THRESH = 0.7 176 | ## Number of top scoring boxes to keep before apply NMS to RPN proposals 177 | #__C.TEST.RPN_PRE_NMS_TOP_N = 6000 178 | __C.TEST.RPN_PRE_NMS_TOP_N = 12000 179 | ## Number of top scoring boxes to keep after applying NMS to RPN proposals 180 | __C.TEST.RPN_POST_NMS_TOP_N = 1000 181 | #__C.TEST.RPN_POST_NMS_TOP_N = 2000 182 | # Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) 183 | __C.TEST.RPN_MIN_SIZE = 8 184 | 185 | 186 | # 187 | # MISC 188 | # 189 | 190 | # The mapping from image coordinates to feature map coordinates might cause 191 | # some boxes that are distinct in image space to become identical in feature 192 | # coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor 193 | # for identifying duplicate boxes. 194 | # 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 195 | __C.DEDUP_BOXES = 1./16. 
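# Example (sketch): overrides are merged on top of these defaults at run time.
#   from lib.fast_rcnn.config import cfg, cfg_from_file
#   cfg_from_file('overrides.yml')   # hypothetical YAML, e.g. TEST: {DETECT_MODE: 'O'}
#   print(cfg.TEST.DETECT_MODE)      # 'O' after the merge, 'H' by default
# cfg_from_file() and cfg_from_list() are defined at the bottom of this file.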
196 | 197 | # Pixel mean values (BGR order) as a (1, 1, 3) array 198 | # We use the same pixel mean for all networks even though it's not exactly what 199 | # they were trained with 200 | __C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) 201 | 202 | # For reproducibility 203 | #__C.RNG_SEED = 3 204 | __C.RNG_SEED = 3 205 | 206 | # A small number that's used many times 207 | __C.EPS = 1e-14 208 | 209 | # Root directory of project 210 | __C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) 211 | 212 | # Data directory 213 | __C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) 214 | 215 | # Model directory 216 | __C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) 217 | 218 | # Name (or path to) the matlab executable 219 | __C.MATLAB = 'matlab' 220 | 221 | # Place outputs under an experiments directory 222 | __C.EXP_DIR = 'default' 223 | __C.LOG_DIR = 'default' 224 | 225 | # Use GPU implementation of non-maximum suppression 226 | __C.USE_GPU_NMS = True 227 | 228 | 229 | 230 | def get_output_dir(imdb, weights_filename): 231 | """Return the directory where experimental artifacts are placed. 232 | If the directory does not exist, it is created. 233 | 234 | A canonical path is built using the name from an imdb and a network 235 | (if not None). 236 | """ 237 | outdir = osp.abspath(osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) 238 | if weights_filename is not None: 239 | outdir = osp.join(outdir, weights_filename) 240 | if not os.path.exists(outdir): 241 | os.makedirs(outdir) 242 | return outdir 243 | 244 | def get_log_dir(imdb): 245 | """Return the directory where experimental artifacts are placed. 246 | If the directory does not exist, it is created. 247 | A canonical path is built using the name from an imdb and a network 248 | (if not None). 249 | """ 250 | log_dir = osp.abspath(\ 251 | osp.join(__C.ROOT_DIR, 'logs', __C.LOG_DIR, imdb.name, strftime("%Y-%m-%d-%H-%M-%S", localtime()))) 252 | if not os.path.exists(log_dir): 253 | os.makedirs(log_dir) 254 | return log_dir 255 | 256 | def _merge_a_into_b(a, b): 257 | """Merge config dictionary a into config dictionary b, clobbering the 258 | options in b whenever they are also specified in a. 259 | """ 260 | if type(a) is not edict: 261 | return 262 | 263 | for k, v in a.items(): 264 | # a must specify keys that are in b 265 | if k not in b: 266 | raise KeyError('{} is not a valid config key'.format(k)) 267 | 268 | # the types must match, too 269 | old_type = type(b[k]) 270 | if old_type is not type(v): 271 | if isinstance(b[k], np.ndarray): 272 | v = np.array(v, dtype=b[k].dtype) 273 | else: 274 | raise ValueError(('Type mismatch ({} vs. 
{}) ' 275 | 'for config key: {}').format(type(b[k]), 276 | type(v), k)) 277 | 278 | # recursively merge dicts 279 | if type(v) is edict: 280 | try: 281 | _merge_a_into_b(a[k], b[k]) 282 | except: 283 | print(('Error under config key: {}'.format(k))) 284 | raise 285 | else: 286 | b[k] = v 287 | 288 | def cfg_from_file(filename): 289 | """Load a config file and merge it into the default options.""" 290 | import yaml 291 | with open(filename, 'r') as f: 292 | yaml_cfg = edict(yaml.load(f)) 293 | 294 | _merge_a_into_b(yaml_cfg, __C) 295 | 296 | def cfg_from_list(cfg_list): 297 | """Set config keys via list (e.g., from command line).""" 298 | from ast import literal_eval 299 | assert len(cfg_list) % 2 == 0 300 | for k, v in zip(cfg_list[0::2], cfg_list[1::2]): 301 | key_list = k.split('.') 302 | d = __C 303 | for subkey in key_list[:-1]: 304 | assert subkey in d 305 | d = d[subkey] 306 | subkey = key_list[-1] 307 | assert subkey in d 308 | try: 309 | value = literal_eval(v) 310 | except: 311 | # handle the case when v is a string literal 312 | value = v 313 | assert type(value) == type(d[subkey]), \ 314 | 'type {} does not match original type {}'.format( 315 | type(value), type(d[subkey])) 316 | d[subkey] = value 317 | -------------------------------------------------------------------------------- /api/lib/fast_rcnn/nms_wrapper.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .config import cfg 3 | pure_python_nms = False 4 | try: 5 | from lib.utils.gpu_nms import gpu_nms 6 | from ..utils.cython_nms import nms as cython_nms 7 | except ImportError: 8 | pure_python_nms = True 9 | 10 | 11 | def nms(dets, thresh): 12 | if dets.shape[0] == 0: 13 | return [] 14 | if pure_python_nms: 15 | # print("Fall back to pure python nms") 16 | return py_cpu_nms(dets, thresh) 17 | if cfg.USE_GPU_NMS: 18 | return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) 19 | else: 20 | return cython_nms(dets, thresh) 21 | 22 | 23 | def py_cpu_nms(dets, thresh): 24 | x1 = dets[:, 0] 25 | y1 = dets[:, 1] 26 | x2 = dets[:, 2] 27 | y2 = dets[:, 3] 28 | scores = dets[:, 4] 29 | 30 | areas = (x2 - x1 + 1) * (y2 - y1 + 1) 31 | order = scores.argsort()[::-1] 32 | 33 | keep = [] 34 | while order.size > 0: 35 | i = order[0] 36 | keep.append(i) 37 | xx1 = np.maximum(x1[i], x1[order[1:]]) 38 | yy1 = np.maximum(y1[i], y1[order[1:]]) 39 | xx2 = np.minimum(x2[i], x2[order[1:]]) 40 | yy2 = np.minimum(y2[i], y2[order[1:]]) 41 | w = np.maximum(0.0, xx2 - xx1 + 1) 42 | h = np.maximum(0.0, yy2 - yy1 + 1) 43 | inter = w * h 44 | ovr = inter / (areas[i] + areas[order[1:]] - inter) 45 | inds = np.where(ovr <= thresh)[0] 46 | order = order[inds + 1] 47 | return keep 48 | -------------------------------------------------------------------------------- /api/lib/fast_rcnn/test.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import cv2 3 | from .config import cfg 4 | from lib.utils.blob import im_list_to_blob 5 | 6 | 7 | def _get_image_blob(im): 8 | im_orig = im.astype(np.float32, copy=True) 9 | im_orig -= cfg.PIXEL_MEANS 10 | 11 | im_shape = im_orig.shape 12 | im_size_min = np.min(im_shape[0:2]) 13 | im_size_max = np.max(im_shape[0:2]) 14 | 15 | processed_ims = [] 16 | im_scale_factors = [] 17 | 18 | for target_size in cfg.TEST.SCALES: 19 | im_scale = float(target_size) / float(im_size_min) 20 | # Prevent the biggest axis from being more than MAX_SIZE 21 | if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: 22 | 
im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) 23 | im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, 24 | interpolation=cv2.INTER_LINEAR) 25 | im_scale_factors.append(im_scale) 26 | processed_ims.append(im) 27 | 28 | # Create a blob to hold the input images 29 | blob = im_list_to_blob(processed_ims) 30 | 31 | return blob, np.array(im_scale_factors) 32 | 33 | 34 | def _get_blobs(im, rois): 35 | blobs = {'data' : None, 'rois' : None} 36 | blobs['data'], im_scale_factors = _get_image_blob(im) 37 | return blobs, im_scale_factors 38 | 39 | 40 | def test_ctpn(sess, net, im, boxes=None): 41 | blobs, im_scales = _get_blobs(im, boxes) 42 | if cfg.TEST.HAS_RPN: 43 | im_blob = blobs['data'] 44 | blobs['im_info'] = np.array( 45 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 46 | dtype=np.float32) 47 | # forward pass 48 | if cfg.TEST.HAS_RPN: 49 | feed_dict = {net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0} 50 | 51 | rois = sess.run([net.get_output('rois')[0]],feed_dict=feed_dict) 52 | rois=rois[0] 53 | 54 | scores = rois[:, 0] 55 | if cfg.TEST.HAS_RPN: 56 | assert len(im_scales) == 1, "Only single-image batch implemented" 57 | boxes = rois[:, 1:5] / im_scales[0] 58 | return scores,boxes 59 | -------------------------------------------------------------------------------- /api/lib/fast_rcnn/train.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import numpy as np 3 | import os 4 | import tensorflow as tf 5 | from lib.roi_data_layer.layer import RoIDataLayer 6 | from lib.utils.timer import Timer 7 | from lib.roi_data_layer import roidb as rdl_roidb 8 | from lib.fast_rcnn.config import cfg 9 | 10 | _DEBUG = False 11 | 12 | class SolverWrapper(object): 13 | def __init__(self, sess, network, imdb, roidb, output_dir, logdir, pretrained_model=None): 14 | """Initialize the SolverWrapper.""" 15 | self.net = network 16 | self.imdb = imdb 17 | self.roidb = roidb 18 | self.output_dir = output_dir 19 | self.pretrained_model = pretrained_model 20 | 21 | print('Computing bounding-box regression targets...') 22 | if cfg.TRAIN.BBOX_REG: 23 | self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets(roidb) 24 | print('done') 25 | 26 | # For checkpoint 27 | self.saver = tf.train.Saver(max_to_keep=100,write_version=tf.train.SaverDef.V2) 28 | self.writer = tf.summary.FileWriter(logdir=logdir, 29 | graph=tf.get_default_graph(), 30 | flush_secs=5) 31 | 32 | def snapshot(self, sess, iter): 33 | net = self.net 34 | if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers and cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 35 | # save original values 36 | with tf.variable_scope('bbox_pred', reuse=True): 37 | weights = tf.get_variable("weights") 38 | biases = tf.get_variable("biases") 39 | 40 | orig_0 = weights.eval() 41 | orig_1 = biases.eval() 42 | 43 | # scale and shift with bbox reg unnormalization; then save snapshot 44 | weights_shape = weights.get_shape().as_list() 45 | sess.run(weights.assign(orig_0 * np.tile(self.bbox_stds, (weights_shape[0],1)))) 46 | sess.run(biases.assign(orig_1 * self.bbox_stds + self.bbox_means)) 47 | 48 | if not os.path.exists(self.output_dir): 49 | os.makedirs(self.output_dir) 50 | 51 | infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX 52 | if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') 53 | filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + 54 | '_iter_{:d}'.format(iter+1) + '.ckpt') 55 | filename = os.path.join(self.output_dir, filename) 56 | 57 | self.saver.save(sess, 
filename) 58 | print('Wrote snapshot to: {:s}'.format(filename)) 59 | 60 | if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: 61 | # restore net to original state 62 | sess.run(weights.assign(orig_0)) 63 | sess.run(biases.assign(orig_1)) 64 | 65 | def build_image_summary(self): 66 | # A simple graph for write image summary 67 | 68 | log_image_data = tf.placeholder(tf.uint8, [None, None, 3]) 69 | log_image_name = tf.placeholder(tf.string) 70 | # import tensorflow.python.ops.gen_logging_ops as logging_ops 71 | from tensorflow.python.ops import gen_logging_ops 72 | from tensorflow.python.framework import ops as _ops 73 | log_image = gen_logging_ops._image_summary(log_image_name, tf.expand_dims(log_image_data, 0), max_images=1) 74 | _ops.add_to_collection(_ops.GraphKeys.SUMMARIES, log_image) 75 | # log_image = tf.summary.image(log_image_name, tf.expand_dims(log_image_data, 0), max_outputs=1) 76 | return log_image, log_image_data, log_image_name 77 | 78 | 79 | def train_model(self, sess, max_iters, restore=False): 80 | """Network training loop.""" 81 | data_layer = get_data_layer(self.roidb, self.imdb.num_classes) 82 | total_loss,model_loss, rpn_cross_entropy, rpn_loss_box=self.net.build_loss(ohem=cfg.TRAIN.OHEM) 83 | # scalar summary 84 | tf.summary.scalar('rpn_reg_loss', rpn_loss_box) 85 | tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy) 86 | tf.summary.scalar('model_loss', model_loss) 87 | tf.summary.scalar('total_loss',total_loss) 88 | summary_op = tf.summary.merge_all() 89 | 90 | log_image, log_image_data, log_image_name =\ 91 | self.build_image_summary() 92 | 93 | # optimizer 94 | lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False) 95 | if cfg.TRAIN.SOLVER == 'Adam': 96 | opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE) 97 | elif cfg.TRAIN.SOLVER == 'RMS': 98 | opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE) 99 | else: 100 | # lr = tf.Variable(0.0, trainable=False) 101 | momentum = cfg.TRAIN.MOMENTUM 102 | opt = tf.train.MomentumOptimizer(lr, momentum) 103 | 104 | global_step = tf.Variable(0, trainable=False) 105 | with_clip = True 106 | if with_clip: 107 | tvars = tf.trainable_variables() 108 | grads, norm = tf.clip_by_global_norm(tf.gradients(total_loss, tvars), 10.0) 109 | train_op = opt.apply_gradients(list(zip(grads, tvars)), global_step=global_step) 110 | else: 111 | train_op = opt.minimize(total_loss, global_step=global_step) 112 | 113 | # intialize variables 114 | sess.run(tf.global_variables_initializer()) 115 | restore_iter = 0 116 | 117 | # load vgg16 118 | if self.pretrained_model is not None and not restore: 119 | try: 120 | print(('Loading pretrained model ' 121 | 'weights from {:s}').format(self.pretrained_model)) 122 | self.net.load(self.pretrained_model, sess, True) 123 | except: 124 | raise Exception('Check your pretrained model {:s}'.format(self.pretrained_model)) 125 | 126 | # resuming a trainer 127 | if restore: 128 | try: 129 | ckpt = tf.train.get_checkpoint_state(self.output_dir) 130 | print('Restoring from {}...'.format(ckpt.model_checkpoint_path), end=' ') 131 | self.saver.restore(sess, ckpt.model_checkpoint_path) 132 | stem = os.path.splitext(os.path.basename(ckpt.model_checkpoint_path))[0] 133 | restore_iter = int(stem.split('_')[-1]) 134 | sess.run(global_step.assign(restore_iter)) 135 | print('done') 136 | except: 137 | raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path) 138 | 139 | last_snapshot_iter = -1 140 | timer = Timer() 141 | for iter in range(restore_iter, max_iters): 142 | timer.tic() 143 | # learning rate 
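            # step decay: every cfg.TRAIN.STEPSIZE iterations (50000 by default)
            # the learning rate is multiplied by cfg.TRAIN.GAMMA (0.1 by default)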
144 | if iter != 0 and iter % cfg.TRAIN.STEPSIZE == 0: 145 | sess.run(tf.assign(lr, lr.eval() * cfg.TRAIN.GAMMA)) 146 | print(lr) 147 | 148 | # get one batch 149 | blobs = data_layer.forward() 150 | 151 | feed_dict={ 152 | self.net.data: blobs['data'], 153 | self.net.im_info: blobs['im_info'], 154 | self.net.keep_prob: 0.5, 155 | self.net.gt_boxes: blobs['gt_boxes'], 156 | self.net.gt_ishard: blobs['gt_ishard'], 157 | self.net.dontcare_areas: blobs['dontcare_areas'] 158 | } 159 | res_fetches=[] 160 | fetch_list = [total_loss,model_loss, rpn_cross_entropy, rpn_loss_box, 161 | summary_op, 162 | train_op] + res_fetches 163 | 164 | total_loss_val,model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, \ 165 | summary_str, _ = sess.run(fetches=fetch_list, feed_dict=feed_dict) 166 | 167 | self.writer.add_summary(summary=summary_str, global_step=global_step.eval()) 168 | 169 | _diff_time = timer.toc(average=False) 170 | 171 | 172 | if (iter) % (cfg.TRAIN.DISPLAY) == 0: 173 | print('iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f'%\ 174 | (iter, max_iters, total_loss_val,model_loss_val,rpn_loss_cls_val,rpn_loss_box_val,lr.eval())) 175 | print('speed: {:.3f}s / iter'.format(_diff_time)) 176 | 177 | if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0: 178 | last_snapshot_iter = iter 179 | self.snapshot(sess, iter) 180 | 181 | if last_snapshot_iter != iter: 182 | self.snapshot(sess, iter) 183 | 184 | def get_training_roidb(imdb): 185 | """Returns a roidb (Region of Interest database) for use in training.""" 186 | if cfg.TRAIN.USE_FLIPPED: 187 | print('Appending horizontally-flipped training examples...') 188 | imdb.append_flipped_images() 189 | print('done') 190 | 191 | print('Preparing training data...') 192 | if cfg.TRAIN.HAS_RPN: 193 | rdl_roidb.prepare_roidb(imdb) 194 | else: 195 | rdl_roidb.prepare_roidb(imdb) 196 | print('done') 197 | 198 | return imdb.roidb 199 | 200 | 201 | def get_data_layer(roidb, num_classes): 202 | """return a data layer.""" 203 | if cfg.TRAIN.HAS_RPN: 204 | if cfg.IS_MULTISCALE: 205 | # obsolete 206 | # layer = GtDataLayer(roidb) 207 | raise "Calling caffe modules..." 
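            # NOTE: raising a plain string is a TypeError in Python 3; this obsolete
            # multiscale branch is effectively unreachable with the default
            # cfg.IS_MULTISCALE = False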
208 | else: 209 | layer = RoIDataLayer(roidb, num_classes) 210 | else: 211 | layer = RoIDataLayer(roidb, num_classes) 212 | 213 | return layer 214 | 215 | 216 | 217 | def train_net(network, imdb, roidb, output_dir, log_dir, pretrained_model=None, max_iters=40000, restore=False): 218 | """Train a Fast R-CNN network.""" 219 | 220 | config = tf.ConfigProto(allow_soft_placement=True) 221 | config.gpu_options.allocator_type = 'BFC' 222 | config.gpu_options.per_process_gpu_memory_fraction = 0.75 223 | with tf.Session(config=config) as sess: 224 | sw = SolverWrapper(sess, network, imdb, roidb, output_dir, logdir= log_dir, pretrained_model=pretrained_model) 225 | print('Solving...') 226 | sw.train_model(sess, max_iters, restore=restore) 227 | print('done solving') 228 | -------------------------------------------------------------------------------- /api/lib/networks/VGGnet_test.py: -------------------------------------------------------------------------------- 1 | import tensorflow as tf 2 | from .network import Network 3 | from lib.fast_rcnn.config import cfg 4 | 5 | 6 | class VGGnet_test(Network): 7 | def __init__(self, trainable=True): 8 | self.inputs = [] 9 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) 10 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) 11 | self.keep_prob = tf.placeholder(tf.float32) 12 | self.layers = dict({'data': self.data, 'im_info': self.im_info}) 13 | self.trainable = trainable 14 | self.setup() 15 | 16 | def setup(self): 17 | anchor_scales = cfg.ANCHOR_SCALES 18 | _feat_stride = [16, ] 19 | 20 | (self.feed('data') 21 | .conv(3, 3, 64, 1, 1, name='conv1_1') 22 | .conv(3, 3, 64, 1, 1, name='conv1_2') 23 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 24 | .conv(3, 3, 128, 1, 1, name='conv2_1') 25 | .conv(3, 3, 128, 1, 1, name='conv2_2') 26 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 27 | .conv(3, 3, 256, 1, 1, name='conv3_1') 28 | .conv(3, 3, 256, 1, 1, name='conv3_2') 29 | .conv(3, 3, 256, 1, 1, name='conv3_3') 30 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 31 | .conv(3, 3, 512, 1, 1, name='conv4_1') 32 | .conv(3, 3, 512, 1, 1, name='conv4_2') 33 | .conv(3, 3, 512, 1, 1, name='conv4_3') 34 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 35 | .conv(3, 3, 512, 1, 1, name='conv5_1') 36 | .conv(3, 3, 512, 1, 1, name='conv5_2') 37 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 38 | 39 | (self.feed('conv5_3').conv(3, 3, 512, 1, 1, name='rpn_conv/3x3')) 40 | 41 | (self.feed('rpn_conv/3x3').Bilstm(512, 128, 512, name='lstm_o')) 42 | (self.feed('lstm_o').lstm_fc(512, len(anchor_scales) * 10 * 4, name='rpn_bbox_pred')) 43 | (self.feed('lstm_o').lstm_fc(512, len(anchor_scales) * 10 * 2, name='rpn_cls_score')) 44 | 45 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 46 | (self.feed('rpn_cls_score') 47 | .spatial_reshape_layer(2, name='rpn_cls_score_reshape') 48 | .spatial_softmax(name='rpn_cls_prob')) 49 | 50 | # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) 51 | (self.feed('rpn_cls_prob') 52 | .spatial_reshape_layer(len(anchor_scales) * 10 * 2, name='rpn_cls_prob_reshape')) 53 | 54 | (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') 55 | .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) 56 | -------------------------------------------------------------------------------- /api/lib/networks/VGGnet_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import tensorflow as tf 3 | from .network import Network 4 | from 
lib.fast_rcnn.config import cfg 5 | 6 | class VGGnet_train(Network): 7 | def __init__(self, trainable=True): 8 | self.inputs = [] 9 | self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3], name='data') 10 | self.im_info = tf.placeholder(tf.float32, shape=[None, 3], name='im_info') 11 | self.gt_boxes = tf.placeholder(tf.float32, shape=[None, 5], name='gt_boxes') 12 | self.gt_ishard = tf.placeholder(tf.int32, shape=[None], name='gt_ishard') 13 | self.dontcare_areas = tf.placeholder(tf.float32, shape=[None, 4], name='dontcare_areas') 14 | self.keep_prob = tf.placeholder(tf.float32) 15 | self.layers = dict({'data':self.data, 'im_info':self.im_info, 'gt_boxes':self.gt_boxes,\ 16 | 'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas}) 17 | self.trainable = trainable 18 | self.setup() 19 | 20 | def setup(self): 21 | 22 | # n_classes = 21 23 | n_classes = cfg.NCLASSES 24 | # anchor_scales = [8, 16, 32] 25 | anchor_scales = cfg.ANCHOR_SCALES 26 | _feat_stride = [16, ] 27 | 28 | (self.feed('data') 29 | .conv(3, 3, 64, 1, 1, name='conv1_1') 30 | .conv(3, 3, 64, 1, 1, name='conv1_2') 31 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') 32 | .conv(3, 3, 128, 1, 1, name='conv2_1') 33 | .conv(3, 3, 128, 1, 1, name='conv2_2') 34 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') 35 | .conv(3, 3, 256, 1, 1, name='conv3_1') 36 | .conv(3, 3, 256, 1, 1, name='conv3_2') 37 | .conv(3, 3, 256, 1, 1, name='conv3_3') 38 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') 39 | .conv(3, 3, 512, 1, 1, name='conv4_1') 40 | .conv(3, 3, 512, 1, 1, name='conv4_2') 41 | .conv(3, 3, 512, 1, 1, name='conv4_3') 42 | .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') 43 | .conv(3, 3, 512, 1, 1, name='conv5_1') 44 | .conv(3, 3, 512, 1, 1, name='conv5_2') 45 | .conv(3, 3, 512, 1, 1, name='conv5_3')) 46 | #========= RPN ============ 47 | (self.feed('conv5_3') 48 | .conv(3,3,512,1,1,name='rpn_conv/3x3')) 49 | 50 | (self.feed('rpn_conv/3x3').Bilstm(512,128,512,name='lstm_o')) 51 | (self.feed('lstm_o').lstm_fc(512,len(anchor_scales) * 10 * 4, name='rpn_bbox_pred')) 52 | (self.feed('lstm_o').lstm_fc(512,len(anchor_scales) * 10 * 2,name='rpn_cls_score')) 53 | 54 | # generating training labels on the fly 55 | # output: rpn_labels(HxWxA, 2) rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights 56 | # 给每个anchor上标签,并计算真值(也是delta的形式),以及内部权重和外部权重 57 | (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info') 58 | .anchor_target_layer(_feat_stride, anchor_scales, name = 'rpn-data' )) 59 | 60 | # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) 61 | # 给之前得到的score进行softmax,得到0-1之间的得分 62 | (self.feed('rpn_cls_score') 63 | .spatial_reshape_layer(2, name = 'rpn_cls_score_reshape') 64 | .spatial_softmax(name='rpn_cls_prob')) 65 | -------------------------------------------------------------------------------- /api/lib/networks/__init__.py: -------------------------------------------------------------------------------- 1 | from .VGGnet_train import VGGnet_train 2 | from .VGGnet_test import VGGnet_test 3 | from . 
import factory 4 | -------------------------------------------------------------------------------- /api/lib/networks/factory.py: -------------------------------------------------------------------------------- 1 | from .VGGnet_test import VGGnet_test 2 | from .VGGnet_train import VGGnet_train 3 | 4 | def get_network(name): 5 | """Get a network by name.""" 6 | if name.split('_')[0] == 'VGGnet': 7 | if name.split('_')[1] == 'test': 8 | return VGGnet_test() 9 | elif name.split('_')[1] == 'train': 10 | return VGGnet_train() 11 | else: 12 | raise KeyError('Unknown dataset: {}'.format(name)) 13 | else: 14 | raise KeyError('Unknown dataset: {}'.format(name)) 15 | -------------------------------------------------------------------------------- /api/lib/prepare_training_data/ToVoc.py: -------------------------------------------------------------------------------- 1 | from xml.dom.minidom import Document 2 | import cv2 3 | import os 4 | import glob 5 | import shutil 6 | import numpy as np 7 | 8 | def generate_xml(name, lines, img_size, class_sets, doncateothers=True): 9 | doc = Document() 10 | 11 | def append_xml_node_attr(child, parent=None, text=None): 12 | ele = doc.createElement(child) 13 | if not text is None: 14 | text_node = doc.createTextNode(text) 15 | ele.appendChild(text_node) 16 | parent = doc if parent is None else parent 17 | parent.appendChild(ele) 18 | return ele 19 | 20 | img_name = name + '.jpg' 21 | # create header 22 | annotation = append_xml_node_attr('annotation') 23 | append_xml_node_attr('folder', parent=annotation, text='text') 24 | append_xml_node_attr('filename', parent=annotation, text=img_name) 25 | source = append_xml_node_attr('source', parent=annotation) 26 | append_xml_node_attr('database', parent=source, text='coco_text_database') 27 | append_xml_node_attr('annotation', parent=source, text='text') 28 | append_xml_node_attr('image', parent=source, text='text') 29 | append_xml_node_attr('flickrid', parent=source, text='000000') 30 | owner = append_xml_node_attr('owner', parent=annotation) 31 | append_xml_node_attr('name', parent=owner, text='ms') 32 | size = append_xml_node_attr('size', annotation) 33 | append_xml_node_attr('width', size, str(img_size[1])) 34 | append_xml_node_attr('height', size, str(img_size[0])) 35 | append_xml_node_attr('depth', size, str(img_size[2])) 36 | append_xml_node_attr('segmented', parent=annotation, text='0') 37 | 38 | # create objects 39 | objs = [] 40 | for line in lines: 41 | splitted_line = line.strip().lower().split() 42 | cls = splitted_line[0].lower() 43 | if not doncateothers and cls not in class_sets: 44 | continue 45 | cls = 'dontcare' if cls not in class_sets else cls 46 | if cls == 'dontcare': 47 | continue 48 | obj = append_xml_node_attr('object', parent=annotation) 49 | occlusion = int(0) 50 | x1, y1, x2, y2 = int(float(splitted_line[1]) + 1), int(float(splitted_line[2]) + 1), \ 51 | int(float(splitted_line[3]) + 1), int(float(splitted_line[4]) + 1) 52 | truncation = float(0) 53 | difficult = 1 if _is_hard(cls, truncation, occlusion, x1, y1, x2, y2) else 0 54 | truncted = 0 if truncation < 0.5 else 1 55 | 56 | append_xml_node_attr('name', parent=obj, text=cls) 57 | append_xml_node_attr('pose', parent=obj, text='none') 58 | append_xml_node_attr('truncated', parent=obj, text=str(truncted)) 59 | append_xml_node_attr('difficult', parent=obj, text=str(int(difficult))) 60 | bb = append_xml_node_attr('bndbox', parent=obj) 61 | append_xml_node_attr('xmin', parent=bb, text=str(x1)) 62 | append_xml_node_attr('ymin', parent=bb, 
text=str(y1)) 63 | append_xml_node_attr('xmax', parent=bb, text=str(x2)) 64 | append_xml_node_attr('ymax', parent=bb, text=str(y2)) 65 | 66 | o = {'class': cls, 'box': np.asarray([x1, y1, x2, y2], dtype=float), \ 67 | 'truncation': truncation, 'difficult': difficult, 'occlusion': occlusion} 68 | objs.append(o) 69 | 70 | return doc, objs 71 | 72 | 73 | def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2): 74 | hard = False 75 | if y2 - y1 < 25 and occlusion >= 2: 76 | hard = True 77 | return hard 78 | if occlusion >= 3: 79 | hard = True 80 | return hard 81 | if truncation > 0.8: 82 | hard = True 83 | return hard 84 | return hard 85 | 86 | 87 | def build_voc_dirs(outdir): 88 | mkdir = lambda dir: os.makedirs(dir) if not os.path.exists(dir) else None 89 | mkdir(outdir) 90 | mkdir(os.path.join(outdir, 'Annotations')) 91 | mkdir(os.path.join(outdir, 'ImageSets')) 92 | mkdir(os.path.join(outdir, 'ImageSets', 'Layout')) 93 | mkdir(os.path.join(outdir, 'ImageSets', 'Main')) 94 | mkdir(os.path.join(outdir, 'ImageSets', 'Segmentation')) 95 | mkdir(os.path.join(outdir, 'JPEGImages')) 96 | mkdir(os.path.join(outdir, 'SegmentationClass')) 97 | mkdir(os.path.join(outdir, 'SegmentationObject')) 98 | return os.path.join(outdir, 'Annotations'), os.path.join(outdir, 'JPEGImages'), os.path.join(outdir, 'ImageSets', 99 | 'Main') 100 | 101 | 102 | if __name__ == '__main__': 103 | _outdir = 'TEXTVOC/VOC2007' 104 | _draw = bool(0) 105 | _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir) 106 | _doncateothers = bool(1) 107 | for dset in ['train']: 108 | _labeldir = 'label_tmp' 109 | _imagedir = 're_image' 110 | class_sets = ('text', 'dontcare') 111 | class_sets_dict = dict((k, i) for i, k in enumerate(class_sets)) 112 | allclasses = {} 113 | fs = [open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w') for cls in class_sets] 114 | ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w') 115 | 116 | files = glob.glob(os.path.join(_labeldir, '*.txt')) 117 | files.sort() 118 | for file in files: 119 | path, basename = os.path.split(file) 120 | stem, ext = os.path.splitext(basename) 121 | with open(file, 'r') as f: 122 | lines = f.readlines() 123 | img_file = os.path.join(_imagedir, stem + '.jpg') 124 | 125 | print(img_file) 126 | img = cv2.imread(img_file) 127 | img_size = img.shape 128 | 129 | doc, objs = generate_xml(stem, lines, img_size, class_sets=class_sets, doncateothers=_doncateothers) 130 | 131 | cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img) 132 | xmlfile = os.path.join(_dest_label_dir, stem + '.xml') 133 | with open(xmlfile, 'w') as f: 134 | f.write(doc.toprettyxml(indent=' ')) 135 | 136 | ftrain.writelines(stem + '\n') 137 | 138 | cls_in_image = set([o['class'] for o in objs]) 139 | 140 | for obj in objs: 141 | cls = obj['class'] 142 | allclasses[cls] = 0 \ 143 | if not cls in list(allclasses.keys()) else allclasses[cls] + 1 144 | 145 | for cls in cls_in_image: 146 | if cls in class_sets: 147 | fs[class_sets_dict[cls]].writelines(stem + ' 1\n') 148 | for cls in class_sets: 149 | if cls not in cls_in_image: 150 | fs[class_sets_dict[cls]].writelines(stem + ' -1\n') 151 | 152 | 153 | (f.close() for f in fs) 154 | ftrain.close() 155 | 156 | print('~~~~~~~~~~~~~~~~~~~') 157 | print(allclasses) 158 | print('~~~~~~~~~~~~~~~~~~~') 159 | shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'val.txt')) 160 | shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'trainval.txt')) 161 | for cls in 
class_sets: 162 | shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), 163 | os.path.join(_dest_set_dir, cls + '_trainval.txt')) 164 | shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), 165 | os.path.join(_dest_set_dir, cls + '_val.txt')) 166 | -------------------------------------------------------------------------------- /api/lib/prepare_training_data/split_label.py: -------------------------------------------------------------------------------- 1 | import os 2 | import numpy as np 3 | import math 4 | import cv2 as cv 5 | 6 | path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/image' 7 | gt_path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/label' 8 | out_path = 're_image' 9 | if not os.path.exists(out_path): 10 | os.makedirs(out_path) 11 | files = os.listdir(path) 12 | files.sort() 13 | #files=files[:100] 14 | for file in files: 15 | _, basename = os.path.split(file) 16 | if basename.lower().split('.')[-1] not in ['jpg', 'png']: 17 | continue 18 | stem, ext = os.path.splitext(basename) 19 | gt_file = os.path.join(gt_path, 'gt_' + stem + '.txt') 20 | img_path = os.path.join(path, file) 21 | print(img_path) 22 | img = cv.imread(img_path) 23 | img_size = img.shape 24 | im_size_min = np.min(img_size[0:2]) 25 | im_size_max = np.max(img_size[0:2]) 26 | 27 | im_scale = float(600) / float(im_size_min) 28 | if np.round(im_scale * im_size_max) > 1200: 29 | im_scale = float(1200) / float(im_size_max) 30 | re_im = cv.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv.INTER_LINEAR) 31 | re_size = re_im.shape 32 | cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im) 33 | 34 | with open(gt_file, 'r') as f: 35 | lines = f.readlines() 36 | for line in lines: 37 | splitted_line = line.strip().lower().split(',') 38 | pt_x = np.zeros((4, 1)) 39 | pt_y = np.zeros((4, 1)) 40 | pt_x[0, 0] = int(float(splitted_line[0]) / img_size[1] * re_size[1]) 41 | pt_y[0, 0] = int(float(splitted_line[1]) / img_size[0] * re_size[0]) 42 | pt_x[1, 0] = int(float(splitted_line[2]) / img_size[1] * re_size[1]) 43 | pt_y[1, 0] = int(float(splitted_line[3]) / img_size[0] * re_size[0]) 44 | pt_x[2, 0] = int(float(splitted_line[4]) / img_size[1] * re_size[1]) 45 | pt_y[2, 0] = int(float(splitted_line[5]) / img_size[0] * re_size[0]) 46 | pt_x[3, 0] = int(float(splitted_line[6]) / img_size[1] * re_size[1]) 47 | pt_y[3, 0] = int(float(splitted_line[7]) / img_size[0] * re_size[0]) 48 | 49 | ind_x = np.argsort(pt_x, axis=0) 50 | pt_x = pt_x[ind_x] 51 | pt_y = pt_y[ind_x] 52 | 53 | if pt_y[0] < pt_y[1]: 54 | pt1 = (pt_x[0], pt_y[0]) 55 | pt3 = (pt_x[1], pt_y[1]) 56 | else: 57 | pt1 = (pt_x[1], pt_y[1]) 58 | pt3 = (pt_x[0], pt_y[0]) 59 | 60 | if pt_y[2] < pt_y[3]: 61 | pt2 = (pt_x[2], pt_y[2]) 62 | pt4 = (pt_x[3], pt_y[3]) 63 | else: 64 | pt2 = (pt_x[3], pt_y[3]) 65 | pt4 = (pt_x[2], pt_y[2]) 66 | 67 | xmin = int(min(pt1[0], pt2[0])) 68 | ymin = int(min(pt1[1], pt2[1])) 69 | xmax = int(max(pt2[0], pt4[0])) 70 | ymax = int(max(pt3[1], pt4[1])) 71 | 72 | if xmin < 0: 73 | xmin = 0 74 | if xmax > re_size[1] - 1: 75 | xmax = re_size[1] - 1 76 | if ymin < 0: 77 | ymin = 0 78 | if ymax > re_size[0] - 1: 79 | ymax = re_size[0] - 1 80 | 81 | width = xmax - xmin 82 | height = ymax - ymin 83 | 84 | # reimplement 85 | step = 16.0 86 | x_left = [] 87 | x_right = [] 88 | x_left.append(xmin) 89 | x_left_start = int(math.ceil(xmin / 16.0) * 16.0) 90 | if x_left_start == xmin: 91 | x_left_start = xmin + 16 92 | for i in np.arange(x_left_start, xmax, 16): 93 | 
x_left.append(i) 94 | x_left = np.array(x_left) 95 | 96 | x_right.append(x_left_start - 1) 97 | for i in range(1, len(x_left) - 1): 98 | x_right.append(x_left[i] + 15) 99 | x_right.append(xmax) 100 | x_right = np.array(x_right) 101 | 102 | idx = np.where(x_left == x_right) 103 | x_left = np.delete(x_left, idx, axis=0) 104 | x_right = np.delete(x_right, idx, axis=0) 105 | 106 | if not os.path.exists('label_tmp'): 107 | os.makedirs('label_tmp') 108 | with open(os.path.join('label_tmp', stem) + '.txt', 'a') as f: 109 | for i in range(len(x_left)): 110 | f.writelines("text\t") 111 | f.writelines(str(int(x_left[i]))) 112 | f.writelines("\t") 113 | f.writelines(str(int(ymin))) 114 | f.writelines("\t") 115 | f.writelines(str(int(x_right[i]))) 116 | f.writelines("\t") 117 | f.writelines(str(int(ymax))) 118 | f.writelines("\n") 119 | -------------------------------------------------------------------------------- /api/lib/roi_data_layer/__init__.py: -------------------------------------------------------------------------------- 1 | from . import roidb -------------------------------------------------------------------------------- /api/lib/roi_data_layer/layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from lib.fast_rcnn.config import cfg 3 | from lib.roi_data_layer.minibatch import get_minibatch 4 | 5 | class RoIDataLayer(object): 6 | """Fast R-CNN data layer used for training.""" 7 | 8 | def __init__(self, roidb, num_classes): 9 | """Set the roidb to be used by this layer during training.""" 10 | self._roidb = roidb 11 | self._num_classes = num_classes 12 | self._shuffle_roidb_inds() 13 | 14 | def _shuffle_roidb_inds(self): 15 | """Randomly permute the training roidb.""" 16 | self._perm = np.random.permutation(np.arange(len(self._roidb))) 17 | self._cur = 0 18 | 19 | def _get_next_minibatch_inds(self): 20 | """Return the roidb indices for the next minibatch.""" 21 | 22 | if cfg.TRAIN.HAS_RPN: 23 | if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): 24 | self._shuffle_roidb_inds() 25 | 26 | db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] 27 | self._cur += cfg.TRAIN.IMS_PER_BATCH 28 | else: 29 | # sample images 30 | db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) 31 | i = 0 32 | while (i < cfg.TRAIN.IMS_PER_BATCH): 33 | ind = self._perm[self._cur] 34 | num_objs = self._roidb[ind]['boxes'].shape[0] 35 | if num_objs != 0: 36 | db_inds[i] = ind 37 | i += 1 38 | 39 | self._cur += 1 40 | if self._cur >= len(self._roidb): 41 | self._shuffle_roidb_inds() 42 | 43 | return db_inds 44 | 45 | def _get_next_minibatch(self): 46 | """Return the blobs to be used for the next minibatch. 47 | 48 | If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a 49 | separate process and made available through self._blob_queue. 
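        (Prefetching is disabled by default since cfg.TRAIN.USE_PREFETCH is False;
        only the synchronous path below is implemented.)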
50 | """ 51 | db_inds = self._get_next_minibatch_inds() 52 | minibatch_db = [self._roidb[i] for i in db_inds] 53 | return get_minibatch(minibatch_db, self._num_classes) 54 | 55 | def forward(self): 56 | """Get blobs and copy them into this layer's top blob vector.""" 57 | blobs = self._get_next_minibatch() 58 | return blobs 59 | -------------------------------------------------------------------------------- /api/lib/roi_data_layer/minibatch.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import numpy.random as npr 3 | import cv2 4 | import os 5 | from lib.fast_rcnn.config import cfg 6 | from lib.utils.blob import prep_im_for_blob, im_list_to_blob 7 | 8 | def get_minibatch(roidb, num_classes): 9 | """Given a roidb, construct a minibatch sampled from it.""" 10 | num_images = len(roidb) 11 | # Sample random scales to use for each image in this batch 12 | random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), 13 | size=num_images) 14 | assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ 15 | 'num_images ({}) must divide BATCH_SIZE ({})'. \ 16 | format(num_images, cfg.TRAIN.BATCH_SIZE) 17 | rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images 18 | fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) 19 | 20 | # Get the input image blob, formatted for caffe 21 | im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) 22 | 23 | blobs = {'data': im_blob} 24 | 25 | if cfg.TRAIN.HAS_RPN: 26 | assert len(im_scales) == 1, "Single batch only" 27 | assert len(roidb) == 1, "Single batch only" 28 | # gt boxes: (x1, y1, x2, y2, cls) 29 | gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] 30 | gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) 31 | gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] 32 | gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] 33 | blobs['gt_boxes'] = gt_boxes 34 | blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] \ 35 | if 'gt_ishard' in roidb[0] else np.zeros(gt_inds.size, dtype=int) 36 | # blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] 37 | blobs['dontcare_areas'] = roidb[0]['dontcare_areas'] * im_scales[0] \ 38 | if 'dontcare_areas' in roidb[0] else np.zeros([0, 4], dtype=float) 39 | blobs['im_info'] = np.array( 40 | [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], 41 | dtype=np.float32) 42 | blobs['im_name'] = os.path.basename(roidb[0]['image']) 43 | 44 | else: # not using RPN 45 | # Now, build the region of interest and label blobs 46 | rois_blob = np.zeros((0, 5), dtype=np.float32) 47 | labels_blob = np.zeros((0), dtype=np.float32) 48 | bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) 49 | bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) 50 | # all_overlaps = [] 51 | for im_i in range(num_images): 52 | labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ 53 | = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, 54 | num_classes) 55 | 56 | # Add to RoIs blob 57 | rois = _project_im_rois(im_rois, im_scales[im_i]) 58 | batch_ind = im_i * np.ones((rois.shape[0], 1)) 59 | rois_blob_this_image = np.hstack((batch_ind, rois)) 60 | rois_blob = np.vstack((rois_blob, rois_blob_this_image)) 61 | 62 | # Add to labels, bbox targets, and bbox loss blobs 63 | labels_blob = np.hstack((labels_blob, labels)) 64 | bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) 65 | bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights)) 66 | # all_overlaps = np.hstack((all_overlaps, overlaps)) 67 | 
68 | # For debug visualizations 69 | # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) 70 | 71 | blobs['rois'] = rois_blob 72 | blobs['labels'] = labels_blob 73 | 74 | if cfg.TRAIN.BBOX_REG: 75 | blobs['bbox_targets'] = bbox_targets_blob 76 | blobs['bbox_inside_weights'] = bbox_inside_blob 77 | blobs['bbox_outside_weights'] = \ 78 | np.array(bbox_inside_blob > 0).astype(np.float32) 79 | 80 | return blobs 81 | 82 | def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): 83 | """Generate a random sample of RoIs comprising foreground and background 84 | examples. 85 | """ 86 | # label = class RoI has max overlap with 87 | labels = roidb['max_classes'] 88 | overlaps = roidb['max_overlaps'] 89 | rois = roidb['boxes'] 90 | 91 | # Select foreground RoIs as those with >= FG_THRESH overlap 92 | fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] 93 | # Guard against the case when an image has fewer than fg_rois_per_image 94 | # foreground RoIs 95 | fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) 96 | # Sample foreground regions without replacement 97 | if fg_inds.size > 0: 98 | fg_inds = npr.choice( 99 | fg_inds, size=fg_rois_per_this_image, replace=False) 100 | 101 | # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) 102 | bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & 103 | (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] 104 | # Compute number of background RoIs to take from this image (guarding 105 | # against there being fewer than desired) 106 | bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image 107 | bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, 108 | bg_inds.size) 109 | # Sample foreground regions without replacement 110 | if bg_inds.size > 0: 111 | bg_inds = npr.choice( 112 | bg_inds, size=bg_rois_per_this_image, replace=False) 113 | 114 | # The indices that we're selecting (both fg and bg) 115 | keep_inds = np.append(fg_inds, bg_inds) 116 | # Select sampled values from various arrays: 117 | labels = labels[keep_inds] 118 | # Clamp labels for the background RoIs to 0 119 | labels[fg_rois_per_this_image:] = 0 120 | overlaps = overlaps[keep_inds] 121 | rois = rois[keep_inds] 122 | 123 | bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( 124 | roidb['bbox_targets'][keep_inds, :], num_classes) 125 | 126 | return labels, overlaps, rois, bbox_targets, bbox_inside_weights 127 | 128 | def _get_image_blob(roidb, scale_inds): 129 | """Builds an input blob from the images in the roidb at the specified 130 | scales. 131 | """ 132 | num_images = len(roidb) 133 | processed_ims = [] 134 | im_scales = [] 135 | for i in range(num_images): 136 | im = cv2.imread(roidb[i]['image']) 137 | if roidb[i]['flipped']: 138 | im = im[:, ::-1, :] 139 | target_size = cfg.TRAIN.SCALES[scale_inds[i]] 140 | im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, 141 | cfg.TRAIN.MAX_SIZE) 142 | im_scales.append(im_scale) 143 | processed_ims.append(im) 144 | 145 | # Create a blob to hold the input images 146 | blob = im_list_to_blob(processed_ims) 147 | 148 | return blob, im_scales 149 | 150 | def _project_im_rois(im_rois, im_scale_factor): 151 | """Project image RoIs into the rescaled training image.""" 152 | rois = im_rois * im_scale_factor 153 | return rois 154 | 155 | def _get_bbox_regression_labels(bbox_target_data, num_classes): 156 | """Bounding-box regression targets are stored in a compact form in the 157 | roidb. 
158 | 159 | This function expands those targets into the 4-of-4*K representation used 160 | by the network (i.e. only one class has non-zero targets). The loss weights 161 | are similarly expanded. 162 | 163 | Returns: 164 | bbox_target_data (ndarray): N x 4K blob of regression targets 165 | bbox_inside_weights (ndarray): N x 4K blob of loss weights 166 | """ 167 | clss = bbox_target_data[:, 0] 168 | bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) 169 | bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) 170 | inds = np.where(clss > 0)[0] 171 | for ind in inds: 172 | cls = clss[ind] 173 | start = 4 * cls 174 | end = start + 4 175 | bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] 176 | bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS 177 | return bbox_targets, bbox_inside_weights 178 | 179 | def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): 180 | """Visualize a mini-batch for debugging.""" 181 | import matplotlib.pyplot as plt 182 | for i in range(rois_blob.shape[0]): 183 | rois = rois_blob[i, :] 184 | im_ind = rois[0] 185 | roi = rois[1:] 186 | im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() 187 | im += cfg.PIXEL_MEANS 188 | im = im[:, :, (2, 1, 0)] 189 | im = im.astype(np.uint8) 190 | cls = labels_blob[i] 191 | plt.imshow(im) 192 | print('class: ', cls, ' overlap: ', overlaps[i]) 193 | plt.gca().add_patch( 194 | plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0], 195 | roi[3] - roi[1], fill=False, 196 | edgecolor='r', linewidth=3) 197 | ) 198 | plt.show() 199 | -------------------------------------------------------------------------------- /api/lib/roi_data_layer/roidb.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import PIL 3 | from lib.fast_rcnn.config import cfg 4 | from lib.fast_rcnn.bbox_transform import bbox_transform 5 | from lib.utils.bbox import bbox_overlaps 6 | 7 | def prepare_roidb(imdb): 8 | """Enrich the imdb's roidb by adding some derived quantities that 9 | are useful for training. This function precomputes the maximum 10 | overlap, taken over ground-truth boxes, between each ROI and 11 | each ground-truth box. The class with maximum overlap is also 12 | recorded. 13 | """ 14 | sizes = [PIL.Image.open(imdb.image_path_at(i)).size 15 | for i in range(imdb.num_images)] 16 | roidb = imdb.roidb 17 | for i in range(len(imdb.image_index)): 18 | roidb[i]['image'] = imdb.image_path_at(i) 19 | roidb[i]['width'] = sizes[i][0] 20 | roidb[i]['height'] = sizes[i][1] 21 | # need gt_overlaps as a dense array for argmax 22 | gt_overlaps = roidb[i]['gt_overlaps'].toarray() 23 | # max overlap with gt over classes (columns) 24 | max_overlaps = gt_overlaps.max(axis=1) 25 | # gt class that had the max overlap 26 | max_classes = gt_overlaps.argmax(axis=1) 27 | roidb[i]['max_classes'] = max_classes 28 | roidb[i]['max_overlaps'] = max_overlaps 29 | # sanity checks 30 | # max overlap of 0 => class should be zero (background) 31 | zero_inds = np.where(max_overlaps == 0)[0] 32 | assert all(max_classes[zero_inds] == 0) 33 | # max overlap > 0 => class should not be zero (must be a fg class) 34 | nonzero_inds = np.where(max_overlaps > 0)[0] 35 | assert all(max_classes[nonzero_inds] != 0) 36 | 37 | def add_bbox_regression_targets(roidb): 38 | """ 39 | Add information needed to train bounding-box regressors. 40 | For each roi find the corresponding gt box, and compute the distance. 
41 | then normalize the distance into Gaussian by minus mean and divided by std 42 | """ 43 | assert len(roidb) > 0 44 | assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 45 | 46 | num_images = len(roidb) 47 | # Infer number of classes from the number of columns in gt_overlaps 48 | num_classes = roidb[0]['gt_overlaps'].shape[1] 49 | for im_i in range(num_images): 50 | rois = roidb[im_i]['boxes'] 51 | max_overlaps = roidb[im_i]['max_overlaps'] 52 | max_classes = roidb[im_i]['max_classes'] 53 | roidb[im_i]['bbox_targets'] = \ 54 | _compute_targets(rois, max_overlaps, max_classes) 55 | 56 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED: 57 | # Use fixed / precomputed "means" and "stds" instead of empirical values 58 | means = np.tile( 59 | np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1)) 60 | stds = np.tile( 61 | np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1)) 62 | else: 63 | # Compute values needed for means and stds 64 | # var(x) = E(x^2) - E(x)^2 65 | class_counts = np.zeros((num_classes, 1)) + cfg.EPS 66 | sums = np.zeros((num_classes, 4)) 67 | squared_sums = np.zeros((num_classes, 4)) 68 | for im_i in range(num_images): 69 | targets = roidb[im_i]['bbox_targets'] 70 | for cls in range(1, num_classes): 71 | cls_inds = np.where(targets[:, 0] == cls)[0] 72 | if cls_inds.size > 0: 73 | class_counts[cls] += cls_inds.size 74 | sums[cls, :] += targets[cls_inds, 1:].sum(axis=0) 75 | squared_sums[cls, :] += \ 76 | (targets[cls_inds, 1:] ** 2).sum(axis=0) 77 | 78 | means = sums / class_counts 79 | stds = np.sqrt(squared_sums / class_counts - means ** 2) 80 | # too small number will cause nan error 81 | assert np.min(stds) < 0.01, \ 82 | 'Boxes std is too small, std:{}'.format(stds) 83 | 84 | print('bbox target means:') 85 | print(means) 86 | print(means[1:, :].mean(axis=0)) # ignore bg class 87 | print('bbox target stdevs:') 88 | print(stds) 89 | print(stds[1:, :].mean(axis=0)) # ignore bg class 90 | 91 | # Normalize targets 92 | if cfg.TRAIN.BBOX_NORMALIZE_TARGETS: 93 | print("Normalizing targets") 94 | for im_i in range(num_images): 95 | targets = roidb[im_i]['bbox_targets'] 96 | for cls in range(1, num_classes): 97 | cls_inds = np.where(targets[:, 0] == cls)[0] 98 | roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :] 99 | roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :] 100 | else: 101 | print("NOT normalizing targets") 102 | 103 | # These values will be needed for making predictions 104 | # (the predicts will need to be unnormalized and uncentered) 105 | return means.ravel(), stds.ravel() 106 | 107 | def _compute_targets(rois, overlaps, labels): 108 | """ 109 | Compute bounding-box regression targets for an image. 110 | for each roi find the corresponding gt_box, then compute the distance. 
111 | """ 112 | # Indices of ground-truth ROIs 113 | gt_inds = np.where(overlaps == 1)[0] 114 | if len(gt_inds) == 0: 115 | # Bail if the image has no ground-truth ROIs 116 | return np.zeros((rois.shape[0], 5), dtype=np.float32) 117 | # Indices of examples for which we try to make predictions 118 | ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] 119 | 120 | # Get IoU overlap between each ex ROI and gt ROI 121 | ex_gt_overlaps = bbox_overlaps( 122 | np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), 123 | np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) 124 | 125 | # Find which gt ROI each ex ROI has max overlap with: 126 | # this will be the ex ROI's gt target 127 | gt_assignment = ex_gt_overlaps.argmax(axis=1) 128 | gt_rois = rois[gt_inds[gt_assignment], :] 129 | ex_rois = rois[ex_inds, :] 130 | 131 | targets = np.zeros((rois.shape[0], 5), dtype=np.float32) 132 | targets[ex_inds, 0] = labels[ex_inds] 133 | targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) 134 | return targets 135 | -------------------------------------------------------------------------------- /api/lib/rpn_msr/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/lib/rpn_msr/__init__.py -------------------------------------------------------------------------------- /api/lib/rpn_msr/generate_anchors.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def generate_basic_anchors(sizes, base_size=16): 4 | base_anchor = np.array([0, 0, base_size - 1, base_size - 1], np.int32) 5 | anchors = np.zeros((len(sizes), 4), np.int32) 6 | index = 0 7 | for h, w in sizes: 8 | anchors[index] = scale_anchor(base_anchor, h, w) 9 | index += 1 10 | return anchors 11 | 12 | 13 | def scale_anchor(anchor, h, w): 14 | x_ctr = (anchor[0] + anchor[2]) * 0.5 15 | y_ctr = (anchor[1] + anchor[3]) * 0.5 16 | scaled_anchor = anchor.copy() 17 | scaled_anchor[0] = x_ctr - w / 2 # xmin 18 | scaled_anchor[2] = x_ctr + w / 2 # xmax 19 | scaled_anchor[1] = y_ctr - h / 2 # ymin 20 | scaled_anchor[3] = y_ctr + h / 2 # ymax 21 | return scaled_anchor 22 | 23 | 24 | def generate_anchors(base_size=16, ratios=[0.5, 1, 2], 25 | scales=2**np.arange(3, 6)): 26 | heights = [11, 16, 23, 33, 48, 68, 97, 139, 198, 283] 27 | widths = [16] 28 | sizes = [] 29 | for h in heights: 30 | for w in widths: 31 | sizes.append((h, w)) 32 | return generate_basic_anchors(sizes) 33 | 34 | if __name__ == '__main__': 35 | import time 36 | t = time.time() 37 | a = generate_anchors() 38 | print(time.time() - t) 39 | print(a) 40 | from IPython import embed; embed() 41 | -------------------------------------------------------------------------------- /api/lib/rpn_msr/proposal_layer_tf.py: -------------------------------------------------------------------------------- 1 | # -*- coding:utf-8 -*- 2 | import numpy as np 3 | from .generate_anchors import generate_anchors 4 | from lib.fast_rcnn.config import cfg 5 | from lib.fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes 6 | from lib.fast_rcnn.nms_wrapper import nms 7 | 8 | 9 | DEBUG = False 10 | """ 11 | Outputs object detection proposals by applying estimated bounding-box 12 | transformations to a set of regular boxes (called "anchors"). 
13 | """ 14 | def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride = [16,], anchor_scales = [16,]): 15 | """ 16 | Parameters 17 | ---------- 18 | rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg 19 | NOTICE: the old version is ordered by (1, H, W, 2, A) !!!! 20 | rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN 21 | im_info: a list of [image_height, image_width, scale_ratios] 22 | cfg_key: 'TRAIN' or 'TEST' 23 | _feat_stride: the downsampling ratio of feature map to the original input image 24 | anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) 25 | ---------- 26 | Returns 27 | ---------- 28 | rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] 29 | 30 | # Algorithm: 31 | # 32 | # for each (H, W) location i 33 | # generate A anchor boxes centered on cell i 34 | # apply predicted bbox deltas at cell i to each of the A anchors 35 | # clip predicted boxes to image 36 | # remove predicted boxes with either height or width < threshold 37 | # sort all (proposal, score) pairs by score from highest to lowest 38 | # take top pre_nms_topN proposals before NMS 39 | # apply NMS with threshold 0.7 to remaining proposals 40 | # take after_nms_topN proposals after NMS 41 | # return the top proposals (-> RoIs top, scores top) 42 | #layer_params = yaml.load(self.param_str_) 43 | 44 | """ 45 | # cfg_key=cfg_key.decode('ascii') 46 | _anchors = generate_anchors(scales=np.array(anchor_scales))#生成基本的9个anchor 47 | _num_anchors = _anchors.shape[0]#9个anchor 48 | 49 | im_info = im_info[0]#原始图像的高宽、缩放尺度 50 | 51 | assert rpn_cls_prob_reshape.shape[0] == 1, \ 52 | 'Only single item batches are supported' 53 | 54 | pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N#12000,在做nms之前,最多保留的候选box数目 55 | post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N#2000,做完nms之后,最多保留的box的数目 56 | nms_thresh = cfg[cfg_key].RPN_NMS_THRESH#nms用参数,阈值是0.7 57 | min_size = cfg[cfg_key].RPN_MIN_SIZE#候选box的最小尺寸,目前是16,高宽均要大于16 58 | #TODO 后期需要修改这个最小尺寸,改为8? 59 | 60 | height, width = rpn_cls_prob_reshape.shape[1:3]#feature-map的高宽 61 | 62 | # the first set of _num_anchors channels are bg probs 63 | # the second set are the fg probs, which we want 64 | # (1, H, W, A) 65 | scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:,:,:,:,1], 66 | [1, height, width, _num_anchors]) 67 | #提取到object的分数,non-object的我们不关心 68 | #并reshape到1*H*W*9 69 | 70 | bbox_deltas = rpn_bbox_pred#模型输出的pred是相对值,需要进一步处理成真实图像中的坐标 71 | #im_info = bottom[2].data[0, :] 72 | 73 | if DEBUG: 74 | print('im_size: ({}, {})'.format(im_info[0], im_info[1])) 75 | print('scale: {}'.format(im_info[2])) 76 | 77 | # 1. 
77 | # 1. Generate proposals from bbox deltas and shifted anchors 78 | if DEBUG: 79 | print('score map size: {}'.format(scores.shape)) 80 | 81 | # Enumerate all shifts 82 | # As in anchor_target_layer_tf, generate the anchor shifts to obtain every anchor over the whole image 83 | shift_x = np.arange(0, width) * _feat_stride 84 | shift_y = np.arange(0, height) * _feat_stride 85 | shift_x, shift_y = np.meshgrid(shift_x, shift_y) 86 | shifts = np.vstack((shift_x.ravel(), shift_y.ravel(), 87 | shift_x.ravel(), shift_y.ravel())).transpose() 88 | 89 | # Enumerate all shifted anchors: 90 | # 91 | # add A anchors (1, A, 4) to 92 | # cell K shifts (K, 1, 4) to get 93 | # shift anchors (K, A, 4) 94 | # reshape to (K*A, 4) shifted anchors 95 | A = _num_anchors 96 | K = shifts.shape[0] 97 | anchors = _anchors.reshape((1, A, 4)) + \ 98 | shifts.reshape((1, K, 4)).transpose((1, 0, 2)) 99 | anchors = anchors.reshape((K * A, 4))  # these are all the anchors over the whole image 100 | 101 | # Transpose and reshape predicted bbox transformations to get them 102 | # into the same order as the anchors: 103 | # bbox deltas will be (1, 4 * A, H, W) format 104 | # transpose to (1, H, W, 4 * A) 105 | # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a) 106 | # in slowest to fastest order 107 | bbox_deltas = bbox_deltas.reshape((-1, 4)) #(HxWxA, 4) 108 | 109 | # Same story for the scores: 110 | scores = scores.reshape((-1, 1)) 111 | 112 | # Convert anchors into proposals via bbox transformations 113 | proposals = bbox_transform_inv(anchors, bbox_deltas)  # apply the inverse transform to get the real image coordinates of the boxes 114 | 115 | # 2. clip predicted boxes to image 116 | proposals = clip_boxes(proposals, im_info[:2])  # clip every proposal; parts that fall outside the image boundary are trimmed off 117 | 118 | # 3. remove predicted boxes with either height or width < threshold 119 | # (NOTE: convert min_size to input image scale stored in im_info[2]) 120 | keep = _filter_boxes(proposals, min_size * im_info[2])  # remove proposals smaller than the minimum size 121 | proposals = proposals[keep, :]  # keep the remaining proposals 122 | scores = scores[keep] 123 | bbox_deltas=bbox_deltas[keep,:] 124 | 125 | 126 | # # remove irregular boxes, too fat too tall 127 | # keep = _filter_irregular_boxes(proposals) 128 | # proposals = proposals[keep, :] 129 | # scores = scores[keep] 130 | 131 | # 4. sort all (proposal, score) pairs by score from highest to lowest 132 | # 5. take top pre_nms_topN (e.g. 6000) 133 | order = scores.ravel().argsort()[::-1]  # sort by score from highest to lowest 134 | if pre_nms_topN > 0:  # keep at most pre_nms_topN (12000) proposals going into NMS 135 | order = order[:pre_nms_topN] 136 | proposals = proposals[order, :] 137 | scores = scores[order] 138 | bbox_deltas=bbox_deltas[order,:] 139 | 140 | 141 | # 6. apply nms (e.g. threshold = 0.7) 142 | # 7. take after_nms_topN (e.g. 300) 143 |
# 8. return the top proposals (-> RoIs top) 144 | keep = nms(np.hstack((proposals, scores)), nms_thresh)  # run NMS and keep at most post_nms_topN (2000) proposals 145 | if post_nms_topN > 0: 146 | keep = keep[:post_nms_topN] 147 | proposals = proposals[keep, :] 148 | scores = scores[keep] 149 | bbox_deltas=bbox_deltas[keep,:] 150 | 151 | 152 | # Output rois blob 153 | # Our RPN implementation only supports a single input image, so all 154 | # batch inds are 0 155 | blob = np.hstack((scores.astype(np.float32, copy=False), proposals.astype(np.float32, copy=False))) 156 | 157 | return blob,bbox_deltas 158 | 159 | 160 | def _filter_boxes(boxes, min_size): 161 | """Remove all boxes with any side smaller than min_size.""" 162 | ws = boxes[:, 2] - boxes[:, 0] + 1 163 | hs = boxes[:, 3] - boxes[:, 1] + 1 164 | keep = np.where((ws >= min_size) & (hs >= min_size))[0] 165 | return keep 166 | 167 | def _filter_irregular_boxes(boxes, min_ratio = 0.2, max_ratio = 5): 168 | """Remove all boxes whose aspect ratio falls outside [min_ratio, max_ratio].""" 169 | ws = boxes[:, 2] - boxes[:, 0] + 1 170 | hs = boxes[:, 3] - boxes[:, 1] + 1 171 | rs = ws / hs 172 | keep = np.where((rs <= max_ratio) & (rs >= min_ratio))[0] 173 | return keep 174 | -------------------------------------------------------------------------------- /api/lib/text_connector/__init__.py: -------------------------------------------------------------------------------- 1 | from .detectors import TextDetector 2 | from .text_connect_cfg import Config 3 | -------------------------------------------------------------------------------- /api/lib/text_connector/detectors.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import numpy as np 3 | from lib.fast_rcnn.nms_wrapper import nms 4 | from lib.fast_rcnn.config import cfg 5 | from .text_proposal_connector import TextProposalConnector 6 | from .text_proposal_connector_oriented import TextProposalConnector as TextProposalConnectorOriented 7 | from .text_connect_cfg import Config as TextLineCfg 8 | 9 | 10 | class TextDetector: 11 | def __init__(self): 12 | self.mode= cfg.TEST.DETECT_MODE 13 | if self.mode == "H": 14 | self.text_proposal_connector=TextProposalConnector() 15 | elif self.mode == "O": 16 | self.text_proposal_connector=TextProposalConnectorOriented() 17 | 18 | 19 | def detect(self, text_proposals,scores,size): 20 | # drop proposals with low scores 21 | keep_inds=np.where(scores>TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0] 22 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 23 | 24 | # sort by score 25 | sorted_indices=np.argsort(scores.ravel())[::-1] 26 | text_proposals, scores=text_proposals[sorted_indices], scores[sorted_indices] 27 | 28 | # apply NMS to the proposals 29 | keep_inds=nms(np.hstack((text_proposals, scores)), TextLineCfg.TEXT_PROPOSALS_NMS_THRESH) 30 | text_proposals, scores=text_proposals[keep_inds], scores[keep_inds] 31 | 32 | # get the detection results 33 | text_recs=self.text_proposal_connector.get_text_lines(text_proposals, scores, size) 34 | keep_inds=self.filter_boxes(text_recs) 35 | return text_recs[keep_inds] 36 | 37 | def filter_boxes(self, boxes): 38 | heights=np.zeros((len(boxes), 1), np.float) 39 | widths=np.zeros((len(boxes), 1), np.float) 40 | scores=np.zeros((len(boxes), 1), np.float) 41 | index=0 42 | for box in boxes: 43 | heights[index]=(abs(box[5]-box[1])+abs(box[7]-box[3]))/2.0+1 44 | widths[index]=(abs(box[2]-box[0])+abs(box[6]-box[4]))/2.0+1 45 | scores[index] = box[8] 46 | index += 1 47 | 48 | return np.where((widths/heights>TextLineCfg.MIN_RATIO) & (scores>TextLineCfg.LINE_MIN_SCORE)
& 49 | (widths>(TextLineCfg.TEXT_PROPOSALS_WIDTH*TextLineCfg.MIN_NUM_PROPOSALS)))[0] -------------------------------------------------------------------------------- /api/lib/text_connector/other.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def threshold(coords, min_, max_): 5 | return np.maximum(np.minimum(coords, max_), min_) 6 | 7 | def clip_boxes(boxes, im_shape): 8 | """ 9 | Clip boxes to image boundaries. 10 | """ 11 | boxes[:, 0::2]=threshold(boxes[:, 0::2], 0, im_shape[1]-1) 12 | boxes[:, 1::2]=threshold(boxes[:, 1::2], 0, im_shape[0]-1) 13 | return boxes 14 | 15 | 16 | class Graph: 17 | def __init__(self, graph): 18 | self.graph=graph 19 | 20 | def sub_graphs_connected(self): 21 | sub_graphs=[] 22 | for index in range(self.graph.shape[0]): 23 | if not self.graph[:, index].any() and self.graph[index, :].any(): 24 | v=index 25 | sub_graphs.append([v]) 26 | while self.graph[v, :].any(): 27 | v=np.where(self.graph[v, :])[0][0] 28 | sub_graphs[-1].append(v) 29 | return sub_graphs 30 | 31 | -------------------------------------------------------------------------------- /api/lib/text_connector/text_connect_cfg.py: -------------------------------------------------------------------------------- 1 | class Config: 2 | SCALE=600 3 | MAX_SCALE=1200 4 | TEXT_PROPOSALS_WIDTH=16 5 | MIN_NUM_PROPOSALS = 2 6 | MIN_RATIO=0.5 7 | LINE_MIN_SCORE=0.9 8 | MAX_HORIZONTAL_GAP=50 9 | TEXT_PROPOSALS_MIN_SCORE=0.7 10 | TEXT_PROPOSALS_NMS_THRESH=0.2 11 | MIN_V_OVERLAPS=0.7 12 | MIN_SIZE_SIM=0.7 13 | 14 | 15 | -------------------------------------------------------------------------------- /api/lib/text_connector/text_proposal_connector.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .other import clip_boxes 3 | from .text_proposal_graph_builder import TextProposalGraphBuilder 4 | 5 | class TextProposalConnector: 6 | def __init__(self): 7 | self.graph_builder=TextProposalGraphBuilder() 8 | 9 | def group_text_proposals(self, text_proposals, scores, im_size): 10 | graph=self.graph_builder.build_graph(text_proposals, scores, im_size) 11 | return graph.sub_graphs_connected() 12 | 13 | def fit_y(self, X, Y, x1, x2): 14 | len(X)!=0 15 | # if X only include one point, the function will get line y=Y[0] 16 | if np.sum(X==X[0])==len(X): 17 | return Y[0], Y[0] 18 | p=np.poly1d(np.polyfit(X, Y, 1)) 19 | return p(x1), p(x2) 20 | 21 | def get_text_lines(self, text_proposals, scores, im_size): 22 | # tp=text proposal 23 | tp_groups=self.group_text_proposals(text_proposals, scores, im_size) 24 | text_lines=np.zeros((len(tp_groups), 5), np.float32) 25 | 26 | for index, tp_indices in enumerate(tp_groups): 27 | text_line_boxes=text_proposals[list(tp_indices)] 28 | 29 | x0=np.min(text_line_boxes[:, 0]) 30 | x1=np.max(text_line_boxes[:, 2]) 31 | 32 | offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 33 | 34 | lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) 35 | lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) 36 | 37 | # the score of a text line is the average score of the scores 38 | # of all text proposals contained in the text line 39 | score=scores[list(tp_indices)].sum()/float(len(tp_indices)) 40 | 41 | text_lines[index, 0]=x0 42 | text_lines[index, 1]=min(lt_y, rt_y) 43 | text_lines[index, 2]=x1 44 | text_lines[index, 3]=max(lb_y, rb_y) 45 | text_lines[index, 4]=score 46 | 47 | 
text_lines=clip_boxes(text_lines, im_size) 48 | 49 | text_recs = np.zeros((len(text_lines), 9), np.float) 50 | index = 0 51 | for line in text_lines: 52 | xmin,ymin,xmax,ymax=line[0],line[1],line[2],line[3] 53 | text_recs[index, 0] = xmin 54 | text_recs[index, 1] = ymin 55 | text_recs[index, 2] = xmax 56 | text_recs[index, 3] = ymin 57 | text_recs[index, 4] = xmin 58 | text_recs[index, 5] = ymax 59 | text_recs[index, 6] = xmax 60 | text_recs[index, 7] = ymax 61 | text_recs[index, 8] = line[4] 62 | index = index + 1 63 | 64 | return text_recs 65 | -------------------------------------------------------------------------------- /api/lib/text_connector/text_proposal_connector_oriented.py: -------------------------------------------------------------------------------- 1 | #coding:utf-8 2 | import numpy as np 3 | from .text_proposal_graph_builder import TextProposalGraphBuilder 4 | 5 | class TextProposalConnector: 6 | """ 7 | Connect text proposals into text lines 8 | """ 9 | def __init__(self): 10 | self.graph_builder=TextProposalGraphBuilder() 11 | 12 | def group_text_proposals(self, text_proposals, scores, im_size): 13 | graph=self.graph_builder.build_graph(text_proposals, scores, im_size) 14 | return graph.sub_graphs_connected() 15 | 16 | def fit_y(self, X, Y, x1, x2): 17 | assert len(X) != 0 18 | # if X only includes one point, the function returns the line y=Y[0] 19 | if np.sum(X==X[0])==len(X): 20 | return Y[0], Y[0] 21 | p=np.poly1d(np.polyfit(X, Y, 1)) 22 | return p(x1), p(x2) 23 | 24 | def get_text_lines(self, text_proposals, scores, im_size): 25 | """ 26 | text_proposals:boxes 27 | 28 | """ 29 | # tp=text proposal 30 | tp_groups=self.group_text_proposals(text_proposals, scores, im_size)  # first build the graph to find which small proposal boxes make up each text line 31 | 32 | text_lines=np.zeros((len(tp_groups), 8), np.float32) 33 | 34 | for index, tp_indices in enumerate(tp_groups): 35 | text_line_boxes=text_proposals[list(tp_indices)]  # all the small boxes of this text line 36 | X = (text_line_boxes[:,0] + text_line_boxes[:,2]) / 2  # center x coordinate of each small box 37 | Y = (text_line_boxes[:,1] + text_line_boxes[:,3]) / 2  # center y coordinate of each small box 38 | 39 | z1 = np.polyfit(X,Y,1)  # fit a straight line (least squares) through the box centers computed above 40 | 41 | x0=np.min(text_line_boxes[:, 0])  # minimum x coordinate of the text line 42 | x1=np.max(text_line_boxes[:, 2])  # maximum x coordinate of the text line 43 | 44 | offset=(text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5  # half the width of a small box 45 | 46 | # fit a line through the top-left corners of all small boxes, then compute the y values at the leftmost and rightmost x of the text line 47 | lt_y, rt_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) 48 | # fit a line through the bottom-left corners of all small boxes, then compute the y values at the leftmost and rightmost x of the text line 49 | lb_y, rb_y=self.fit_y(text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) 50 | 51 | score=scores[list(tp_indices)].sum()/float(len(tp_indices))  # the text line score is the mean score of its small boxes 52 | 53 | text_lines[index, 0]=x0 54 | text_lines[index, 1]=min(lt_y, rt_y)  # smaller y coordinate of the text line's top edge 55 | text_lines[index, 2]=x1 56 | text_lines[index, 3]=max(lb_y, rb_y)  # larger y coordinate of the text line's bottom edge 57 | text_lines[index, 4]=score  # text line score 58 | text_lines[index, 5]=z1[0]  # slope k and intercept b of the line fitted through the centers 59 | text_lines[index, 6]=z1[1] 60 | height = np.mean( (text_line_boxes[:,3]-text_line_boxes[:,1]) )  # average height of the small boxes 61 | text_lines[index, 7]= height + 2.5 62 | 63 | text_recs = np.zeros((len(text_lines), 9), np.float) 64 | index = 0 65 | for line in text_lines: 66 | b1 = line[6] - line[7] / 2  # from the line height and the fitted center line, derive the intercepts of the top and bottom edge lines 67 | b2 = line[6] + line[7] / 2 68 | x1 = line[0] 69 | y1 = line[5] * line[0] + b1  # top-left 70 | x2 = line[2] 71 | y2 = line[5] * line[2] + b1  # top-right 72 | x3 = line[0] 73 | y3 = line[5] * line[0] + b2  # bottom-left 74 | x4 = line[2] 75 | y4 = line[5] * line[2] + b2  # bottom-right 76 | disX = x2 - x1 77 | disY = y2 - y1
78 | width = np.sqrt(disX * disX + disY * disY)  # text line width 79 | 80 | fTmp0 = y3 - y1  # text line height 81 | fTmp1 = fTmp0 * disY / width 82 | x = np.fabs(fTmp1 * disX / width)  # compensation 83 | y = np.fabs(fTmp1 * disY / width) 84 | if line[5] < 0: 85 | x1 -= x 86 | y1 += y 87 | x4 += x 88 | y4 -= y 89 | else: 90 | x2 += x 91 | y2 += y 92 | x3 -= x 93 | y3 -= y 94 | text_recs[index, 0] = x1 95 | text_recs[index, 1] = y1 96 | text_recs[index, 2] = x2 97 | text_recs[index, 3] = y2 98 | text_recs[index, 4] = x3 99 | text_recs[index, 5] = y3 100 | text_recs[index, 6] = x4 101 | text_recs[index, 7] = y4 102 | text_recs[index, 8] = line[4] 103 | index = index + 1 104 | 105 | return text_recs 106 | -------------------------------------------------------------------------------- /api/lib/text_connector/text_proposal_graph_builder.py: -------------------------------------------------------------------------------- 1 | from .text_connect_cfg import Config as TextLineCfg 2 | from .other import Graph 3 | import numpy as np 4 | 5 | 6 | class TextProposalGraphBuilder: 7 | """ 8 | Build Text proposals into a graph. 9 | """ 10 | def get_successions(self, index): 11 | box=self.text_proposals[index] 12 | results=[] 13 | for left in range(int(box[0])+1, min(int(box[0])+TextLineCfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): 14 | adj_box_indices=self.boxes_table[left] 15 | for adj_box_index in adj_box_indices: 16 | if self.meet_v_iou(adj_box_index, index): 17 | results.append(adj_box_index) 18 | if len(results)!=0: 19 | return results 20 | return results 21 | 22 | def get_precursors(self, index): 23 | box=self.text_proposals[index] 24 | results=[] 25 | for left in range(int(box[0])-1, max(int(box[0]-TextLineCfg.MAX_HORIZONTAL_GAP), 0)-1, -1): 26 | adj_box_indices=self.boxes_table[left] 27 | for adj_box_index in adj_box_indices: 28 | if self.meet_v_iou(adj_box_index, index): 29 | results.append(adj_box_index) 30 | if len(results)!=0: 31 | return results 32 | return results 33 | 34 | def is_succession_node(self, index, succession_index): 35 | precursors=self.get_precursors(succession_index) 36 | if self.scores[index]>=np.max(self.scores[precursors]): 37 | return True 38 | return False 39 | 40 | def meet_v_iou(self, index1, index2): 41 | def overlaps_v(index1, index2): 42 | h1=self.heights[index1] 43 | h2=self.heights[index2] 44 | y0=max(self.text_proposals[index2][1], self.text_proposals[index1][1]) 45 | y1=min(self.text_proposals[index2][3], self.text_proposals[index1][3]) 46 | return max(0, y1-y0+1)/min(h1, h2) 47 | 48 | def size_similarity(index1, index2): 49 | h1=self.heights[index1] 50 | h2=self.heights[index2] 51 | return min(h1, h2)/max(h1, h2) 52 | 53 | return overlaps_v(index1, index2)>=TextLineCfg.MIN_V_OVERLAPS and \ 54 | size_similarity(index1, index2)>=TextLineCfg.MIN_SIZE_SIM 55 | 56 | def build_graph(self, text_proposals, scores, im_size): 57 | self.text_proposals=text_proposals 58 | self.scores=scores 59 | self.im_size=im_size 60 | self.heights=text_proposals[:, 3]-text_proposals[:, 1]+1 61 | 62 | boxes_table=[[] for _ in range(self.im_size[1])] 63 | for index, box in enumerate(text_proposals): 64 | boxes_table[int(box[0])].append(index) 65 | self.boxes_table=boxes_table 66 | 67 | graph=np.zeros((text_proposals.shape[0], text_proposals.shape[0]), np.bool) 68 | 69 | for index, box in enumerate(text_proposals): 70 | successions=self.get_successions(index) 71 | if len(successions)==0: 72 | continue 73 | succession_index=successions[np.argmax(scores[successions])] 74 | if self.is_succession_node(index, succession_index): 75
| # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) 76 | # have equal scores. 77 | graph[index, succession_index]=True 78 | return Graph(graph) 79 | -------------------------------------------------------------------------------- /api/lib/utils/__init__.py: -------------------------------------------------------------------------------- 1 | from . import boxes_grid 2 | from . import blob 3 | from . import timer -------------------------------------------------------------------------------- /api/lib/utils/bbox.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Sergey Karayev 6 | # -------------------------------------------------------- 7 | 8 | cimport cython 9 | import numpy as np 10 | cimport numpy as np 11 | 12 | DTYPE = np.float 13 | ctypedef np.float_t DTYPE_t 14 | 15 | def bbox_overlaps( 16 | np.ndarray[DTYPE_t, ndim=2] boxes, 17 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 18 | """ 19 | Parameters 20 | ---------- 21 | boxes: (N, 4) ndarray of float 22 | query_boxes: (K, 4) ndarray of float 23 | Returns 24 | ------- 25 | overlaps: (N, K) ndarray of overlap between boxes and query_boxes 26 | """ 27 | cdef unsigned int N = boxes.shape[0] 28 | cdef unsigned int K = query_boxes.shape[0] 29 | cdef np.ndarray[DTYPE_t, ndim=2] overlaps = np.zeros((N, K), dtype=DTYPE) 30 | cdef DTYPE_t iw, ih, box_area 31 | cdef DTYPE_t ua 32 | cdef unsigned int k, n 33 | for k in range(K): 34 | box_area = ( 35 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 36 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 37 | ) 38 | for n in range(N): 39 | iw = ( 40 | min(boxes[n, 2], query_boxes[k, 2]) - 41 | max(boxes[n, 0], query_boxes[k, 0]) + 1 42 | ) 43 | if iw > 0: 44 | ih = ( 45 | min(boxes[n, 3], query_boxes[k, 3]) - 46 | max(boxes[n, 1], query_boxes[k, 1]) + 1 47 | ) 48 | if ih > 0: 49 | ua = float( 50 | (boxes[n, 2] - boxes[n, 0] + 1) * 51 | (boxes[n, 3] - boxes[n, 1] + 1) + 52 | box_area - iw * ih 53 | ) 54 | overlaps[n, k] = iw * ih / ua 55 | return overlaps 56 | 57 | def bbox_intersections( 58 | np.ndarray[DTYPE_t, ndim=2] boxes, 59 | np.ndarray[DTYPE_t, ndim=2] query_boxes): 60 | """ 61 | For each query box compute the intersection ratio covered by boxes 62 | ---------- 63 | Parameters 64 | ---------- 65 | boxes: (N, 4) ndarray of float 66 | query_boxes: (K, 4) ndarray of float 67 | Returns 68 | ------- 69 | overlaps: (N, K) ndarray of intersec between boxes and query_boxes 70 | """ 71 | cdef unsigned int N = boxes.shape[0] 72 | cdef unsigned int K = query_boxes.shape[0] 73 | cdef np.ndarray[DTYPE_t, ndim=2] intersec = np.zeros((N, K), dtype=DTYPE) 74 | cdef DTYPE_t iw, ih, box_area 75 | cdef DTYPE_t ua 76 | cdef unsigned int k, n 77 | for k in range(K): 78 | box_area = ( 79 | (query_boxes[k, 2] - query_boxes[k, 0] + 1) * 80 | (query_boxes[k, 3] - query_boxes[k, 1] + 1) 81 | ) 82 | for n in range(N): 83 | iw = ( 84 | min(boxes[n, 2], query_boxes[k, 2]) - 85 | max(boxes[n, 0], query_boxes[k, 0]) + 1 86 | ) 87 | if iw > 0: 88 | ih = ( 89 | min(boxes[n, 3], query_boxes[k, 3]) - 90 | max(boxes[n, 1], query_boxes[k, 1]) + 1 91 | ) 92 | if ih > 0: 93 | intersec[n, k] = iw * ih / box_area 94 | return intersec -------------------------------------------------------------------------------- /api/lib/utils/blob.py: 
-------------------------------------------------------------------------------- 1 | """Blob helper functions.""" 2 | import numpy as np 3 | import cv2 4 | from ..fast_rcnn.config import cfg 5 | 6 | def im_list_to_blob(ims): 7 | """Convert a list of images into a network input. 8 | 9 | Assumes images are already prepared (means subtracted, BGR order, ...). 10 | """ 11 | max_shape = np.array([im.shape for im in ims]).max(axis=0) 12 | num_images = len(ims) 13 | blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), 14 | dtype=np.float32) 15 | for i in range(num_images): 16 | im = ims[i] 17 | blob[i, 0:im.shape[0], 0:im.shape[1], :] = im 18 | 19 | return blob 20 | 21 | def prep_im_for_blob(im, pixel_means, target_size, max_size): 22 | """Mean subtract and scale an image for use in a blob.""" 23 | im = im.astype(np.float32, copy=False) 24 | im -= pixel_means 25 | im_shape = im.shape 26 | im_size_min = np.min(im_shape[0:2]) 27 | im_size_max = np.max(im_shape[0:2]) 28 | im_scale = float(target_size) / float(im_size_min) 29 | # Prevent the biggest axis from being more than MAX_SIZE 30 | if np.round(im_scale * im_size_max) > max_size: 31 | im_scale = float(max_size) / float(im_size_max) 32 | if cfg.TRAIN.RANDOM_DOWNSAMPLE: 33 | r = 0.6 + np.random.rand() * 0.4 34 | im_scale *= r 35 | im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, 36 | interpolation=cv2.INTER_LINEAR) 37 | 38 | return im, im_scale 39 | -------------------------------------------------------------------------------- /api/lib/utils/boxes_grid.py: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Subcategory CNN 3 | # Copyright (c) 2015 CVGL Stanford 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Yu Xiang 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | import math 10 | # TODO: make fast_rcnn irrelevant 11 | # >>>> obsolete, because it depends on sth outside of this project 12 | from ..fast_rcnn.config import cfg 13 | # <<<< obsolete 14 | 15 | def get_boxes_grid(image_height, image_width): 16 | """ 17 | Return the boxes on image grid. 18 | calling this function when cfg.IS_MULTISCALE is True, otherwise, calling rdl_roidb.prepare_roidb(imdb) instead. 19 | """ 20 | 21 | # fixed a bug, change cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE 22 | # coz, here needs a ratio around 1.0, not the accutual size. 23 | # height and width of the feature map 24 | if cfg.NET_NAME == 'CaffeNet': 25 | height = np.floor((image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 26 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 27 | height = np.floor((height - 1) / 2.0 + 1 + 0.5) 28 | 29 | width = np.floor((image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) 30 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 31 | width = np.floor((width - 1) / 2.0 + 1 + 0.5) 32 | elif cfg.NET_NAME == 'VGGnet': 33 | height = np.floor(image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 34 | height = np.floor(height / 2.0 + 0.5) 35 | height = np.floor(height / 2.0 + 0.5) 36 | height = np.floor(height / 2.0 + 0.5) 37 | 38 | width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) 39 | width = np.floor(width / 2.0 + 0.5) 40 | width = np.floor(width / 2.0 + 0.5) 41 | width = np.floor(width / 2.0 + 0.5) 42 | else: 43 | assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
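# Note added for clarity (not in the original source): the repeated floor operations
# above emulate the backbone's pooling stages, so `height` and `width` end up being
# the feature-map size (roughly a 16x downsampling for the VGG-style network); the
# grid boxes below are laid out on that feature map.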
44 | 45 | # compute the grid box centers 46 | h = np.arange(height) 47 | w = np.arange(width) 48 | y, x = np.meshgrid(h, w, indexing='ij') 49 | centers = np.dstack((x, y)) 50 | centers = np.reshape(centers, (-1, 2)) 51 | num = centers.shape[0] 52 | 53 | # compute width and height of grid box 54 | area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE 55 | aspect = cfg.TRAIN.ASPECTS # height / width 56 | num_aspect = len(aspect) 57 | widths = np.zeros((1, num_aspect), dtype=np.float32) 58 | heights = np.zeros((1, num_aspect), dtype=np.float32) 59 | for i in range(num_aspect): 60 | widths[0,i] = math.sqrt(area / aspect[i]) 61 | heights[0,i] = widths[0,i] * aspect[i] 62 | 63 | # construct grid boxes 64 | centers = np.repeat(centers, num_aspect, axis=0) 65 | widths = np.tile(widths, num).transpose() 66 | heights = np.tile(heights, num).transpose() 67 | 68 | x1 = np.reshape(centers[:,0], (-1, 1)) - widths * 0.5 69 | x2 = np.reshape(centers[:,0], (-1, 1)) + widths * 0.5 70 | y1 = np.reshape(centers[:,1], (-1, 1)) - heights * 0.5 71 | y2 = np.reshape(centers[:,1], (-1, 1)) + heights * 0.5 72 | 73 | boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE 74 | 75 | return boxes_grid, centers[:,0], centers[:,1] 76 | -------------------------------------------------------------------------------- /api/lib/utils/cython_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Fast R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | cdef inline np.float32_t max(np.float32_t a, np.float32_t b): 12 | return a if a >= b else b 13 | 14 | cdef inline np.float32_t min(np.float32_t a, np.float32_t b): 15 | return a if a <= b else b 16 | 17 | def nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 18 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 19 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 20 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 21 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 22 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 23 | 24 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 25 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 26 | 27 | cdef int ndets = dets.shape[0] 28 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 29 | np.zeros((ndets), dtype=np.int) 30 | 31 | # nominal indices 32 | cdef int _i, _j 33 | # sorted indices 34 | cdef int i, j 35 | # temp variables for box i's (the box currently under consideration) 36 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 37 | # variables for computing overlap with box j (lower scoring box) 38 | cdef np.float32_t xx1, yy1, xx2, yy2 39 | cdef np.float32_t w, h 40 | cdef np.float32_t inter, ovr 41 | 42 | keep = [] 43 | for _i in range(ndets): 44 | i = order[_i] 45 | if suppressed[i] == 1: 46 | continue 47 | keep.append(i) 48 | ix1 = x1[i] 49 | iy1 = y1[i] 50 | ix2 = x2[i] 51 | iy2 = y2[i] 52 | iarea = areas[i] 53 | for _j in range(_i + 1, ndets): 54 | j = order[_j] 55 | if suppressed[j] == 1: 56 | continue 57 | xx1 = max(ix1, x1[j]) 58 | yy1 = max(iy1, y1[j]) 59 | xx2 = min(ix2, x2[j]) 60 | yy2 = min(iy2, y2[j]) 61 | w = max(0.0, xx2 - xx1 + 1) 62 | h = max(0.0, yy2 - yy1 + 1) 63 | inter = w * h 64 | ovr = inter / (iarea + areas[j] 
- inter) 65 | if ovr >= thresh: 66 | suppressed[j] = 1 67 | 68 | return keep 69 | 70 | def nms_new(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh): 71 | cdef np.ndarray[np.float32_t, ndim=1] x1 = dets[:, 0] 72 | cdef np.ndarray[np.float32_t, ndim=1] y1 = dets[:, 1] 73 | cdef np.ndarray[np.float32_t, ndim=1] x2 = dets[:, 2] 74 | cdef np.ndarray[np.float32_t, ndim=1] y2 = dets[:, 3] 75 | cdef np.ndarray[np.float32_t, ndim=1] scores = dets[:, 4] 76 | 77 | cdef np.ndarray[np.float32_t, ndim=1] areas = (x2 - x1 + 1) * (y2 - y1 + 1) 78 | cdef np.ndarray[np.int_t, ndim=1] order = scores.argsort()[::-1] 79 | 80 | cdef int ndets = dets.shape[0] 81 | cdef np.ndarray[np.int_t, ndim=1] suppressed = \ 82 | np.zeros((ndets), dtype=np.int) 83 | 84 | # nominal indices 85 | cdef int _i, _j 86 | # sorted indices 87 | cdef int i, j 88 | # temp variables for box i's (the box currently under consideration) 89 | cdef np.float32_t ix1, iy1, ix2, iy2, iarea 90 | # variables for computing overlap with box j (lower scoring box) 91 | cdef np.float32_t xx1, yy1, xx2, yy2 92 | cdef np.float32_t w, h 93 | cdef np.float32_t inter, ovr 94 | 95 | keep = [] 96 | for _i in range(ndets): 97 | i = order[_i] 98 | if suppressed[i] == 1: 99 | continue 100 | keep.append(i) 101 | ix1 = x1[i] 102 | iy1 = y1[i] 103 | ix2 = x2[i] 104 | iy2 = y2[i] 105 | iarea = areas[i] 106 | for _j in range(_i + 1, ndets): 107 | j = order[_j] 108 | if suppressed[j] == 1: 109 | continue 110 | xx1 = max(ix1, x1[j]) 111 | yy1 = max(iy1, y1[j]) 112 | xx2 = min(ix2, x2[j]) 113 | yy2 = min(iy2, y2[j]) 114 | w = max(0.0, xx2 - xx1 + 1) 115 | h = max(0.0, yy2 - yy1 + 1) 116 | inter = w * h 117 | ovr = inter / (iarea + areas[j] - inter) 118 | ovr1 = inter / iarea 119 | ovr2 = inter / areas[j] 120 | if ovr >= thresh or ovr1 > 0.95 or ovr2 > 0.95: 121 | suppressed[j] = 1 122 | 123 | return keep 124 | -------------------------------------------------------------------------------- /api/lib/utils/gpu_nms.hpp: -------------------------------------------------------------------------------- 1 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 2 | int boxes_dim, float nms_overlap_thresh, int device_id); 3 | -------------------------------------------------------------------------------- /api/lib/utils/gpu_nms.pyx: -------------------------------------------------------------------------------- 1 | # -------------------------------------------------------- 2 | # Faster R-CNN 3 | # Copyright (c) 2015 Microsoft 4 | # Licensed under The MIT License [see LICENSE for details] 5 | # Written by Ross Girshick 6 | # -------------------------------------------------------- 7 | 8 | import numpy as np 9 | cimport numpy as np 10 | 11 | assert sizeof(int) == sizeof(np.int32_t) 12 | 13 | cdef extern from "gpu_nms.hpp": 14 | void _nms(np.int32_t*, int*, np.float32_t*, int, int, float, int) 15 | 16 | def gpu_nms(np.ndarray[np.float32_t, ndim=2] dets, np.float thresh, 17 | np.int32_t device_id=0): 18 | cdef int boxes_num = dets.shape[0] 19 | cdef int boxes_dim = dets.shape[1] 20 | cdef int num_out 21 | cdef np.ndarray[np.int32_t, ndim=1] \ 22 | keep = np.zeros(boxes_num, dtype=np.int32) 23 | cdef np.ndarray[np.float32_t, ndim=1] \ 24 | scores = dets[:, 4] 25 | cdef np.ndarray[np.int_t, ndim=1] \ 26 | order = scores.argsort()[::-1] 27 | cdef np.ndarray[np.float32_t, ndim=2] \ 28 | sorted_dets = dets[order, :] 29 | _nms(&keep[0], &num_out, &sorted_dets[0, 0], boxes_num, boxes_dim, thresh, device_id) 30 | keep = keep[:num_out] 31 | return 
list(order[keep]) 32 | -------------------------------------------------------------------------------- /api/lib/utils/make.sh: -------------------------------------------------------------------------------- 1 | cython bbox.pyx 2 | cython cython_nms.pyx 3 | cython gpu_nms.pyx 4 | python setup.py build_ext --inplace 5 | rm -rf build 6 | -------------------------------------------------------------------------------- /api/lib/utils/nms_kernel.cu: -------------------------------------------------------------------------------- 1 | // ------------------------------------------------------------------ 2 | // Faster R-CNN 3 | // Copyright (c) 2015 Microsoft 4 | // Licensed under The MIT License [see fast-rcnn/LICENSE for details] 5 | // Written by Shaoqing Ren 6 | // ------------------------------------------------------------------ 7 | 8 | #include "gpu_nms.hpp" 9 | #include <vector> 10 | #include <iostream> 11 | 12 | #define CUDA_CHECK(condition) \ 13 | /* Code block avoids redefinition of cudaError_t error */ \ 14 | do { \ 15 | cudaError_t error = condition; \ 16 | if (error != cudaSuccess) { \ 17 | std::cout << cudaGetErrorString(error) << std::endl; \ 18 | } \ 19 | } while (0) 20 | 21 | #define DIVUP(m,n) ((m) / (n) + ((m) % (n) > 0)) 22 | int const threadsPerBlock = sizeof(unsigned long long) * 8; 23 | 24 | __device__ inline float devIoU(float const * const a, float const * const b) { 25 | float left = max(a[0], b[0]), right = min(a[2], b[2]); 26 | float top = max(a[1], b[1]), bottom = min(a[3], b[3]); 27 | float width = max(right - left + 1, 0.f), height = max(bottom - top + 1, 0.f); 28 | float interS = width * height; 29 | float Sa = (a[2] - a[0] + 1) * (a[3] - a[1] + 1); 30 | float Sb = (b[2] - b[0] + 1) * (b[3] - b[1] + 1); 31 | return interS / (Sa + Sb - interS); 32 | } 33 | 34 | __global__ void nms_kernel(const int n_boxes, const float nms_overlap_thresh, 35 | const float *dev_boxes, unsigned long long *dev_mask) { 36 | const int row_start = blockIdx.y; 37 | const int col_start = blockIdx.x; 38 | 39 | // if (row_start > col_start) return; 40 | 41 | const int row_size = 42 | min(n_boxes - row_start * threadsPerBlock, threadsPerBlock); 43 | const int col_size = 44 | min(n_boxes - col_start * threadsPerBlock, threadsPerBlock); 45 | 46 | __shared__ float block_boxes[threadsPerBlock * 5]; 47 | if (threadIdx.x < col_size) { 48 | block_boxes[threadIdx.x * 5 + 0] = 49 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 0]; 50 | block_boxes[threadIdx.x * 5 + 1] = 51 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 1]; 52 | block_boxes[threadIdx.x * 5 + 2] = 53 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 2]; 54 | block_boxes[threadIdx.x * 5 + 3] = 55 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 3]; 56 | block_boxes[threadIdx.x * 5 + 4] = 57 | dev_boxes[(threadsPerBlock * col_start + threadIdx.x) * 5 + 4]; 58 | } 59 | __syncthreads(); 60 | 61 | if (threadIdx.x < row_size) { 62 | const int cur_box_idx = threadsPerBlock * row_start + threadIdx.x; 63 | const float *cur_box = dev_boxes + cur_box_idx * 5; 64 | int i = 0; 65 | unsigned long long t = 0; 66 | int start = 0; 67 | if (row_start == col_start) { 68 | start = threadIdx.x + 1; 69 | } 70 | for (i = start; i < col_size; i++) { 71 | if (devIoU(cur_box, block_boxes + i * 5) > nms_overlap_thresh) { 72 | t |= 1ULL << i; 73 | } 74 | } 75 | const int col_blocks = DIVUP(n_boxes, threadsPerBlock); 76 | dev_mask[cur_box_idx * col_blocks + col_start] = t; 77 | } 78 | } 79 | 80 | void _set_device(int device_id) { 81
| int current_device; 82 | CUDA_CHECK(cudaGetDevice(&current_device)); 83 | if (current_device == device_id) { 84 | return; 85 | } 86 | // The call to cudaSetDevice must come before any calls to Get, which 87 | // may perform initialization using the GPU. 88 | CUDA_CHECK(cudaSetDevice(device_id)); 89 | } 90 | 91 | void _nms(int* keep_out, int* num_out, const float* boxes_host, int boxes_num, 92 | int boxes_dim, float nms_overlap_thresh, int device_id) { 93 | _set_device(device_id); 94 | 95 | float* boxes_dev = NULL; 96 | unsigned long long* mask_dev = NULL; 97 | 98 | const int col_blocks = DIVUP(boxes_num, threadsPerBlock); 99 | 100 | CUDA_CHECK(cudaMalloc(&boxes_dev, 101 | boxes_num * boxes_dim * sizeof(float))); 102 | CUDA_CHECK(cudaMemcpy(boxes_dev, 103 | boxes_host, 104 | boxes_num * boxes_dim * sizeof(float), 105 | cudaMemcpyHostToDevice)); 106 | 107 | CUDA_CHECK(cudaMalloc(&mask_dev, 108 | boxes_num * col_blocks * sizeof(unsigned long long))); 109 | 110 | dim3 blocks(DIVUP(boxes_num, threadsPerBlock), 111 | DIVUP(boxes_num, threadsPerBlock)); 112 | dim3 threads(threadsPerBlock); 113 | nms_kernel<<<blocks, threads>>>(boxes_num, 114 | nms_overlap_thresh, 115 | boxes_dev, 116 | mask_dev); 117 | 118 | std::vector<unsigned long long> mask_host(boxes_num * col_blocks); 119 | CUDA_CHECK(cudaMemcpy(&mask_host[0], 120 | mask_dev, 121 | sizeof(unsigned long long) * boxes_num * col_blocks, 122 | cudaMemcpyDeviceToHost)); 123 | 124 | std::vector<unsigned long long> remv(col_blocks); 125 | memset(&remv[0], 0, sizeof(unsigned long long) * col_blocks); 126 | 127 | int num_to_keep = 0; 128 | for (int i = 0; i < boxes_num; i++) { 129 | int nblock = i / threadsPerBlock; 130 | int inblock = i % threadsPerBlock; 131 | 132 | if (!(remv[nblock] & (1ULL << inblock))) { 133 | keep_out[num_to_keep++] = i; 134 | unsigned long long *p = &mask_host[0] + i * col_blocks; 135 | for (int j = nblock; j < col_blocks; j++) { 136 | remv[j] |= p[j]; 137 | } 138 | } 139 | } 140 | *num_out = num_to_keep; 141 | 142 | CUDA_CHECK(cudaFree(boxes_dev)); 143 | CUDA_CHECK(cudaFree(mask_dev)); 144 | } 145 | -------------------------------------------------------------------------------- /api/lib/utils/setup.py: -------------------------------------------------------------------------------- 1 | from Cython.Build import cythonize 2 | import os 3 | from os.path import join as pjoin 4 | import numpy as np 5 | from distutils.core import setup 6 | from distutils.extension import Extension 7 | from Cython.Distutils import build_ext 8 | 9 | def find_in_path(name, path): 10 | for dir in path.split(os.pathsep): 11 | binpath = pjoin(dir, name) 12 | if os.path.exists(binpath): 13 | return os.path.abspath(binpath) 14 | return None 15 | 16 | def locate_cuda(): 17 | # first check if the CUDAHOME env variable is in use 18 | if 'CUDAHOME' in os.environ: 19 | home = os.environ['CUDAHOME'] 20 | nvcc = pjoin(home, 'bin', 'nvcc') 21 | else: 22 | # otherwise, search the PATH for NVCC 23 | default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') 24 | nvcc = find_in_path('nvcc', os.environ['PATH'] + os.pathsep + default_path) 25 | if nvcc is None: 26 | raise EnvironmentError('The nvcc binary could not be ' 27 | 'located in your $PATH.
Either add it to your path, or set $CUDAHOME') 28 | home = os.path.dirname(os.path.dirname(nvcc)) 29 | 30 | cudaconfig = {'home':home, 'nvcc':nvcc, 31 | 'include': pjoin(home, 'include'), 32 | 'lib64': pjoin(home, 'lib64')} 33 | for k, v in cudaconfig.items(): 34 | #for k, v in cudaconfig.iteritems(): 35 | if not os.path.exists(v): 36 | raise EnvironmentError('The CUDA %s path could not be located in %s' % (k, v)) 37 | return cudaconfig 38 | 39 | CUDA = locate_cuda() 40 | 41 | 42 | try: 43 | numpy_include = np.get_include() 44 | except AttributeError: 45 | numpy_include = np.get_numpy_include() 46 | 47 | def customize_compiler_for_nvcc(self): 48 | self.src_extensions.append('.cu') 49 | default_compiler_so = self.compiler_so 50 | super = self._compile 51 | def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): 52 | print(extra_postargs) 53 | if os.path.splitext(src)[1] == '.cu': 54 | # use the cuda for .cu files 55 | self.set_executable('compiler_so', CUDA['nvcc']) 56 | # use only a subset of the extra_postargs, which are 1-1 translated 57 | # from the extra_compile_args in the Extension class 58 | postargs = extra_postargs['nvcc'] 59 | else: 60 | postargs = extra_postargs['gcc'] 61 | 62 | super(obj, src, ext, cc_args, postargs, pp_opts) 63 | # reset the default compiler_so, which we might have changed for cuda 64 | self.compiler_so = default_compiler_so 65 | # inject our redefined _compile method into the class 66 | self._compile = _compile 67 | 68 | 69 | # run the customize_compiler 70 | class custom_build_ext(build_ext): 71 | def build_extensions(self): 72 | customize_compiler_for_nvcc(self.compiler) 73 | build_ext.build_extensions(self) 74 | 75 | ext_modules = [ 76 | Extension( 77 | "bbox", 78 | ["bbox.pyx"], 79 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 80 | include_dirs = [numpy_include] 81 | ), 82 | Extension( 83 | "cython_nms", 84 | ["cython_nms.pyx"], 85 | extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, 86 | include_dirs = [numpy_include] 87 | ), 88 | Extension('gpu_nms', 89 | ['nms_kernel.cu', 'gpu_nms.pyx'], 90 | library_dirs=[CUDA['lib64']], 91 | libraries=['cudart'], 92 | language='c++', 93 | runtime_library_dirs=[CUDA['lib64']], 94 | extra_compile_args={'gcc': ["-Wno-unused-function"], 95 | 'nvcc': ['-arch=sm_35', 96 | '--ptxas-options=-v', 97 | '-c', 98 | '--compiler-options', 99 | "'-fPIC'"]}, 100 | include_dirs = [numpy_include, CUDA['include']] 101 | ), 102 | ] 103 | 104 | setup( 105 | ext_modules=ext_modules, 106 | cmdclass={'build_ext': custom_build_ext}, 107 | ) 108 | 109 | -------------------------------------------------------------------------------- /api/lib/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | class Timer(object): 3 | def __init__(self): 4 | self.total_time = 0. 5 | self.calls = 0 6 | self.start_time = 0. 7 | self.diff = 0. 8 | self.average_time = 0. 
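# Illustrative usage (not part of the original file):
#     timer = Timer()
#     timer.tic()
#     ...do some work...
#     elapsed = timer.toc(average=False)   # time for this call only
#     avg = timer.average_time             # running mean across toc() calls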
9 | 10 | def tic(self): 11 | self.start_time = time.time() 12 | 13 | def toc(self, average=True): 14 | self.diff = time.time() - self.start_time 15 | self.total_time += self.diff 16 | self.calls += 1 17 | self.average_time = self.total_time / self.calls 18 | if average: 19 | return self.average_time 20 | else: 21 | return self.diff 22 | -------------------------------------------------------------------------------- /api/model/model.h5: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/model/model.h5 -------------------------------------------------------------------------------- /api/model/model.json: -------------------------------------------------------------------------------- 1 | {"config": {"name": "sequential_1", "layers": [{"config": {"data_format": "channels_last", "dtype": "float32", "trainable": true, "name": "conv2d_1", "activity_regularizer": null, "padding": "valid", "batch_input_shape": [null, 32, 32, 1], "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "kernel_size": [3, 3], "strides": [1, 1], "bias_regularizer": null, "activation": "relu", "kernel_regularizer": null, "dilation_rate": [1, 1], "kernel_constraint": null, "use_bias": true, "bias_constraint": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "filters": 64}, "class_name": "Conv2D"}, {"config": {"data_format": "channels_last", "trainable": true, "name": "conv2d_2", "activity_regularizer": null, "padding": "valid", "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "activation": "relu", "strides": [1, 1], "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "kernel_regularizer": null, "kernel_size": [3, 3], "dilation_rate": [1, 1], "kernel_constraint": null, "use_bias": true, "bias_constraint": null, "filters": 64}, "class_name": "Conv2D"}, {"config": {"data_format": "channels_last", "trainable": true, "pool_size": [1, 1], "padding": "valid", "name": "max_pooling2d_1", "strides": [1, 1]}, "class_name": "MaxPooling2D"}, {"config": {"data_format": "channels_last", "trainable": true, "name": "conv2d_3", "activity_regularizer": null, "padding": "valid", "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "activation": "relu", "strides": [1, 1], "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "kernel_regularizer": null, "kernel_size": [3, 3], "dilation_rate": [1, 1], "kernel_constraint": null, "use_bias": true, "bias_constraint": null, "filters": 128}, "class_name": "Conv2D"}, {"config": {"data_format": "channels_last", "trainable": true, "name": "conv2d_4", "activity_regularizer": null, "padding": "valid", "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "activation": "relu", "strides": [1, 1], "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "kernel_regularizer": null, "kernel_size": [3, 3], "dilation_rate": [1, 1], "kernel_constraint": null, "use_bias": true, "bias_constraint": null, "filters": 128}, "class_name": "Conv2D"}, {"config": {"data_format": "channels_last", "trainable": true, "pool_size": [3, 3], 
"padding": "valid", "name": "max_pooling2d_2", "strides": [3, 3]}, "class_name": "MaxPooling2D"}, {"config": {"seed": null, "trainable": true, "name": "dropout_1", "noise_shape": null, "rate": 0.5}, "class_name": "Dropout"}, {"config": {"data_format": "channels_last", "trainable": true, "name": "conv2d_5", "activity_regularizer": null, "padding": "valid", "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "activation": "relu", "strides": [1, 1], "bias_regularizer": null, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "kernel_regularizer": null, "kernel_size": [3, 3], "dilation_rate": [1, 1], "kernel_constraint": null, "use_bias": true, "bias_constraint": null, "filters": 256}, "class_name": "Conv2D"}, {"config": {"data_format": "channels_last", "trainable": true, "pool_size": [2, 2], "padding": "valid", "name": "max_pooling2d_3", "strides": [2, 2]}, "class_name": "MaxPooling2D"}, {"config": {"data_format": "channels_last", "trainable": true, "name": "flatten_1"}, "class_name": "Flatten"}, {"config": {"trainable": true, "name": "dense_1", "activity_regularizer": null, "units": 256, "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "activation": "relu", "kernel_regularizer": null, "kernel_constraint": null, "bias_constraint": null, "use_bias": true}, "class_name": "Dense"}, {"config": {"seed": null, "trainable": true, "name": "dropout_2", "noise_shape": null, "rate": 0.5}, "class_name": "Dropout"}, {"config": {"trainable": true, "name": "dense_2", "activity_regularizer": null, "units": 128, "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "activation": "relu", "kernel_regularizer": null, "kernel_constraint": null, "bias_constraint": null, "use_bias": true}, "class_name": "Dense"}, {"config": {"trainable": true, "name": "dense_3", "activity_regularizer": null, "units": 10, "kernel_initializer": {"config": {"scale": 1.0, "distribution": "uniform", "mode": "fan_avg", "seed": null}, "class_name": "VarianceScaling"}, "bias_initializer": {"config": {}, "class_name": "Zeros"}, "bias_regularizer": null, "activation": "softmax", "kernel_regularizer": null, "kernel_constraint": null, "bias_constraint": null, "use_bias": true}, "class_name": "Dense"}]}, "keras_version": "2.2.4", "class_name": "Sequential", "backend": "tensorflow"} -------------------------------------------------------------------------------- /api/outputs.txt: -------------------------------------------------------------------------------- 1 | ########################################################################## 2 | 3 | ######################## Raw Output ############################# 4 | 5 | Govermment of India 6 | Suman Goyal 7 | / 008 01/01/1979 8 | / FEMALE 9 | 8144 5625 3341 10 | Scanned by CamScanner 11 | 12 | 13 | ######################## Cleaned Output ############################# 14 | 15 | Aadhar No : 8144 5625 3341 16 | Name : Govermment of India 17 | Date of Birth : 01/01/1979 18 | Gender : Female 19 | ########################################################################## 20 | 21 | -------------------------------------------------------------------------------- 
/api/ref.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/ref.png -------------------------------------------------------------------------------- /api/server.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, request, jsonify, render_template 2 | import os 3 | from processing import recognise_text, crop_aadhar, get_address, seven_segment, _init_model, get_labels_from_aadhar, get_labels_from_licence 4 | from cheque_details_extraction import get_micrcode, ensemble_acc_output, ensemble_ifsc_output 5 | import datetime 6 | import db 7 | from face_matching import match_faces 8 | 9 | app = Flask(__name__) 10 | 11 | # path to upload images 12 | UPLOAD_FOLDER = './UPLOAD_FOLDER/' 13 | 14 | # initializing seven segment display model 15 | _init_model() 16 | 17 | # route to uploading images of id cards 18 | @app.route('/image/upload', methods=['POST']) 19 | def index(): 20 | 21 | if request.method == 'POST': 22 | 23 | # saving current timestamp 24 | current_time = str(datetime.datetime.now()).replace('-', '_').replace(':', '_') 25 | 26 | # get the type of image that is being received 27 | image_type = request.form['type'] 28 | 29 | # setting filename that is being received to current time stamp with its directory 30 | filename = UPLOAD_FOLDER + image_type + '/' + current_time + '.png' 31 | 32 | # if the image_type folder doesn't already exist, create it 33 | if not os.path.exists(UPLOAD_FOLDER + image_type): 34 | os.mkdir(UPLOAD_FOLDER + image_type) 35 | # directory for saving faces in the id cards 36 | os.mkdir(UPLOAD_FOLDER + image_type + '/' + 'faces') 37 | 38 | # if image_type is bank cheque, preprocess accordingly 39 | if image_type == 'Bank Cheque': 40 | details = {} 41 | 42 | # get photo from android 43 | photo = request.files['photo'] 44 | photo.save(filename) 45 | 46 | # get details from the image 47 | details['MICR'] = get_micrcode(filename) 48 | details['ACC.No'] = ensemble_acc_output(filename) 49 | details['IFSC'] = ensemble_ifsc_output(filename) 50 | 51 | # return the details and the image name it is saved as 52 | return jsonify({'status':True, 'fields': details, 'image_path': filename, 'photo_path': 'none' }) 53 | 54 | # if image_type is seven segment, preprocess accordingly 55 | elif image_type == 'Seven Segment': 56 | details = {} 57 | 58 | # get photo from android 59 | photo = request.files['photo'] 60 | photo.save(filename) 61 | 62 | # get text from seven segment 63 | text = seven_segment(filename) 64 | 65 | details[0] = text 66 | 67 | # return the details and the image name it is saved as 68 | return jsonify({'status':True, 'fields': details, 'image_path': filename, 'photo_path': 'none' }) 69 | 70 | # elif image_type == 'Aadhar Back': 71 | # details = {} 72 | 73 | # # get photo from android 74 | # photo = request.files['photo'] 75 | # photo.save(filename) 76 | 77 | # crop_path = UPLOAD_FOLDER + image_type + '/temp/' + current_time + '.png' 78 | 79 | # if not os.path.exists(UPLOAD_FOLDER + image_type + '/temp'): 80 | # os.mkdir(UPLOAD_FOLDER + image_type + '/temp') 81 | 82 | # crop_aadhar(filename, crop_path) 83 | 84 | # # recognise text in the id card 85 | # data, photo_path = recognise_text(crop_path, 'none') 86 | 87 | # details = get_address(data) 88 | 89 | # os.remove(crop_path) 90 | 91 | # # return the details and the image name it is saved as 92 | # return jsonify({'status':True, 
'fields': details, 'image_path': filename, 'photo_path': 'none' }) 93 | 94 | else: 95 | # setting directory for saving face in the id card 96 | photo_path = UPLOAD_FOLDER + image_type + '/' + 'faces' + '/' + current_time + '.png' 97 | 98 | # get photo from android 99 | photo = request.files['photo'] 100 | photo.save(filename) 101 | 102 | # recognise text in the id card 103 | data, photo_path = recognise_text(filename, photo_path) 104 | 105 | # extract labels from the recognised text according to the image_type 106 | if image_type == "Driving Licence": 107 | details = { idx : text for idx, text in enumerate(data) } 108 | details = get_labels_from_licence(details) 109 | elif image_type == "Aadhar Card": 110 | details = get_labels_from_aadhar(data) 111 | else: 112 | details = { idx : text for idx, text in enumerate(data) } 113 | 114 | with open('outputs.txt', 'a+') as f: 115 | f.write("##########################################################################\n\n") 116 | f.write('######################## Raw Output #############################\n\n') 117 | for value in data: 118 | f.write(str(value) + '\n') 119 | f.write('\n\n######################## Cleaned Output #############################\n\n') 120 | for key, value in details.items(): 121 | f.write(str(key) + ' : ' + str(value) + '\n') 122 | f.write("##########################################################################\n\n") 123 | 124 | # return the details and the image name and photo path it is saved as 125 | return jsonify({'status':True, 'fields': details, 'image_path': filename, 'photo_path': photo_path}) 126 | else: 127 | # if not POST, terminate 128 | return jsonify({'status':False}) 129 | 130 | # save data to database 131 | @app.route('/api/data', methods=['POST']) 132 | def saveData(): 133 | 134 | # get values as json 135 | values = request.get_json() 136 | image_type = values.get('type') 137 | data = values.get('fields') 138 | 139 | db.insert_data(image_type, args_dict = data) 140 | 141 | return jsonify({'status': True}) 142 | 143 | 144 | @app.route('/image/face_match',methods=['GET','POST']) 145 | def face_match(): 146 | 147 | # saving current timestamp 148 | current_time = str(datetime.datetime.now()) 149 | 150 | # temporary folder for saving face for face matching 151 | if not os.path.exists(UPLOAD_FOLDER + 'temp'): 152 | os.mkdir(UPLOAD_FOLDER + 'temp') 153 | 154 | # setting filename that is being received to current time stamp with its directory 155 | filename = UPLOAD_FOLDER + 'temp' + '/' + current_time + '.png' 156 | 157 | # getting the path of the saved face image 158 | photo_path = request.form['photopath'] 159 | 160 | # get live face from android 161 | photo = request.files['liveface'] 162 | photo.save(filename) 163 | 164 | # check face match and probability 165 | result, percent = match_faces(id_card_image=photo_path, ref_image=filename) 166 | 167 | # delete the temp face image 168 | os.remove(filename) 169 | 170 | # return face match prediction and percentage 171 | return jsonify({'status':str(result), 'percent': percent}) 172 | 173 | 174 | # GET 175 | @app.route('/') 176 | def home(): 177 | 178 | return render_template('index.html') 179 | 180 | 181 | # running web app in local machine 182 | if __name__ == '__main__': 183 | app.run(host='0.0.0.0', port=5000, debug=False) 184 | -------------------------------------------------------------------------------- /api/templates/aadhar_template.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/templates/aadhar_template.png -------------------------------------------------------------------------------- /api/templates/index.html: -------------------------------------------------------------------------------- [The HTML markup of index.html was not preserved when this dump was generated; the only text content that survives is the page title "Docify".]
-------------------------------------------------------------------------------- /api/templates/license_template.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/templates/license_template.jpg -------------------------------------------------------------------------------- /api/templates/pancard_template.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/templates/pancard_template.jpg -------------------------------------------------------------------------------- /api/templates/template_acc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/templates/template_acc.jpg -------------------------------------------------------------------------------- /api/templates/template_ifsc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/api/templates/template_ifsc.png -------------------------------------------------------------------------------- /inference.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from utils.unet import UNet 3 | 4 | import torch 5 | import cv2 6 | from utils.image_aug import normalization2 7 | 8 | def get_image_tensor(img_path): 9 | image = cv2.imread(img_path) 10 | image = cv2.resize(image, (360, 480)) 11 | image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) 12 | og_image = image.copy() 13 | 14 | # Normalize the image 15 | image = normalization2(image, max=1, min=0) 16 | 17 | # HWC to CHW 18 | image = image.transpose((2, 0, 1)) 19 | image = np.expand_dims(image, axis=0) 20 | 21 | image = torch.from_numpy(image).type(torch.FloatTensor) 22 | 23 | return og_image, image 24 | 25 | def get_mask(model, device, image): 26 | image = image.to(device=device, dtype=torch.float32) 27 | 28 | mask_pred = model(image) 29 | pred = torch.sigmoid(mask_pred) 30 | pred = (pred > 0.5).float() 31 | 32 | pred = pred.squeeze() 33 | pred = pred.cpu().detach().numpy() 34 | 35 | return pred 36 | 37 | if __name__ == '__main__': 38 | 39 | unet = UNet(n_channels=3, n_classes=1) 40 | checkpoint = torch.load('17_model.pth') 41 | unet.load_state_dict(checkpoint['model_state_dict']) 42 | unet.eval() 43 | 44 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 45 | print("Device:", device) 46 | unet = unet.to(device=device) 47 | 48 | img_path = '/data/Data/midv500_data/IMG_2126.JPG' 49 | 50 | original, img_tensor = get_image_tensor(img_path) 51 | prediction = get_mask(unet, device, img_tensor) 52 | prediction = prediction.astype(int) 53 | prediction = np.expand_dims(prediction, axis=2) 54 | masked_image = original * prediction 55 | masked_image = masked_image.astype(np.uint8) 56 | tile = cv2.hconcat([original, masked_image]) 57 | 58 | tile = cv2.cvtColor(tile, cv2.COLOR_RGB2BGR) 59 | masked_image = cv2.cvtColor(masked_image, cv2.COLOR_RGB2BGR) 60 | cv2.imwrite('tile.jpg', tile) 61 | cv2.imwrite('result.jpg', masked_image) -------------------------------------------------------------------------------- /tile.jpg:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Praneet9/Docify/a936014750dedf4a6b5a84918bbbf66cd63109de/tile.jpg -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | import cv2 4 | 5 | import torch 6 | from torch.utils.data import DataLoader, random_split 7 | from torch.utils.tensorboard import SummaryWriter 8 | from torchsummary import summary 9 | from torch import optim 10 | import torch.nn as nn 11 | 12 | from utils.unet import UNet 13 | from utils.dataset import BasicDataset 14 | 15 | 16 | def train_model(model, device, 17 | img_dir, mask_dir, 18 | checkpoint_dir, 19 | checkpoint_file=None, 20 | epochs=20, lr=0.001, 21 | val_split=0.20, 22 | batch_size=1): 23 | 24 | dataset = BasicDataset(img_dir, mask_dir) 25 | val_samples = int(len(dataset) * val_split) 26 | train_samples = len(dataset) - val_samples 27 | train, val = random_split(dataset, [train_samples, val_samples]) 28 | train_loader = DataLoader(train, batch_size=batch_size, shuffle=True, num_workers=8) 29 | val_loader = DataLoader(val, batch_size=batch_size, shuffle=False, num_workers=8, drop_last=True) 30 | 31 | writer = SummaryWriter(log_dir=checkpoint_dir, comment=f'LR_{lr}_BS_{batch_size}') 32 | global_step = 0 33 | 34 | optimizer = optim.Adam(model.parameters(), lr=lr, weight_decay=1e-8) 35 | criterion = nn.BCEWithLogitsLoss() 36 | 37 | training_loss = [] 38 | validation_loss = [] 39 | current_epoch = 0 40 | 41 | if checkpoint_file is not None: 42 | checkpoint = torch.load(checkpoint_dir + checkpoint_file) 43 | model.load_state_dict(checkpoint['model_state_dict']) 44 | optimizer.load_state_dict(checkpoint['optimizer_state_dict']) 45 | current_epoch = checkpoint['epoch'] 46 | training_loss = checkpoint['loss'] 47 | validation_loss = checkpoint['val_loss'] 48 | global_step = checkpoint['global_step'] 49 | 50 | for epoch in range(1 + current_epoch, epochs + 1): 51 | model.train() 52 | 53 | losses = [] 54 | val_losses = [] 55 | avg_val_loss = np.inf 56 | 57 | with tqdm(total=train_samples, desc=f'Epoch {epoch}/{epochs}', unit='img') as pbar: 58 | for batch in train_loader: 59 | imgs = batch['image'] 60 | true_masks = batch['mask'] 61 | 62 | imgs = imgs.to(device=device, dtype=torch.float32) 63 | mask_type = torch.float32 if model.n_classes == 1 else torch.long 64 | true_masks = true_masks.to(device=device, dtype=mask_type) 65 | 66 | masks_pred = model(imgs) 67 | loss = criterion(masks_pred, true_masks) 68 | losses.append(loss.item()) 69 | writer.add_scalar('Loss/train', sum(losses)/len(losses), global_step) 70 | 71 | pbar.set_postfix(**{'loss': sum(losses)/len(losses)}) 72 | 73 | optimizer.zero_grad() 74 | loss.backward() 75 | nn.utils.clip_grad_value_(model.parameters(), 0.1) 76 | optimizer.step() 77 | 78 | pbar.update(imgs.shape[0]) 79 | global_step += 1 80 | 81 | val_loss = 0 82 | for val_batch in val_loader: 83 | imgs, true_masks = val_batch['image'], val_batch['mask'] 84 | imgs = imgs.to(device=device, dtype=torch.float32) 85 | true_masks = true_masks.to(device=device, dtype=torch.float32) 86 | 87 | with torch.no_grad(): 88 | mask_pred = model(imgs) 89 | 90 | pred = torch.sigmoid(mask_pred) 91 | pred = (pred > 0.5).float() 92 | val_loss += criterion(mask_pred, true_masks).item()  # BCE loss on the validation batch prediction 93 | val_score = val_loss / len(val_loader) 94 | val_losses.append(val_score) 95 | avg_val_loss =
sum(val_losses) / len(val_losses) 96 | pbar.set_postfix(**{'loss': sum(losses)/len(losses), 'val_loss': avg_val_loss}) 97 | 98 | training_loss.append(sum(losses)/len(losses)) 99 | validation_loss.append(avg_val_loss) 100 | 101 | torch.save({ 102 | 'epoch': epoch, 103 | 'model_state_dict': model.state_dict(), 104 | 'optimizer_state_dict': optimizer.state_dict(), 105 | 'loss': training_loss, 106 | 'val_loss': validation_loss, 107 | 'global_step': global_step 108 | }, checkpoint_dir + str(epoch) + '_model.pth') 109 | 110 | writer.close() 111 | 112 | if __name__ == '__main__': 113 | 114 | IMAGES_PATH = '/data/Data/midv500_data/dataset/images_resized/' 115 | MASKS_PATH = '/data/Data/midv500_data/dataset/masks_resized/' 116 | MODEL_CHECKPOINT_PATH = '/data/Data/midv500_data/dataset/checkpoints/' 117 | 118 | dataset = BasicDataset(IMAGES_PATH, MASKS_PATH) 119 | 120 | unet = UNet(n_channels=3, n_classes=1) 121 | summary(unet.cuda(), (3, 480, 360)) 122 | 123 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 124 | print("Device:", device) 125 | unet = unet.to(device=device) 126 | 127 | train_model(unet, 128 | device, 129 | IMAGES_PATH, 130 | MASKS_PATH, 131 | MODEL_CHECKPOINT_PATH) -------------------------------------------------------------------------------- /utils/dataset.py: -------------------------------------------------------------------------------- 1 | from os import listdir 2 | from os.path import splitext 3 | from glob import glob 4 | 5 | from utils.image_aug import flip, add_gaussian_noise, add_uniform_noise, change_brightness, normalization2 6 | 7 | import torch 8 | from torch.utils.data import Dataset 9 | 10 | import numpy as np 11 | from random import randint 12 | import cv2 13 | 14 | class BasicDataset(Dataset): 15 | def __init__(self, imgs_dir, masks_dir, mask_suffix=''): 16 | self.imgs_dir = imgs_dir 17 | self.masks_dir = masks_dir 18 | self.mask_suffix = mask_suffix 19 | self.height = 480 20 | self.width = 360 21 | 22 | self.ids = [splitext(file)[0] for file in listdir(imgs_dir) 23 | if not file.startswith('.')] 24 | 25 | def __len__(self): 26 | return len(self.ids) 27 | 28 | def preprocess(self, img, mask): 29 | 30 | # Augmentation 31 | # flip {0: vertical, 1: horizontal, 2: both, 3: none} 32 | flip_num = randint(0, 3) 33 | img = flip(img, flip_num) 34 | mask = flip(mask, flip_num) 35 | 36 | # Noise Determine {0: Gaussian_noise, 1: uniform_noise 37 | if randint(0, 1): 38 | # Gaussian_noise 39 | gaus_sd, gaus_mean = randint(0, 20), 0 40 | img = add_gaussian_noise(img, gaus_mean, gaus_sd) 41 | else: 42 | # uniform_noise 43 | l_bound, u_bound = randint(-20, 0), randint(0, 20) 44 | img = add_uniform_noise(img, l_bound, u_bound) 45 | 46 | # Brightness 47 | pix_add = randint(-20, 20) 48 | img = change_brightness(img, pix_add) 49 | 50 | # Normalize the image 51 | img = normalization2(img, max=1, min=0) 52 | 53 | 54 | # Normalize mask to only 0 and 1 55 | mask = mask/255 56 | # msk_as_np = np.expand_dims(msk_as_np, axis=0) # add additional dimension 57 | 58 | if len(mask.shape) == 2: 59 | mask = np.expand_dims(mask, axis=2) 60 | 61 | # HWC to CHW 62 | img = img.transpose((2, 0, 1)) 63 | mask = mask.transpose((2, 0, 1)) 64 | 65 | return img, mask 66 | 67 | def __getitem__(self, i): 68 | idx = self.ids[i] 69 | mask_file = glob(self.masks_dir + idx + self.mask_suffix + '.*') 70 | img_file = glob(self.imgs_dir + idx + '.*') 71 | 72 | assert len(mask_file) == 1, \ 73 | f'Either no mask or multiple masks found for the ID {idx}: {mask_file}' 74 | assert len(img_file) == 
1, \ 75 | f'Either no image or multiple images found for the ID {idx}: {img_file}' 76 | 77 | mask = cv2.imread(mask_file[0]) 78 | img = cv2.imread(img_file[0]) 79 | 80 | img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 81 | mask = cv2.cvtColor(mask, cv2.COLOR_BGR2GRAY) 82 | mask = cv2.threshold(mask, 100, 255, cv2.THRESH_BINARY)[1] 83 | 84 | img, mask = self.preprocess(img, mask) 85 | 86 | return { 87 | 'image': torch.from_numpy(img).type(torch.FloatTensor), 88 | 'mask': torch.from_numpy(mask).type(torch.FloatTensor) 89 | } -------------------------------------------------------------------------------- /utils/image_aug.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from random import randint 3 | 4 | 5 | def flip(image, option_value): 6 | """ 7 | Args: 8 | image : numpy array of image 9 | option_value : random integer between 0 and 3 10 | Return : 11 | image : numpy array of flipped image 12 | """ 13 | if option_value == 0: 14 | # vertical 15 | image = np.flip(image, option_value) 16 | elif option_value == 1: 17 | # horizontal 18 | image = np.flip(image, option_value) 19 | elif option_value == 2: 20 | # horizontally and vertically flip 21 | image = np.flip(image, 0) 22 | image = np.flip(image, 1) 23 | else: 24 | image = image 25 | # no effect 26 | return image 27 | 28 | def add_gaussian_noise(image, mean=0, std=1): 29 | """ 30 | Args: 31 | image : numpy array of image 32 | mean : mean of the gaussian noise 33 | std : standard deviation of the gaussian noise 34 | Return : 35 | image : numpy array of image with gaussian noise added 36 | """ 37 | gaus_noise = np.random.normal(mean, std, image.shape) 38 | image = image.astype("int16") 39 | noise_img = image + gaus_noise 40 | noise_img = ceil_floor_image(noise_img) 41 | return noise_img 42 | 43 | def add_uniform_noise(image, low=-10, high=10): 44 | """ 45 | Args: 46 | image : numpy array of image 47 | low : lower boundary of output interval 48 | high : upper boundary of output interval 49 | Return : 50 | image : numpy array of image with uniform noise added 51 | """ 52 | uni_noise = np.random.uniform(low, high, image.shape) 53 | image = image.astype("int16") 54 | noise_img = image + uni_noise 55 | noise_img = ceil_floor_image(noise_img) 56 | return noise_img 57 | 58 | def change_brightness(image, value): 59 | """ 60 | Args: 61 | image : numpy array of image 62 | value : brightness 63 | Return : 64 | image : numpy array of image with brightness added 65 | """ 66 | image = image.astype("int16") 67 | image = image + value 68 | image = ceil_floor_image(image) 69 | return image 70 | 71 | def ceil_floor_image(image): 72 | """ 73 | Args: 74 | image : numpy array of image in datatype int16 75 | Return : 76 | image : numpy array of image in datatype uint8 with ceiling(maximum 255) and flooring(minimum 0) 77 | """ 78 | image[image > 255] = 255 79 | image[image < 0] = 0 80 | image = image.astype("uint8") 81 | return image 82 | 83 | def approximate_image(image): 84 | """ 85 | Args: 86 | image : numpy array of image in datatype int16 87 | Return : 88 | image : numpy array of image in datatype uint8 only with 255 and 0 89 | """ 90 | image[image > 127.5] = 255 91 | image[image < 127.5] = 0 92 | image = image.astype("uint8") 93 | return image 94 | 95 | def normalization2(image, max, min): 96 | """Normalization to range of [min, max] 97 | Args : 98 | image : numpy array of image 99 | max, min : upper and lower bounds of the output range 100 | Return : 101 | image : numpy array of image with values rescaled to the range [min, max] 102 | """ 103 | image_new = (image -
np.min(image))*(max - min)/(np.max(image)-np.min(image)) + min 104 | return image_new 105 | 106 | def stride_size(image_len, crop_num, crop_size): 107 | """return stride size 108 | Args : 109 | image_len(int) : length of one size of image (width or height) 110 | crop_num(int) : number of crop in certain direction 111 | crop_size(int) : size of crop 112 | Return : 113 | stride_size(int) : stride size 114 | """ 115 | return int((image_len - crop_size)/(crop_num - 1)) 116 | 117 | # def multi_cropping(image, crop_size, crop_num1, crop_num2): 118 | # """crop the image and pad it to in_size 119 | # Args : 120 | # images : numpy arrays of images 121 | # crop_size(int) : size of cropped image 122 | # crop_num2 (int) : number of crop in horizontal way 123 | # crop_num1 (int) : number of crop in vertical way 124 | # Return : 125 | # cropped_imgs : numpy arrays of stacked images 126 | # """ 127 | 128 | # img_height, img_width = image.shape[0], image.shape[1] 129 | # assert crop_size*crop_num1 >= img_width and crop_size * \ 130 | # crop_num2 >= img_height, "Whole image cannot be sufficiently expressed" 131 | # assert crop_num1 <= img_width - crop_size + 1 and crop_num2 <= img_height - \ 132 | # crop_size + 1, "Too many number of crops" 133 | 134 | # cropped_imgs = [] 135 | # # int((img_height - crop_size)/(crop_num1 - 1)) 136 | # dim1_stride = stride_size(img_height, crop_num1, crop_size) 137 | # # int((img_width - crop_size)/(crop_num2 - 1)) 138 | # dim2_stride = stride_size(img_width, crop_num2, crop_size) 139 | # for i in range(crop_num1): 140 | # for j in range(crop_num2): 141 | # cropped_imgs.append(cropping(image, crop_size, 142 | # dim1_stride*i, dim2_stride*j)) 143 | # return np.asarray(cropped_imgs) 144 | 145 | # def cropping(image, vert_crop_size, hort_crop_size, dim1, dim2): 146 | # """crop the image and pad it to in_size 147 | # Args : 148 | # images : numpy array of images 149 | # crop_size(int) : size of cropped image 150 | # dim1(int) : vertical location of crop 151 | # dim2(int) : horizontal location of crop 152 | # Return : 153 | # cropped_img: numpy array of cropped image 154 | # """ 155 | # cropped_img = image[dim1:dim1+vert_crop_size, dim2:dim2+hort_crop_size] 156 | # return cropped_img 157 | 158 | def add_padding(image, in_size, out_size, mode): 159 | """Pad the image to in_size 160 | Args : 161 | images : numpy array of images 162 | in_size(int) : the input_size of model 163 | out_size(int) : the output_size of model 164 | mode(str) : mode of padding 165 | Return : 166 | padded_img: numpy array of padded image 167 | """ 168 | pad_size = int((in_size - out_size)/2) 169 | padded_img = np.pad(image, pad_size, mode=mode) 170 | return padded_img 171 | 172 | 173 | def division_array(crop_size, crop_num1, crop_num2, dim1, dim2): 174 | """Make division array 175 | Args : 176 | crop_size(int) : size of cropped image 177 | crop_num2 (int) : number of crop in horizontal way 178 | crop_num1 (int) : number of crop in vertical way 179 | dim1(int) : vertical size of output 180 | dim2(int) : horizontal size_of_output 181 | Return : 182 | div_array : numpy array of numbers of 1,2,4 183 | """ 184 | div_array = np.zeros([dim1, dim2]) # make division array 185 | one_array = np.ones([crop_size, crop_size]) # one array to be added to div_array 186 | dim1_stride = stride_size(dim1, crop_num1, crop_size) # vertical stride 187 | dim2_stride = stride_size(dim2, crop_num2, crop_size) # horizontal stride 188 | for i in range(crop_num1): 189 | for j in range(crop_num2): 190 | # add ones to div_array at 
specific position 191 | div_array[dim1_stride*i:dim1_stride*i + crop_size, 192 | dim2_stride*j:dim2_stride*j + crop_size] += one_array 193 | return div_array 194 | 195 | def image_concatenate(image, crop_num1, crop_num2, dim1, dim2): 196 | """concatenate images 197 | Args : 198 | image : output images (should be square) 199 | crop_num2 (int) : number of crop in horizontal way (2) 200 | crop_num1 (int) : number of crop in vertical way (2) 201 | dim1(int) : vertical size of output (512) 202 | dim2(int) : horizontal size_of_output (512) 203 | Return : 204 | div_array : numpy arrays of numbers of 1,2,4 205 | """ 206 | crop_size = image.shape[1] # size of crop 207 | empty_array = np.zeros([dim1, dim2]).astype("float64") # to make sure no overflow 208 | dim1_stride = stride_size(dim1, crop_num1, crop_size) # vertical stride 209 | dim2_stride = stride_size(dim2, crop_num2, crop_size) # horizontal stride 210 | index = 0 211 | for i in range(crop_num1): 212 | for j in range(crop_num2): 213 | # add image to empty_array at specific position 214 | empty_array[dim1_stride*i:dim1_stride*i + crop_size, 215 | dim2_stride*j:dim2_stride*j + crop_size] += image[index] 216 | index += 1 217 | return empty_array -------------------------------------------------------------------------------- /utils/nn_block.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | import torch 3 | 4 | 5 | class DualConv(nn.Module): 6 | 7 | def __init__(self, input_channels, output_channels): 8 | 9 | super().__init__() 10 | 11 | self.dual_conv = nn.Sequential( 12 | nn.Conv2d(input_channels, output_channels, kernel_size=3, padding=1), 13 | nn.BatchNorm2d(output_channels), 14 | nn.ReLU(inplace=True), 15 | nn.Conv2d(output_channels, output_channels, kernel_size=3, padding=1), 16 | nn.BatchNorm2d(output_channels), 17 | nn.ReLU(inplace=True) 18 | ) 19 | 20 | def forward(self, x): 21 | 22 | return self.dual_conv(x) 23 | 24 | class DownConv(nn.Module): 25 | 26 | def __init__(self, input_channels, output_channels): 27 | 28 | super().__init__() 29 | 30 | self.down_conv = nn.Sequential( 31 | nn.MaxPool2d(2), 32 | DualConv(input_channels, output_channels) 33 | ) 34 | 35 | def forward(self, x): 36 | 37 | return self.down_conv(x) 38 | 39 | class UpConv(nn.Module): 40 | 41 | def __init__(self, input_channels, output_channels): 42 | 43 | super().__init__() 44 | 45 | self.up_conv = nn.ConvTranspose2d(input_channels, output_channels, kernel_size=2, stride=2) 46 | self.conv = DualConv(input_channels, output_channels) 47 | 48 | def forward(self, x1, x2): 49 | 50 | x1 = self.up_conv(x1) 51 | 52 | y_pad = x2.size()[2] - x1.size()[2] 53 | x_pad = x2.size()[3] - x1.size()[3] 54 | 55 | x1 = nn.functional.pad(x1, [x_pad // 2, x_pad - x_pad // 2, 56 | y_pad // 2, y_pad - y_pad // 2]) 57 | 58 | x = torch.cat([x2, x1], dim = 1) 59 | 60 | return self.conv(x) 61 | 62 | class OutputConv(nn.Module): 63 | 64 | def __init__(self, input_channels, output_channels): 65 | 66 | super().__init__() 67 | 68 | self.conv = nn.Conv2d(input_channels, output_channels, kernel_size=1) 69 | 70 | def forward(self, x): 71 | 72 | return self.conv(x) 73 | 74 | -------------------------------------------------------------------------------- /utils/unet.py: -------------------------------------------------------------------------------- 1 | import torch.nn as nn 2 | from utils.nn_block import DualConv, DownConv, UpConv, OutputConv 3 | 4 | 5 | class UNet(nn.Module): 6 | 7 | def __init__(self, n_channels, n_classes): 8 | 9 | 
super().__init__() 10 | 11 | self.n_channels = n_channels 12 | self.n_classes = n_classes 13 | 14 | self.inp = DualConv(n_channels, 64) 15 | 16 | self.down_conv_1 = DownConv(64, 128) 17 | self.down_conv_2 = DownConv(128, 256) 18 | self.down_conv_3 = DownConv(256, 512) 19 | self.down_conv_4 = DownConv(512, 1024) 20 | 21 | self.up_conv_1 = UpConv(1024, 512) 22 | self.up_conv_2 = UpConv(512, 256) 23 | self.up_conv_3 = UpConv(256, 128) 24 | self.up_conv_4 = UpConv(128, 64) 25 | 26 | self.op_conv = OutputConv(64, n_classes) 27 | 28 | def forward(self, x): 29 | 30 | x1 = self.inp(x) 31 | 32 | x2 = self.down_conv_1(x1) 33 | x3 = self.down_conv_2(x2) 34 | x4 = self.down_conv_3(x3) 35 | x5 = self.down_conv_4(x4) 36 | 37 | x6 = self.up_conv_1(x5, x4) 38 | x7 = self.up_conv_2(x6, x3) 39 | x8 = self.up_conv_3(x7, x2) 40 | x9 = self.up_conv_4(x8, x1) 41 | 42 | result = self.op_conv(x9) 43 | 44 | return result --------------------------------------------------------------------------------
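As a quick sanity check of the UNet wiring above, the following minimal sketch (assuming only the utils package layout shown in this repo) instantiates the network and runs a single forward pass at the 480x360 resolution used by train.py and inference.py; the dummy input and threshold are illustrative, not part of the original code:

import torch
from utils.unet import UNet

# Smoke test: one dummy RGB image through the encoder-decoder defined above.
unet = UNet(n_channels=3, n_classes=1)
unet.eval()  # use running BatchNorm statistics

with torch.no_grad():
    dummy = torch.randn(1, 3, 480, 360)             # NCHW batch of one image
    logits = unet(dummy)                            # raw scores from OutputConv
    mask = (torch.sigmoid(logits) > 0.5).float()    # binary document mask

print(logits.shape, mask.shape)  # both torch.Size([1, 1, 480, 360])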