├── .gitattributes
├── .gitignore
├── Dockerfile
├── LICENCE
├── README.md
├── __init__.py
├── app.env
├── app.py
├── helpers
│   ├── __init__.py
│   ├── logger.py
│   └── temp.py
├── lib
│   ├── __init__.py
│   ├── models
│   │   ├── jar-nojar
│   │   │   ├── deploy.prototxt
│   │   │   └── weights.caffemodel
│   │   └── text-notext
│   │       ├── deploy.prototxt
│   │       └── weights.caffemodel
│   └── selectivesearch.py
├── requirements.txt
├── set_env.sh
└── stages
    ├── __init__.py
    ├── region_proposal
    │   ├── __init__.py
    │   ├── extract_seal.py
    │   ├── region_grouping.py
    │   └── region_search.py
    ├── symbol_classification.py
    ├── symbol_segmentation.py
    └── text_region_extraction
        ├── __init__.py
        ├── region_classification.py
        └── text_region_formulation.py

--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
*.png filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.jpeg filter=lfs diff=lfs merge=lfs -text
*.gif filter=lfs diff=lfs merge=lfs -text
*.caffemodel filter=lfs diff=lfs merge=lfs -text
*.solverstate filter=lfs diff=lfs merge=lfs -text

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
*.pyc
__pycache__
*.log
.DS_Store

--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
FROM koallen/anaconda-caffe:gpu

# create an alias for caffe
RUN echo "alias caffe='/root/caffe/build/tools/caffe'" >> ~/.bashrc

# install OpenCV
RUN conda install -y opencv

# change working directory
WORKDIR /root/workspace/

# install python requirements
COPY requirements.txt /root/workspace/requirements.txt
RUN pip install -r requirements.txt

# launch command line
ENTRYPOINT ["/bin/bash"]

--------------------------------------------------------------------------------
/LICENCE:
--------------------------------------------------------------------------------
                                 Apache License
                           Version 2.0, January 2004
                        http://www.apache.org/licenses/

   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION

   1. Definitions.

      "License" shall mean the terms and conditions for use, reproduction,
      and distribution as defined by Sections 1 through 9 of this document.

      "Licensor" shall mean the copyright owner or entity authorized by
      the copyright owner that is granting the License.

      "Legal Entity" shall mean the union of the acting entity and all
      other entities that control, are controlled by, or are under common
      control with that entity. For the purposes of this definition,
      "control" means (i) the power, direct or indirect, to cause the
      direction or management of such entity, whether by contract or
      otherwise, or (ii) ownership of fifty percent (50%) or more of the
      outstanding shares, or (iii) beneficial ownership of such entity.

      "You" (or "Your") shall mean an individual or Legal Entity
      exercising permissions granted by this License.

      "Source" form shall mean the preferred form for making modifications,
      including but not limited to software source code, documentation
      source, and configuration files.

      "Object" form shall mean any form resulting from mechanical
      transformation or translation of a Source form, including but
      not limited to compiled object code, generated documentation,
      and conversions to other media types.

      "Work" shall mean the work of authorship, whether in Source or
      Object form, made available under the License, as indicated by a
      copyright notice that is included in or attached to the work
      (an example is provided in the Appendix below).

      "Derivative Works" shall mean any work, whether in Source or Object
      form, that is based on (or derived from) the Work and for which the
      editorial revisions, annotations, elaborations, or other modifications
      represent, as a whole, an original work of authorship. For the purposes
      of this License, Derivative Works shall not include works that remain
      separable from, or merely link (or bind by name) to the interfaces of,
      the Work and Derivative Works thereof.

      "Contribution" shall mean any work of authorship, including
      the original version of the Work and any modifications or additions
      to that Work or Derivative Works thereof, that is intentionally
      submitted to Licensor for inclusion in the Work by the copyright owner
      or by an individual or Legal Entity authorized to submit on behalf of
      the copyright owner. For the purposes of this definition, "submitted"
      means any form of electronic, verbal, or written communication sent
      to the Licensor or its representatives, including but not limited to
      communication on electronic mailing lists, source code control systems,
      and issue tracking systems that are managed by, or on behalf of, the
      Licensor for the purpose of discussing and improving the Work, but
      excluding communication that is conspicuously marked or otherwise
      designated in writing by the copyright owner as "Not a Contribution."

      "Contributor" shall mean Licensor and any individual or Legal Entity
      on behalf of whom a Contribution has been received by Licensor and
      subsequently incorporated within the Work.

   2. Grant of Copyright License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      copyright license to reproduce, prepare Derivative Works of,
      publicly display, publicly perform, sublicense, and distribute the
      Work and such Derivative Works in Source or Object form.

   3. Grant of Patent License. Subject to the terms and conditions of
      this License, each Contributor hereby grants to You a perpetual,
      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
      (except as stated in this section) patent license to make, have made,
      use, offer to sell, sell, import, and otherwise transfer the Work,
      where such license applies only to those patent claims licensable
      by such Contributor that are necessarily infringed by their
      Contribution(s) alone or by combination of their Contribution(s)
      with the Work to which such Contribution(s) was submitted. If You
      institute patent litigation against any entity (including a
      cross-claim or counterclaim in a lawsuit) alleging that the Work
      or a Contribution incorporated within the Work constitutes direct
      or contributory patent infringement, then any patent licenses
      granted to You under this License for that Work shall terminate
      as of the date such litigation is filed.

   4. Redistribution. You may reproduce and distribute copies of the
      Work or Derivative Works thereof in any medium, with or without
      modifications, and in Source or Object form, provided that You
      meet the following conditions:

      (a) You must give any other recipients of the Work or
          Derivative Works a copy of this License; and

      (b) You must cause any modified files to carry prominent notices
          stating that You changed the files; and

      (c) You must retain, in the Source form of any Derivative Works
          that You distribute, all copyright, patent, trademark, and
          attribution notices from the Source form of the Work,
          excluding those notices that do not pertain to any part of
          the Derivative Works; and

      (d) If the Work includes a "NOTICE" text file as part of its
          distribution, then any Derivative Works that You distribute must
          include a readable copy of the attribution notices contained
          within such NOTICE file, excluding those notices that do not
          pertain to any part of the Derivative Works, in at least one
          of the following places: within a NOTICE text file distributed
          as part of the Derivative Works; within the Source form or
          documentation, if provided along with the Derivative Works; or,
          within a display generated by the Derivative Works, if and
          wherever such third-party notices normally appear. The contents
          of the NOTICE file are for informational purposes only and
          do not modify the License. You may add Your own attribution
          notices within Derivative Works that You distribute, alongside
          or as an addendum to the NOTICE text from the Work, provided
          that such additional attribution notices cannot be construed
          as modifying the License.

      You may add Your own copyright statement to Your modifications and
      may provide additional or different license terms and conditions
      for use, reproduction, or distribution of Your modifications, or
      for any such Derivative Works as a whole, provided Your use,
      reproduction, and distribution of the Work otherwise complies with
      the conditions stated in this License.

   5. Submission of Contributions. Unless You explicitly state otherwise,
      any Contribution intentionally submitted for inclusion in the Work
      by You to the Licensor shall be under the terms and conditions of
      this License, without any additional terms or conditions.
      Notwithstanding the above, nothing herein shall supersede or modify
      the terms of any separate license agreement you may have executed
      with Licensor regarding such Contributions.

   6. Trademarks. This License does not grant permission to use the trade
      names, trademarks, service marks, or product names of the Licensor,
      except as required for reasonable and customary use in describing the
      origin of the Work and reproducing the content of the NOTICE file.

   7. Disclaimer of Warranty. Unless required by applicable law or
      agreed to in writing, Licensor provides the Work (and each
      Contributor provides its Contributions) on an "AS IS" BASIS,
      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
      implied, including, without limitation, any warranties or conditions
      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
      PARTICULAR PURPOSE. You are solely responsible for determining the
      appropriateness of using or redistributing the Work and assume any
      risks associated with Your exercise of permissions under this License.

   8. Limitation of Liability. In no event and under no legal theory,
      whether in tort (including negligence), contract, or otherwise,
      unless required by applicable law (such as deliberate and grossly
      negligent acts) or agreed to in writing, shall any Contributor be
      liable to You for damages, including any direct, indirect, special,
      incidental, or consequential damages of any character arising as a
      result of this License or out of the use or inability to use the
      Work (including but not limited to damages for loss of goodwill,
      work stoppage, computer failure or malfunction, or any and all
      other commercial damages or losses), even if such Contributor
      has been advised of the possibility of such damages.

   9. Accepting Warranty or Additional Liability. While redistributing
      the Work or Derivative Works thereof, You may choose to offer,
      and charge a fee for, acceptance of support, warranty, indemnity,
      or other liability obligations and/or rights consistent with this
      License. However, in accepting such obligations, You may act only
      on Your own behalf and on Your sole responsibility, not on behalf
      of any other Contributor, and only if You agree to indemnify,
      defend, and hold each Contributor harmless for any liability
      incurred by, or claims asserted against, such Contributor by reason
      of your accepting any such warranty or additional liability.

   END OF TERMS AND CONDITIONS

   APPENDIX: How to apply the Apache License to your work.

      To apply the Apache License to your work, attach the following
      boilerplate notice, with the fields enclosed by brackets "{}"
      replaced with your own identifying information. (Don't include
      the brackets!) The text should be enclosed in the appropriate
      comment syntax for the file format. We also recommend that a
      file or class name and description of purpose be included on the
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2017 Satish Palaniappan, Ronojoy Adhikari

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
   You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# Indus Script OCR

Given images of Indus seals from archaeological sites, this project automatically locates text patches/regions, segments the individual symbols/characters within those regions, and identifies each symbol/character of the Indus script, using image processing and deep learning techniques. [WIP]

View our research article titled "__Deep Learning the Indus Script__", available on arXiv: [arXiv:1702.00523v1](https://arxiv.org/abs/1702.00523v1)

## Deploying the app

- Set up the GPU machine to run the service:
  - Install the latest NVIDIA drivers from `http://www.geforce.com/drivers`
  - Install the nvidia-docker plug-in on top of Docker, from `https://github.com/NVIDIA/nvidia-docker/releases`
  - Make sure you have `git-lfs` installed (https://git-lfs.github.com/)

- Launch the service:
  - Build the docker image: `nvidia-docker build --no-cache=true -t indus-script-ocr:latest .`
  - Launch a docker container: `nvidia-docker run -it -v "$PWD":/root/workspace --rm --env-file app.env --name indus-script-ocr-service indus-script-ocr:latest`

## Press Coverage

- [The Verge](http://www.theverge.com/2017/1/25/14371450/indus-valley-civilization-ancient-seals-symbols-language-algorithms-ai#EQQA6r)
- [The Hindu](http://www.thehindu.com/sci-tech/science/chennai-team-taps-ai-to-read-indus-script/article17448690.ece)
- [Times of India](http://timesofindia.indiatimes.com/city/chennai/app-may-help-decipher-indus-valley-symbols/articleshow/57281369.cms)
- [SBS Radio, Australia](http://www.sbs.com.au/yourlanguage/tamil/en/content/app-decipher-ancient-symbols?language=en)

## Talks

- **Indian Deep Learning Initiative (IDLI):** [slide deck](https://github.com/tpsatish95/talks/blob/master/Deep\%20learning\%20based\%20OCR\%20engine\%20for\%20the\%20Indus\%20script\%20-\%20IDLI\%20Talk.pdf), [video](https://www.youtube.com/watch?v=qPF1oR9yMNY), [link](https://www.facebook.com/groups/idliai/)
- **ThoughtWorks Geek Night:** [slide deck](https://github.com/tpsatish95/talks/blob/master/Deep\%20learning\%20based\%20OCR\%20engine\%20for\%20the\%20Indus\%20script\%20-\%20TW\%20Geek\%20Night.pdf), [video](https://www.youtube.com/watch?v=g7v4QaCD-UQ), [link](https://twchennai.github.io/geeknight/edition-43.html)
- **ChennaiPy:** [link](http://chennaipy.org/may-2017-meet-minutes.html)
- **Anthill Inside 2017:** [proposal](https://anthillinside.talkfunnel.com/2017/15-deep-learning-based-ocr-engine-for-the-indus-scrip)

## Citation

Please cite `indus-script-ocr` in your publications if it helps your research:

    @article{palaniappan2017deep,
      title={Deep Learning the Indus Script},
      author={Palaniappan, Satish and Adhikari, Ronojoy},
      journal={arXiv preprint arXiv:1702.00523},
      year={2017}
    }
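Once the container is up, `app.py` (included below) drives the full pipeline. A minimal usage sketch, assuming the `app.env` variables are exported into the process (the `--env-file` flag above does this); the image path is a placeholder:

    # inside the container, from /root/workspace:
    #   python app.py seals/example_seal.jpg
    # or programmatically:
    from app import process

    # runs region proposal -> text extraction -> segmentation -> classification
    process("seals/example_seal.jpg")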
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/__init__.py
--------------------------------------------------------------------------------
/app.env:
--------------------------------------------------------------------------------
# environment variables

# paths
CAFFE_PATH=/root/caffe/build/tools/caffe
TEXT_NOTEXT_MODELS_DIR=./lib/models/text-notext
JAR_NOJAR_MODELS_DIR=./lib/models/jar-nojar

# gpu computations switch (1 -> GPU computations, 0 -> CPU computations)
IS_GPU=1

# set the logger level (10 -> DEBUG, 20 -> INFO)
LOG_LEVEL=10

# suppress caffe logs
# GLOG_minloglevel=2
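A sketch of how these switches are plausibly consumed at runtime: `helpers/logger.py` (further below) really does read `LOG_LEVEL` from `os.environ`; the `CAFFE_PATH`/`IS_GPU` usage shown here is an assumption about the stage modules, which are not included in this section:

    import os

    import caffe  # pycaffe, available inside the anaconda-caffe image

    caffe_binary = os.environ["CAFFE_PATH"]             # caffe CLI tool
    text_models = os.environ["TEXT_NOTEXT_MODELS_DIR"]  # text/no-text CNN
    jar_models = os.environ["JAR_NOJAR_MODELS_DIR"]     # jar/no-jar CNN

    # 1 -> GPU computations, 0 -> CPU computations
    if os.environ.get("IS_GPU", "0") == "1":
        caffe.set_mode_gpu()
    else:
        caffe.set_mode_cpu()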
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
import sys

import skimage.io

from helpers import logger

import stages.region_proposal.extract_seal as region_proposal_extract_seal
import stages.region_proposal.region_grouping as region_proposal_region_grouping
import stages.region_proposal.region_search as region_proposal_region_search

import stages.text_region_extraction.region_classification as text_region_extraction_region_classification
import stages.text_region_extraction.text_region_formulation as text_region_extraction_text_region_formulation

from stages import symbol_segmentation, symbol_classification

LOGGER = logger.create_logger(__name__)


def get_new_image_dimensions(image):

    LOGGER.info("Calculating the new image dimensions ...")

    img = skimage.io.imread(image.name)
    width = len(img[0])
    height = len(img)

    # default, so new_size is always bound (the original elif chain could
    # fall through for some dimension ranges and raise a NameError)
    new_size = 256

    if width * height < 256 * 256 * (0.95) and abs(width - height) <= 3:
        new_size = 512
    elif width * height < 220 * 220 * (1.11):
        new_size = 256
    elif width * height < 256 * 256:
        new_size = 256
    elif width * height > 512 * 512 * (0.99) and width < 800 and height < 800:
        new_size = 512
    elif width * height < 512 * 512 * (0.95) and width * height > 256 * 256 * (1.15):
        new_size = 512

    # rescale to new_size wide, preserving the aspect ratio
    new_height = int(new_size * height / width)
    new_width = new_size

    return new_width, new_height


def get_text_regions(seal, new_width, new_height):
    # region_proposal
    candidate_regions = \
        region_proposal_region_search.get_candidate_regions(seal, new_width, new_height)
    LOGGER.info(candidate_regions)
    grouped_regions = \
        region_proposal_region_grouping.group_candidate_regions(candidate_regions, new_width, new_height)
    LOGGER.info(grouped_regions)

    # text_region_extraction
    text_regions, no_text_regions, both_regions = \
        text_region_extraction_region_classification.process_regions(seal, grouped_regions, new_width, new_height)
    formulated_text_regions = \
        text_region_extraction_text_region_formulation.process_regions(text_regions, no_text_regions, both_regions, new_width, new_height)

    return formulated_text_regions


def get_best_text_regions(seal, new_width, new_height):
    orig_image = skimage.io.imread(seal.name)
    orig_width = len(orig_image[0])
    orig_height = len(orig_image)

    all_dimensions = set([256, 512, orig_width])
    tried_dimensions = set()

    while True:
        tried_dimensions.add(new_width)
        text_regions = get_text_regions(seal, new_width, new_height)

        # min area check
        is_less_min_area = False
        for x, y, w, h in text_regions:
            if w * h < new_width * new_height * 0.20 and (w < new_width * 0.20 or h < new_height * 0.20):
                is_less_min_area = True

        # retry with an untried dimension if nothing usable was found
        if (len(text_regions) == 0 or is_less_min_area) and len(tried_dimensions) < 3:
            new_width = list(all_dimensions - tried_dimensions)[0]
            new_height = int(new_width * orig_height / orig_width)
            LOGGER.info("New size being tried: " + str(new_width))
        else:
            return text_regions, new_width, new_height


def process(image_path):
    seal = region_proposal_extract_seal.crop_white(image_path)
    new_width, new_height = get_new_image_dimensions(seal)
    best_text_regions, updated_width, updated_height = get_best_text_regions(seal, new_width, new_height)
    symbols = symbol_segmentation.get_symbols(seal, best_text_regions, updated_width, updated_height)
    symbol_sequence = symbol_classification.process_symbols(symbols)

    LOGGER.info("The symbol sequence: " + str([s[1] for s in symbol_sequence]))


if __name__ == "__main__":
    input_artifact_image_path = sys.argv[1]
    process(input_artifact_image_path)

--------------------------------------------------------------------------------
/helpers/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/helpers/__init__.py
--------------------------------------------------------------------------------
/helpers/logger.py:
--------------------------------------------------------------------------------
import logging
import logging.handlers
import os
import signal
import sys

from pythonjsonlogger import jsonlogger


CONSOLE_FORMATTER = logging.Formatter('[%(asctime)s] %(levelname)s --- %(message)s ' +
                                      '(%(filename)s:%(lineno)d - %(funcName)s())',
                                      datefmt='%Y-%m-%d %H:%M:%S')

JSON_FORMATTER = jsonlogger.JsonFormatter('%(asctime)s %(levelname)s %(message)s ' +
                                          '%(filename)s %(module)s %(funcName)s %(lineno)d ',
                                          datefmt='%Y-%m-%d %H:%M:%S')


LOG_HANDLER_CONSOLE = logging.StreamHandler(stream=sys.stdout)
LOG_HANDLER_CONSOLE.setLevel(logging.INFO)
LOG_HANDLER_CONSOLE.setFormatter(CONSOLE_FORMATTER)

LOG_HANDLER_FILE = logging.FileHandler("indus_script_ocr.log", mode="a")
LOG_HANDLER_FILE.setLevel(logging.DEBUG)
LOG_HANDLER_FILE.setFormatter(JSON_FORMATTER)


def create_logger(caller_name):

    logger = logging.getLogger(caller_name)
    logger.propagate = False
    logger.setLevel(int(os.environ["LOG_LEVEL"]))
    logger.addHandler(LOG_HANDLER_CONSOLE)
    logger.addHandler(LOG_HANDLER_FILE)

    return logger


def signal_handler(signum, frame):  # renamed from "signal" to avoid shadowing the module
    logger = create_logger(__name__)
    logger.info("App terminated!")
    logging.shutdown()
    sys.exit(0)


signal.signal(signal.SIGINT, signal_handler)

--------------------------------------------------------------------------------
/helpers/temp.py:
--------------------------------------------------------------------------------
import os
import shutil
import tempfile


class TemporaryDirectory(object):
    def __init__(self):
        self.name = tempfile.mkdtemp()

    def cleanup(self):
        shutil.rmtree(self.name)


class TemporaryFile(object):
    def __init__(self, ext=""):
        # create the file with the extension directly; the original appended
        # ext to .name after creation, which left the real file on disk
        # without the extension and cleanup() pointing at a non-existent path
        temp_file = tempfile.NamedTemporaryFile(suffix=ext, delete=False)

        self.fd = temp_file
        self.name = temp_file.name

    def cleanup(self):
        os.unlink(self.name)
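A short usage sketch for these helpers (hypothetical caller): unlike an auto-deleting `tempfile.NamedTemporaryFile`, these wrappers hand out plain filesystem paths that survive until `cleanup()` is called explicitly, which suits passing intermediate artifacts to external tools such as the `caffe` binary:

    import numpy as np
    import skimage.io

    from helpers.temp import TemporaryDirectory, TemporaryFile

    crop = TemporaryFile(ext=".png")   # e.g. an intermediate region crop
    scratch = TemporaryDirectory()     # scratch space for a pipeline stage
    try:
        img = np.zeros((64, 64), dtype=np.uint8)  # stand-in payload
        skimage.io.imsave(crop.name, img)         # external tools see a real path
    finally:
        crop.cleanup()      # unlinks the file
        scratch.cleanup()   # removes the directory tree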
-------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/lib/__init__.py -------------------------------------------------------------------------------- /lib/models/jar-nojar/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "indus" 2 | 3 | input: "data" 4 | input_dim: 1 5 | input_dim: 1 6 | input_dim: 64 7 | input_dim: 64 8 | 9 | layers { 10 | name: "conv1" 11 | type: CONVOLUTION 12 | bottom: "data" 13 | top: "conv1" 14 | blobs_lr: 1 15 | blobs_lr: 2 16 | convolution_param { 17 | num_output: 20 18 | kernel_size: 5 19 | stride: 1 20 | weight_filler { 21 | type: "xavier" 22 | } 23 | bias_filler { 24 | type: "constant" 25 | } 26 | } 27 | } 28 | layers { 29 | name: "conv2" 30 | type: CONVOLUTION 31 | bottom: "conv1" 32 | top: "conv2" 33 | blobs_lr: 1 34 | blobs_lr: 2 35 | convolution_param { 36 | num_output: 50 37 | kernel_size: 5 38 | stride: 1 39 | weight_filler { 40 | type: "xavier" 41 | } 42 | bias_filler { 43 | type: "constant" 44 | } 45 | } 46 | } 47 | layers{ 48 | name: "dropout" 49 | type: DROPOUT 50 | bottom: "conv2" 51 | top: "dropout" 52 | } 53 | layers { 54 | name: "ip1" 55 | type: INNER_PRODUCT 56 | bottom: "dropout" 57 | top: "ip1" 58 | blobs_lr: 1 59 | blobs_lr: 2 60 | inner_product_param { 61 | num_output: 500 62 | weight_filler { 63 | type: "xavier" 64 | } 65 | bias_filler { 66 | type: "constant" 67 | } 68 | } 69 | } 70 | layers { 71 | name: "relu1" 72 | type: RELU 73 | bottom: "ip1" 74 | top: "ip1" 75 | } 76 | layers { 77 | name: "ip2" 78 | type: INNER_PRODUCT 79 | bottom: "ip1" 80 | top: "ip2" 81 | blobs_lr: 1 82 | blobs_lr: 2 83 | inner_product_param { 84 | num_output: 2 85 | weight_filler { 86 | type: "xavier" 87 | } 88 | bias_filler { 89 | type: "constant" 90 | } 91 | } 92 | } 93 | layers { 94 | name: "prob" 95 | type: SOFTMAX 96 | bottom: "ip2" 97 | top: "prob" 98 | } 99 | -------------------------------------------------------------------------------- /lib/models/jar-nojar/weights.caffemodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:1aee444aa39b782fc575cb3cb1ee700de5a23c67e6f0949a72cbf9f8df3b5c5b 3 | size 313708914 4 | -------------------------------------------------------------------------------- /lib/models/text-notext/deploy.prototxt: -------------------------------------------------------------------------------- 1 | name: "GoogleNet" 2 | input: "data" 3 | input_shape { 4 | dim: 1 5 | dim: 3 6 | dim: 224 7 | dim: 224 8 | } 9 | layer { 10 | name: "conv1/7x7_s2" 11 | type: "Convolution" 12 | bottom: "data" 13 | top: "conv1/7x7_s2" 14 | param { 15 | lr_mult: 0 16 | decay_mult: 0 17 | } 18 | param { 19 | lr_mult: 0 20 | decay_mult: 0 21 | } 22 | convolution_param { 23 | num_output: 64 24 | pad: 3 25 | kernel_size: 7 26 | stride: 2 27 | weight_filler { 28 | type: "xavier" 29 | std: 0.1 30 | } 31 | bias_filler { 32 | type: "constant" 33 | value: 0.2 34 | } 35 | } 36 | } 37 | layer { 38 | name: "conv1/relu_7x7" 39 | type: "ReLU" 40 | bottom: "conv1/7x7_s2" 41 | top: "conv1/7x7_s2" 42 | } 43 | layer { 44 | name: "pool1/3x3_s2" 45 | type: "Pooling" 46 | bottom: "conv1/7x7_s2" 47 | top: "pool1/3x3_s2" 48 | pooling_param { 49 | pool: MAX 50 | kernel_size: 3 51 | stride: 2 52 | } 53 | } 54 | 
layer { 55 | name: "pool1/norm1" 56 | type: "LRN" 57 | bottom: "pool1/3x3_s2" 58 | top: "pool1/norm1" 59 | lrn_param { 60 | local_size: 5 61 | alpha: 0.0001 62 | beta: 0.75 63 | } 64 | } 65 | layer { 66 | name: "conv2/3x3_reduce" 67 | type: "Convolution" 68 | bottom: "pool1/norm1" 69 | top: "conv2/3x3_reduce" 70 | param { 71 | lr_mult: 0 72 | decay_mult: 0 73 | } 74 | param { 75 | lr_mult: 0 76 | decay_mult: 0 77 | } 78 | convolution_param { 79 | num_output: 64 80 | kernel_size: 1 81 | weight_filler { 82 | type: "xavier" 83 | std: 0.1 84 | } 85 | bias_filler { 86 | type: "constant" 87 | value: 0.2 88 | } 89 | } 90 | } 91 | layer { 92 | name: "conv2/relu_3x3_reduce" 93 | type: "ReLU" 94 | bottom: "conv2/3x3_reduce" 95 | top: "conv2/3x3_reduce" 96 | } 97 | layer { 98 | name: "conv2/3x3" 99 | type: "Convolution" 100 | bottom: "conv2/3x3_reduce" 101 | top: "conv2/3x3" 102 | param { 103 | lr_mult: 0 104 | decay_mult: 0 105 | } 106 | param { 107 | lr_mult: 0 108 | decay_mult: 0 109 | } 110 | convolution_param { 111 | num_output: 192 112 | pad: 1 113 | kernel_size: 3 114 | weight_filler { 115 | type: "xavier" 116 | std: 0.03 117 | } 118 | bias_filler { 119 | type: "constant" 120 | value: 0.2 121 | } 122 | } 123 | } 124 | layer { 125 | name: "conv2/relu_3x3" 126 | type: "ReLU" 127 | bottom: "conv2/3x3" 128 | top: "conv2/3x3" 129 | } 130 | layer { 131 | name: "conv2/norm2" 132 | type: "LRN" 133 | bottom: "conv2/3x3" 134 | top: "conv2/norm2" 135 | lrn_param { 136 | local_size: 5 137 | alpha: 0.0001 138 | beta: 0.75 139 | } 140 | } 141 | layer { 142 | name: "pool2/3x3_s2" 143 | type: "Pooling" 144 | bottom: "conv2/norm2" 145 | top: "pool2/3x3_s2" 146 | pooling_param { 147 | pool: MAX 148 | kernel_size: 3 149 | stride: 2 150 | } 151 | } 152 | layer { 153 | name: "inception_3a/1x1" 154 | type: "Convolution" 155 | bottom: "pool2/3x3_s2" 156 | top: "inception_3a/1x1" 157 | param { 158 | lr_mult: 0 159 | decay_mult: 0 160 | } 161 | param { 162 | lr_mult: 0 163 | decay_mult: 0 164 | } 165 | convolution_param { 166 | num_output: 64 167 | kernel_size: 1 168 | weight_filler { 169 | type: "xavier" 170 | std: 0.03 171 | } 172 | bias_filler { 173 | type: "constant" 174 | value: 0.2 175 | } 176 | } 177 | } 178 | layer { 179 | name: "inception_3a/relu_1x1" 180 | type: "ReLU" 181 | bottom: "inception_3a/1x1" 182 | top: "inception_3a/1x1" 183 | } 184 | layer { 185 | name: "inception_3a/3x3_reduce" 186 | type: "Convolution" 187 | bottom: "pool2/3x3_s2" 188 | top: "inception_3a/3x3_reduce" 189 | param { 190 | lr_mult: 0 191 | decay_mult: 0 192 | } 193 | param { 194 | lr_mult: 0 195 | decay_mult: 0 196 | } 197 | convolution_param { 198 | num_output: 96 199 | kernel_size: 1 200 | weight_filler { 201 | type: "xavier" 202 | std: 0.09 203 | } 204 | bias_filler { 205 | type: "constant" 206 | value: 0.2 207 | } 208 | } 209 | } 210 | layer { 211 | name: "inception_3a/relu_3x3_reduce" 212 | type: "ReLU" 213 | bottom: "inception_3a/3x3_reduce" 214 | top: "inception_3a/3x3_reduce" 215 | } 216 | layer { 217 | name: "inception_3a/3x3" 218 | type: "Convolution" 219 | bottom: "inception_3a/3x3_reduce" 220 | top: "inception_3a/3x3" 221 | param { 222 | lr_mult: 0 223 | decay_mult: 0 224 | } 225 | param { 226 | lr_mult: 0 227 | decay_mult: 0 228 | } 229 | convolution_param { 230 | num_output: 128 231 | pad: 1 232 | kernel_size: 3 233 | weight_filler { 234 | type: "xavier" 235 | std: 0.03 236 | } 237 | bias_filler { 238 | type: "constant" 239 | value: 0.2 240 | } 241 | } 242 | } 243 | layer { 244 | name: "inception_3a/relu_3x3" 245 | 
type: "ReLU" 246 | bottom: "inception_3a/3x3" 247 | top: "inception_3a/3x3" 248 | } 249 | layer { 250 | name: "inception_3a/5x5_reduce" 251 | type: "Convolution" 252 | bottom: "pool2/3x3_s2" 253 | top: "inception_3a/5x5_reduce" 254 | param { 255 | lr_mult: 0 256 | decay_mult: 0 257 | } 258 | param { 259 | lr_mult: 0 260 | decay_mult: 0 261 | } 262 | convolution_param { 263 | num_output: 16 264 | kernel_size: 1 265 | weight_filler { 266 | type: "xavier" 267 | std: 0.2 268 | } 269 | bias_filler { 270 | type: "constant" 271 | value: 0.2 272 | } 273 | } 274 | } 275 | layer { 276 | name: "inception_3a/relu_5x5_reduce" 277 | type: "ReLU" 278 | bottom: "inception_3a/5x5_reduce" 279 | top: "inception_3a/5x5_reduce" 280 | } 281 | layer { 282 | name: "inception_3a/5x5" 283 | type: "Convolution" 284 | bottom: "inception_3a/5x5_reduce" 285 | top: "inception_3a/5x5" 286 | param { 287 | lr_mult: 0 288 | decay_mult: 0 289 | } 290 | param { 291 | lr_mult: 0 292 | decay_mult: 0 293 | } 294 | convolution_param { 295 | num_output: 32 296 | pad: 2 297 | kernel_size: 5 298 | weight_filler { 299 | type: "xavier" 300 | std: 0.03 301 | } 302 | bias_filler { 303 | type: "constant" 304 | value: 0.2 305 | } 306 | } 307 | } 308 | layer { 309 | name: "inception_3a/relu_5x5" 310 | type: "ReLU" 311 | bottom: "inception_3a/5x5" 312 | top: "inception_3a/5x5" 313 | } 314 | layer { 315 | name: "inception_3a/pool" 316 | type: "Pooling" 317 | bottom: "pool2/3x3_s2" 318 | top: "inception_3a/pool" 319 | pooling_param { 320 | pool: MAX 321 | kernel_size: 3 322 | stride: 1 323 | pad: 1 324 | } 325 | } 326 | layer { 327 | name: "inception_3a/pool_proj" 328 | type: "Convolution" 329 | bottom: "inception_3a/pool" 330 | top: "inception_3a/pool_proj" 331 | param { 332 | lr_mult: 0 333 | decay_mult: 0 334 | } 335 | param { 336 | lr_mult: 0 337 | decay_mult: 0 338 | } 339 | convolution_param { 340 | num_output: 32 341 | kernel_size: 1 342 | weight_filler { 343 | type: "xavier" 344 | std: 0.1 345 | } 346 | bias_filler { 347 | type: "constant" 348 | value: 0.2 349 | } 350 | } 351 | } 352 | layer { 353 | name: "inception_3a/relu_pool_proj" 354 | type: "ReLU" 355 | bottom: "inception_3a/pool_proj" 356 | top: "inception_3a/pool_proj" 357 | } 358 | layer { 359 | name: "inception_3a/output" 360 | type: "Concat" 361 | bottom: "inception_3a/1x1" 362 | bottom: "inception_3a/3x3" 363 | bottom: "inception_3a/5x5" 364 | bottom: "inception_3a/pool_proj" 365 | top: "inception_3a/output" 366 | } 367 | layer { 368 | name: "inception_3b/1x1" 369 | type: "Convolution" 370 | bottom: "inception_3a/output" 371 | top: "inception_3b/1x1" 372 | param { 373 | lr_mult: 0 374 | decay_mult: 0 375 | } 376 | param { 377 | lr_mult: 0 378 | decay_mult: 0 379 | } 380 | convolution_param { 381 | num_output: 128 382 | kernel_size: 1 383 | weight_filler { 384 | type: "xavier" 385 | std: 0.03 386 | } 387 | bias_filler { 388 | type: "constant" 389 | value: 0.2 390 | } 391 | } 392 | } 393 | layer { 394 | name: "inception_3b/relu_1x1" 395 | type: "ReLU" 396 | bottom: "inception_3b/1x1" 397 | top: "inception_3b/1x1" 398 | } 399 | layer { 400 | name: "inception_3b/3x3_reduce" 401 | type: "Convolution" 402 | bottom: "inception_3a/output" 403 | top: "inception_3b/3x3_reduce" 404 | param { 405 | lr_mult: 0 406 | decay_mult: 0 407 | } 408 | param { 409 | lr_mult: 0 410 | decay_mult: 0 411 | } 412 | convolution_param { 413 | num_output: 128 414 | kernel_size: 1 415 | weight_filler { 416 | type: "xavier" 417 | std: 0.09 418 | } 419 | bias_filler { 420 | type: "constant" 421 | value: 
0.2 422 | } 423 | } 424 | } 425 | layer { 426 | name: "inception_3b/relu_3x3_reduce" 427 | type: "ReLU" 428 | bottom: "inception_3b/3x3_reduce" 429 | top: "inception_3b/3x3_reduce" 430 | } 431 | layer { 432 | name: "inception_3b/3x3" 433 | type: "Convolution" 434 | bottom: "inception_3b/3x3_reduce" 435 | top: "inception_3b/3x3" 436 | param { 437 | lr_mult: 0 438 | decay_mult: 0 439 | } 440 | param { 441 | lr_mult: 0 442 | decay_mult: 0 443 | } 444 | convolution_param { 445 | num_output: 192 446 | pad: 1 447 | kernel_size: 3 448 | weight_filler { 449 | type: "xavier" 450 | std: 0.03 451 | } 452 | bias_filler { 453 | type: "constant" 454 | value: 0.2 455 | } 456 | } 457 | } 458 | layer { 459 | name: "inception_3b/relu_3x3" 460 | type: "ReLU" 461 | bottom: "inception_3b/3x3" 462 | top: "inception_3b/3x3" 463 | } 464 | layer { 465 | name: "inception_3b/5x5_reduce" 466 | type: "Convolution" 467 | bottom: "inception_3a/output" 468 | top: "inception_3b/5x5_reduce" 469 | param { 470 | lr_mult: 0 471 | decay_mult: 0 472 | } 473 | param { 474 | lr_mult: 0 475 | decay_mult: 0 476 | } 477 | convolution_param { 478 | num_output: 32 479 | kernel_size: 1 480 | weight_filler { 481 | type: "xavier" 482 | std: 0.2 483 | } 484 | bias_filler { 485 | type: "constant" 486 | value: 0.2 487 | } 488 | } 489 | } 490 | layer { 491 | name: "inception_3b/relu_5x5_reduce" 492 | type: "ReLU" 493 | bottom: "inception_3b/5x5_reduce" 494 | top: "inception_3b/5x5_reduce" 495 | } 496 | layer { 497 | name: "inception_3b/5x5" 498 | type: "Convolution" 499 | bottom: "inception_3b/5x5_reduce" 500 | top: "inception_3b/5x5" 501 | param { 502 | lr_mult: 0 503 | decay_mult: 0 504 | } 505 | param { 506 | lr_mult: 0 507 | decay_mult: 0 508 | } 509 | convolution_param { 510 | num_output: 96 511 | pad: 2 512 | kernel_size: 5 513 | weight_filler { 514 | type: "xavier" 515 | std: 0.03 516 | } 517 | bias_filler { 518 | type: "constant" 519 | value: 0.2 520 | } 521 | } 522 | } 523 | layer { 524 | name: "inception_3b/relu_5x5" 525 | type: "ReLU" 526 | bottom: "inception_3b/5x5" 527 | top: "inception_3b/5x5" 528 | } 529 | layer { 530 | name: "inception_3b/pool" 531 | type: "Pooling" 532 | bottom: "inception_3a/output" 533 | top: "inception_3b/pool" 534 | pooling_param { 535 | pool: MAX 536 | kernel_size: 3 537 | stride: 1 538 | pad: 1 539 | } 540 | } 541 | layer { 542 | name: "inception_3b/pool_proj" 543 | type: "Convolution" 544 | bottom: "inception_3b/pool" 545 | top: "inception_3b/pool_proj" 546 | param { 547 | lr_mult: 0 548 | decay_mult: 0 549 | } 550 | param { 551 | lr_mult: 0 552 | decay_mult: 0 553 | } 554 | convolution_param { 555 | num_output: 64 556 | kernel_size: 1 557 | weight_filler { 558 | type: "xavier" 559 | std: 0.1 560 | } 561 | bias_filler { 562 | type: "constant" 563 | value: 0.2 564 | } 565 | } 566 | } 567 | layer { 568 | name: "inception_3b/relu_pool_proj" 569 | type: "ReLU" 570 | bottom: "inception_3b/pool_proj" 571 | top: "inception_3b/pool_proj" 572 | } 573 | layer { 574 | name: "inception_3b/output" 575 | type: "Concat" 576 | bottom: "inception_3b/1x1" 577 | bottom: "inception_3b/3x3" 578 | bottom: "inception_3b/5x5" 579 | bottom: "inception_3b/pool_proj" 580 | top: "inception_3b/output" 581 | } 582 | layer { 583 | name: "pool3/3x3_s2" 584 | type: "Pooling" 585 | bottom: "inception_3b/output" 586 | top: "pool3/3x3_s2" 587 | pooling_param { 588 | pool: MAX 589 | kernel_size: 3 590 | stride: 2 591 | } 592 | } 593 | layer { 594 | name: "inception_4a/1x1" 595 | type: "Convolution" 596 | bottom: "pool3/3x3_s2" 597 | top: 
"inception_4a/1x1" 598 | param { 599 | lr_mult: 0 600 | decay_mult: 0 601 | } 602 | param { 603 | lr_mult: 0 604 | decay_mult: 0 605 | } 606 | convolution_param { 607 | num_output: 192 608 | kernel_size: 1 609 | weight_filler { 610 | type: "xavier" 611 | std: 0.03 612 | } 613 | bias_filler { 614 | type: "constant" 615 | value: 0.2 616 | } 617 | } 618 | } 619 | layer { 620 | name: "inception_4a/relu_1x1" 621 | type: "ReLU" 622 | bottom: "inception_4a/1x1" 623 | top: "inception_4a/1x1" 624 | } 625 | layer { 626 | name: "inception_4a/3x3_reduce" 627 | type: "Convolution" 628 | bottom: "pool3/3x3_s2" 629 | top: "inception_4a/3x3_reduce" 630 | param { 631 | lr_mult: 0 632 | decay_mult: 0 633 | } 634 | param { 635 | lr_mult: 0 636 | decay_mult: 0 637 | } 638 | convolution_param { 639 | num_output: 96 640 | kernel_size: 1 641 | weight_filler { 642 | type: "xavier" 643 | std: 0.09 644 | } 645 | bias_filler { 646 | type: "constant" 647 | value: 0.2 648 | } 649 | } 650 | } 651 | layer { 652 | name: "inception_4a/relu_3x3_reduce" 653 | type: "ReLU" 654 | bottom: "inception_4a/3x3_reduce" 655 | top: "inception_4a/3x3_reduce" 656 | } 657 | layer { 658 | name: "inception_4a/3x3" 659 | type: "Convolution" 660 | bottom: "inception_4a/3x3_reduce" 661 | top: "inception_4a/3x3" 662 | param { 663 | lr_mult: 0 664 | decay_mult: 0 665 | } 666 | param { 667 | lr_mult: 0 668 | decay_mult: 0 669 | } 670 | convolution_param { 671 | num_output: 208 672 | pad: 1 673 | kernel_size: 3 674 | weight_filler { 675 | type: "xavier" 676 | std: 0.03 677 | } 678 | bias_filler { 679 | type: "constant" 680 | value: 0.2 681 | } 682 | } 683 | } 684 | layer { 685 | name: "inception_4a/relu_3x3" 686 | type: "ReLU" 687 | bottom: "inception_4a/3x3" 688 | top: "inception_4a/3x3" 689 | } 690 | layer { 691 | name: "inception_4a/5x5_reduce" 692 | type: "Convolution" 693 | bottom: "pool3/3x3_s2" 694 | top: "inception_4a/5x5_reduce" 695 | param { 696 | lr_mult: 0 697 | decay_mult: 0 698 | } 699 | param { 700 | lr_mult: 0 701 | decay_mult: 0 702 | } 703 | convolution_param { 704 | num_output: 16 705 | kernel_size: 1 706 | weight_filler { 707 | type: "xavier" 708 | std: 0.2 709 | } 710 | bias_filler { 711 | type: "constant" 712 | value: 0.2 713 | } 714 | } 715 | } 716 | layer { 717 | name: "inception_4a/relu_5x5_reduce" 718 | type: "ReLU" 719 | bottom: "inception_4a/5x5_reduce" 720 | top: "inception_4a/5x5_reduce" 721 | } 722 | layer { 723 | name: "inception_4a/5x5" 724 | type: "Convolution" 725 | bottom: "inception_4a/5x5_reduce" 726 | top: "inception_4a/5x5" 727 | param { 728 | lr_mult: 0 729 | decay_mult: 0 730 | } 731 | param { 732 | lr_mult: 0 733 | decay_mult: 0 734 | } 735 | convolution_param { 736 | num_output: 48 737 | pad: 2 738 | kernel_size: 5 739 | weight_filler { 740 | type: "xavier" 741 | std: 0.03 742 | } 743 | bias_filler { 744 | type: "constant" 745 | value: 0.2 746 | } 747 | } 748 | } 749 | layer { 750 | name: "inception_4a/relu_5x5" 751 | type: "ReLU" 752 | bottom: "inception_4a/5x5" 753 | top: "inception_4a/5x5" 754 | } 755 | layer { 756 | name: "inception_4a/pool" 757 | type: "Pooling" 758 | bottom: "pool3/3x3_s2" 759 | top: "inception_4a/pool" 760 | pooling_param { 761 | pool: MAX 762 | kernel_size: 3 763 | stride: 1 764 | pad: 1 765 | } 766 | } 767 | layer { 768 | name: "inception_4a/pool_proj" 769 | type: "Convolution" 770 | bottom: "inception_4a/pool" 771 | top: "inception_4a/pool_proj" 772 | param { 773 | lr_mult: 0 774 | decay_mult: 0 775 | } 776 | param { 777 | lr_mult: 0 778 | decay_mult: 0 779 | } 780 | 
convolution_param { 781 | num_output: 64 782 | kernel_size: 1 783 | weight_filler { 784 | type: "xavier" 785 | std: 0.1 786 | } 787 | bias_filler { 788 | type: "constant" 789 | value: 0.2 790 | } 791 | } 792 | } 793 | layer { 794 | name: "inception_4a/relu_pool_proj" 795 | type: "ReLU" 796 | bottom: "inception_4a/pool_proj" 797 | top: "inception_4a/pool_proj" 798 | } 799 | layer { 800 | name: "inception_4a/output" 801 | type: "Concat" 802 | bottom: "inception_4a/1x1" 803 | bottom: "inception_4a/3x3" 804 | bottom: "inception_4a/5x5" 805 | bottom: "inception_4a/pool_proj" 806 | top: "inception_4a/output" 807 | } 808 | layer { 809 | name: "inception_4b/1x1" 810 | type: "Convolution" 811 | bottom: "inception_4a/output" 812 | top: "inception_4b/1x1" 813 | param { 814 | lr_mult: 0 815 | decay_mult: 0 816 | } 817 | param { 818 | lr_mult: 0 819 | decay_mult: 0 820 | } 821 | convolution_param { 822 | num_output: 160 823 | kernel_size: 1 824 | weight_filler { 825 | type: "xavier" 826 | std: 0.03 827 | } 828 | bias_filler { 829 | type: "constant" 830 | value: 0.2 831 | } 832 | } 833 | } 834 | layer { 835 | name: "inception_4b/relu_1x1" 836 | type: "ReLU" 837 | bottom: "inception_4b/1x1" 838 | top: "inception_4b/1x1" 839 | } 840 | layer { 841 | name: "inception_4b/3x3_reduce" 842 | type: "Convolution" 843 | bottom: "inception_4a/output" 844 | top: "inception_4b/3x3_reduce" 845 | param { 846 | lr_mult: 0 847 | decay_mult: 0 848 | } 849 | param { 850 | lr_mult: 0 851 | decay_mult: 0 852 | } 853 | convolution_param { 854 | num_output: 112 855 | kernel_size: 1 856 | weight_filler { 857 | type: "xavier" 858 | std: 0.09 859 | } 860 | bias_filler { 861 | type: "constant" 862 | value: 0.2 863 | } 864 | } 865 | } 866 | layer { 867 | name: "inception_4b/relu_3x3_reduce" 868 | type: "ReLU" 869 | bottom: "inception_4b/3x3_reduce" 870 | top: "inception_4b/3x3_reduce" 871 | } 872 | layer { 873 | name: "inception_4b/3x3" 874 | type: "Convolution" 875 | bottom: "inception_4b/3x3_reduce" 876 | top: "inception_4b/3x3" 877 | param { 878 | lr_mult: 0 879 | decay_mult: 0 880 | } 881 | param { 882 | lr_mult: 0 883 | decay_mult: 0 884 | } 885 | convolution_param { 886 | num_output: 224 887 | pad: 1 888 | kernel_size: 3 889 | weight_filler { 890 | type: "xavier" 891 | std: 0.03 892 | } 893 | bias_filler { 894 | type: "constant" 895 | value: 0.2 896 | } 897 | } 898 | } 899 | layer { 900 | name: "inception_4b/relu_3x3" 901 | type: "ReLU" 902 | bottom: "inception_4b/3x3" 903 | top: "inception_4b/3x3" 904 | } 905 | layer { 906 | name: "inception_4b/5x5_reduce" 907 | type: "Convolution" 908 | bottom: "inception_4a/output" 909 | top: "inception_4b/5x5_reduce" 910 | param { 911 | lr_mult: 0 912 | decay_mult: 0 913 | } 914 | param { 915 | lr_mult: 0 916 | decay_mult: 0 917 | } 918 | convolution_param { 919 | num_output: 24 920 | kernel_size: 1 921 | weight_filler { 922 | type: "xavier" 923 | std: 0.2 924 | } 925 | bias_filler { 926 | type: "constant" 927 | value: 0.2 928 | } 929 | } 930 | } 931 | layer { 932 | name: "inception_4b/relu_5x5_reduce" 933 | type: "ReLU" 934 | bottom: "inception_4b/5x5_reduce" 935 | top: "inception_4b/5x5_reduce" 936 | } 937 | layer { 938 | name: "inception_4b/5x5" 939 | type: "Convolution" 940 | bottom: "inception_4b/5x5_reduce" 941 | top: "inception_4b/5x5" 942 | param { 943 | lr_mult: 0 944 | decay_mult: 0 945 | } 946 | param { 947 | lr_mult: 0 948 | decay_mult: 0 949 | } 950 | convolution_param { 951 | num_output: 64 952 | pad: 2 953 | kernel_size: 5 954 | weight_filler { 955 | type: "xavier" 956 | 
std: 0.03 957 | } 958 | bias_filler { 959 | type: "constant" 960 | value: 0.2 961 | } 962 | } 963 | } 964 | layer { 965 | name: "inception_4b/relu_5x5" 966 | type: "ReLU" 967 | bottom: "inception_4b/5x5" 968 | top: "inception_4b/5x5" 969 | } 970 | layer { 971 | name: "inception_4b/pool" 972 | type: "Pooling" 973 | bottom: "inception_4a/output" 974 | top: "inception_4b/pool" 975 | pooling_param { 976 | pool: MAX 977 | kernel_size: 3 978 | stride: 1 979 | pad: 1 980 | } 981 | } 982 | layer { 983 | name: "inception_4b/pool_proj" 984 | type: "Convolution" 985 | bottom: "inception_4b/pool" 986 | top: "inception_4b/pool_proj" 987 | param { 988 | lr_mult: 0 989 | decay_mult: 0 990 | } 991 | param { 992 | lr_mult: 0 993 | decay_mult: 0 994 | } 995 | convolution_param { 996 | num_output: 64 997 | kernel_size: 1 998 | weight_filler { 999 | type: "xavier" 1000 | std: 0.1 1001 | } 1002 | bias_filler { 1003 | type: "constant" 1004 | value: 0.2 1005 | } 1006 | } 1007 | } 1008 | layer { 1009 | name: "inception_4b/relu_pool_proj" 1010 | type: "ReLU" 1011 | bottom: "inception_4b/pool_proj" 1012 | top: "inception_4b/pool_proj" 1013 | } 1014 | layer { 1015 | name: "inception_4b/output" 1016 | type: "Concat" 1017 | bottom: "inception_4b/1x1" 1018 | bottom: "inception_4b/3x3" 1019 | bottom: "inception_4b/5x5" 1020 | bottom: "inception_4b/pool_proj" 1021 | top: "inception_4b/output" 1022 | } 1023 | layer { 1024 | name: "inception_4c/1x1" 1025 | type: "Convolution" 1026 | bottom: "inception_4b/output" 1027 | top: "inception_4c/1x1" 1028 | param { 1029 | lr_mult: 0 1030 | decay_mult: 0 1031 | } 1032 | param { 1033 | lr_mult: 0 1034 | decay_mult: 0 1035 | } 1036 | convolution_param { 1037 | num_output: 128 1038 | kernel_size: 1 1039 | weight_filler { 1040 | type: "xavier" 1041 | std: 0.03 1042 | } 1043 | bias_filler { 1044 | type: "constant" 1045 | value: 0.2 1046 | } 1047 | } 1048 | } 1049 | layer { 1050 | name: "inception_4c/relu_1x1" 1051 | type: "ReLU" 1052 | bottom: "inception_4c/1x1" 1053 | top: "inception_4c/1x1" 1054 | } 1055 | layer { 1056 | name: "inception_4c/3x3_reduce" 1057 | type: "Convolution" 1058 | bottom: "inception_4b/output" 1059 | top: "inception_4c/3x3_reduce" 1060 | param { 1061 | lr_mult: 0 1062 | decay_mult: 0 1063 | } 1064 | param { 1065 | lr_mult: 0 1066 | decay_mult: 0 1067 | } 1068 | convolution_param { 1069 | num_output: 128 1070 | kernel_size: 1 1071 | weight_filler { 1072 | type: "xavier" 1073 | std: 0.09 1074 | } 1075 | bias_filler { 1076 | type: "constant" 1077 | value: 0.2 1078 | } 1079 | } 1080 | } 1081 | layer { 1082 | name: "inception_4c/relu_3x3_reduce" 1083 | type: "ReLU" 1084 | bottom: "inception_4c/3x3_reduce" 1085 | top: "inception_4c/3x3_reduce" 1086 | } 1087 | layer { 1088 | name: "inception_4c/3x3" 1089 | type: "Convolution" 1090 | bottom: "inception_4c/3x3_reduce" 1091 | top: "inception_4c/3x3" 1092 | param { 1093 | lr_mult: 0 1094 | decay_mult: 0 1095 | } 1096 | param { 1097 | lr_mult: 0 1098 | decay_mult: 0 1099 | } 1100 | convolution_param { 1101 | num_output: 256 1102 | pad: 1 1103 | kernel_size: 3 1104 | weight_filler { 1105 | type: "xavier" 1106 | std: 0.03 1107 | } 1108 | bias_filler { 1109 | type: "constant" 1110 | value: 0.2 1111 | } 1112 | } 1113 | } 1114 | layer { 1115 | name: "inception_4c/relu_3x3" 1116 | type: "ReLU" 1117 | bottom: "inception_4c/3x3" 1118 | top: "inception_4c/3x3" 1119 | } 1120 | layer { 1121 | name: "inception_4c/5x5_reduce" 1122 | type: "Convolution" 1123 | bottom: "inception_4b/output" 1124 | top: "inception_4c/5x5_reduce" 1125 | 
param { 1126 | lr_mult: 0 1127 | decay_mult: 0 1128 | } 1129 | param { 1130 | lr_mult: 0 1131 | decay_mult: 0 1132 | } 1133 | convolution_param { 1134 | num_output: 24 1135 | kernel_size: 1 1136 | weight_filler { 1137 | type: "xavier" 1138 | std: 0.2 1139 | } 1140 | bias_filler { 1141 | type: "constant" 1142 | value: 0.2 1143 | } 1144 | } 1145 | } 1146 | layer { 1147 | name: "inception_4c/relu_5x5_reduce" 1148 | type: "ReLU" 1149 | bottom: "inception_4c/5x5_reduce" 1150 | top: "inception_4c/5x5_reduce" 1151 | } 1152 | layer { 1153 | name: "inception_4c/5x5" 1154 | type: "Convolution" 1155 | bottom: "inception_4c/5x5_reduce" 1156 | top: "inception_4c/5x5" 1157 | param { 1158 | lr_mult: 0 1159 | decay_mult: 0 1160 | } 1161 | param { 1162 | lr_mult: 0 1163 | decay_mult: 0 1164 | } 1165 | convolution_param { 1166 | num_output: 64 1167 | pad: 2 1168 | kernel_size: 5 1169 | weight_filler { 1170 | type: "xavier" 1171 | std: 0.03 1172 | } 1173 | bias_filler { 1174 | type: "constant" 1175 | value: 0.2 1176 | } 1177 | } 1178 | } 1179 | layer { 1180 | name: "inception_4c/relu_5x5" 1181 | type: "ReLU" 1182 | bottom: "inception_4c/5x5" 1183 | top: "inception_4c/5x5" 1184 | } 1185 | layer { 1186 | name: "inception_4c/pool" 1187 | type: "Pooling" 1188 | bottom: "inception_4b/output" 1189 | top: "inception_4c/pool" 1190 | pooling_param { 1191 | pool: MAX 1192 | kernel_size: 3 1193 | stride: 1 1194 | pad: 1 1195 | } 1196 | } 1197 | layer { 1198 | name: "inception_4c/pool_proj" 1199 | type: "Convolution" 1200 | bottom: "inception_4c/pool" 1201 | top: "inception_4c/pool_proj" 1202 | param { 1203 | lr_mult: 0 1204 | decay_mult: 0 1205 | } 1206 | param { 1207 | lr_mult: 0 1208 | decay_mult: 0 1209 | } 1210 | convolution_param { 1211 | num_output: 64 1212 | kernel_size: 1 1213 | weight_filler { 1214 | type: "xavier" 1215 | std: 0.1 1216 | } 1217 | bias_filler { 1218 | type: "constant" 1219 | value: 0.2 1220 | } 1221 | } 1222 | } 1223 | layer { 1224 | name: "inception_4c/relu_pool_proj" 1225 | type: "ReLU" 1226 | bottom: "inception_4c/pool_proj" 1227 | top: "inception_4c/pool_proj" 1228 | } 1229 | layer { 1230 | name: "inception_4c/output" 1231 | type: "Concat" 1232 | bottom: "inception_4c/1x1" 1233 | bottom: "inception_4c/3x3" 1234 | bottom: "inception_4c/5x5" 1235 | bottom: "inception_4c/pool_proj" 1236 | top: "inception_4c/output" 1237 | } 1238 | layer { 1239 | name: "inception_4d/1x1" 1240 | type: "Convolution" 1241 | bottom: "inception_4c/output" 1242 | top: "inception_4d/1x1" 1243 | param { 1244 | lr_mult: 0 1245 | decay_mult: 0 1246 | } 1247 | param { 1248 | lr_mult: 0 1249 | decay_mult: 0 1250 | } 1251 | convolution_param { 1252 | num_output: 112 1253 | kernel_size: 1 1254 | weight_filler { 1255 | type: "xavier" 1256 | std: 0.03 1257 | } 1258 | bias_filler { 1259 | type: "constant" 1260 | value: 0.2 1261 | } 1262 | } 1263 | } 1264 | layer { 1265 | name: "inception_4d/relu_1x1" 1266 | type: "ReLU" 1267 | bottom: "inception_4d/1x1" 1268 | top: "inception_4d/1x1" 1269 | } 1270 | layer { 1271 | name: "inception_4d/3x3_reduce" 1272 | type: "Convolution" 1273 | bottom: "inception_4c/output" 1274 | top: "inception_4d/3x3_reduce" 1275 | param { 1276 | lr_mult: 0 1277 | decay_mult: 0 1278 | } 1279 | param { 1280 | lr_mult: 0 1281 | decay_mult: 0 1282 | } 1283 | convolution_param { 1284 | num_output: 144 1285 | kernel_size: 1 1286 | weight_filler { 1287 | type: "xavier" 1288 | std: 0.09 1289 | } 1290 | bias_filler { 1291 | type: "constant" 1292 | value: 0.2 1293 | } 1294 | } 1295 | } 1296 | layer { 1297 | name: 
"inception_4d/relu_3x3_reduce" 1298 | type: "ReLU" 1299 | bottom: "inception_4d/3x3_reduce" 1300 | top: "inception_4d/3x3_reduce" 1301 | } 1302 | layer { 1303 | name: "inception_4d/3x3" 1304 | type: "Convolution" 1305 | bottom: "inception_4d/3x3_reduce" 1306 | top: "inception_4d/3x3" 1307 | param { 1308 | lr_mult: 0 1309 | decay_mult: 0 1310 | } 1311 | param { 1312 | lr_mult: 0 1313 | decay_mult: 0 1314 | } 1315 | convolution_param { 1316 | num_output: 288 1317 | pad: 1 1318 | kernel_size: 3 1319 | weight_filler { 1320 | type: "xavier" 1321 | std: 0.03 1322 | } 1323 | bias_filler { 1324 | type: "constant" 1325 | value: 0.2 1326 | } 1327 | } 1328 | } 1329 | layer { 1330 | name: "inception_4d/relu_3x3" 1331 | type: "ReLU" 1332 | bottom: "inception_4d/3x3" 1333 | top: "inception_4d/3x3" 1334 | } 1335 | layer { 1336 | name: "inception_4d/5x5_reduce" 1337 | type: "Convolution" 1338 | bottom: "inception_4c/output" 1339 | top: "inception_4d/5x5_reduce" 1340 | param { 1341 | lr_mult: 0 1342 | decay_mult: 0 1343 | } 1344 | param { 1345 | lr_mult: 0 1346 | decay_mult: 0 1347 | } 1348 | convolution_param { 1349 | num_output: 32 1350 | kernel_size: 1 1351 | weight_filler { 1352 | type: "xavier" 1353 | std: 0.2 1354 | } 1355 | bias_filler { 1356 | type: "constant" 1357 | value: 0.2 1358 | } 1359 | } 1360 | } 1361 | layer { 1362 | name: "inception_4d/relu_5x5_reduce" 1363 | type: "ReLU" 1364 | bottom: "inception_4d/5x5_reduce" 1365 | top: "inception_4d/5x5_reduce" 1366 | } 1367 | layer { 1368 | name: "inception_4d/5x5" 1369 | type: "Convolution" 1370 | bottom: "inception_4d/5x5_reduce" 1371 | top: "inception_4d/5x5" 1372 | param { 1373 | lr_mult: 0 1374 | decay_mult: 0 1375 | } 1376 | param { 1377 | lr_mult: 0 1378 | decay_mult: 0 1379 | } 1380 | convolution_param { 1381 | num_output: 64 1382 | pad: 2 1383 | kernel_size: 5 1384 | weight_filler { 1385 | type: "xavier" 1386 | std: 0.03 1387 | } 1388 | bias_filler { 1389 | type: "constant" 1390 | value: 0.2 1391 | } 1392 | } 1393 | } 1394 | layer { 1395 | name: "inception_4d/relu_5x5" 1396 | type: "ReLU" 1397 | bottom: "inception_4d/5x5" 1398 | top: "inception_4d/5x5" 1399 | } 1400 | layer { 1401 | name: "inception_4d/pool" 1402 | type: "Pooling" 1403 | bottom: "inception_4c/output" 1404 | top: "inception_4d/pool" 1405 | pooling_param { 1406 | pool: MAX 1407 | kernel_size: 3 1408 | stride: 1 1409 | pad: 1 1410 | } 1411 | } 1412 | layer { 1413 | name: "inception_4d/pool_proj" 1414 | type: "Convolution" 1415 | bottom: "inception_4d/pool" 1416 | top: "inception_4d/pool_proj" 1417 | param { 1418 | lr_mult: 0 1419 | decay_mult: 0 1420 | } 1421 | param { 1422 | lr_mult: 0 1423 | decay_mult: 0 1424 | } 1425 | convolution_param { 1426 | num_output: 64 1427 | kernel_size: 1 1428 | weight_filler { 1429 | type: "xavier" 1430 | std: 0.1 1431 | } 1432 | bias_filler { 1433 | type: "constant" 1434 | value: 0.2 1435 | } 1436 | } 1437 | } 1438 | layer { 1439 | name: "inception_4d/relu_pool_proj" 1440 | type: "ReLU" 1441 | bottom: "inception_4d/pool_proj" 1442 | top: "inception_4d/pool_proj" 1443 | } 1444 | layer { 1445 | name: "inception_4d/output" 1446 | type: "Concat" 1447 | bottom: "inception_4d/1x1" 1448 | bottom: "inception_4d/3x3" 1449 | bottom: "inception_4d/5x5" 1450 | bottom: "inception_4d/pool_proj" 1451 | top: "inception_4d/output" 1452 | } 1453 | layer { 1454 | name: "inception_4e/1x1" 1455 | type: "Convolution" 1456 | bottom: "inception_4d/output" 1457 | top: "inception_4e/1x1" 1458 | param { 1459 | lr_mult: 0 1460 | decay_mult: 0 1461 | } 1462 | param { 1463 
| lr_mult: 0 1464 | decay_mult: 0 1465 | } 1466 | convolution_param { 1467 | num_output: 256 1468 | kernel_size: 1 1469 | weight_filler { 1470 | type: "xavier" 1471 | std: 0.03 1472 | } 1473 | bias_filler { 1474 | type: "constant" 1475 | value: 0.2 1476 | } 1477 | } 1478 | } 1479 | layer { 1480 | name: "inception_4e/relu_1x1" 1481 | type: "ReLU" 1482 | bottom: "inception_4e/1x1" 1483 | top: "inception_4e/1x1" 1484 | } 1485 | layer { 1486 | name: "inception_4e/3x3_reduce" 1487 | type: "Convolution" 1488 | bottom: "inception_4d/output" 1489 | top: "inception_4e/3x3_reduce" 1490 | param { 1491 | lr_mult: 0 1492 | decay_mult: 0 1493 | } 1494 | param { 1495 | lr_mult: 0 1496 | decay_mult: 0 1497 | } 1498 | convolution_param { 1499 | num_output: 160 1500 | kernel_size: 1 1501 | weight_filler { 1502 | type: "xavier" 1503 | std: 0.09 1504 | } 1505 | bias_filler { 1506 | type: "constant" 1507 | value: 0.2 1508 | } 1509 | } 1510 | } 1511 | layer { 1512 | name: "inception_4e/relu_3x3_reduce" 1513 | type: "ReLU" 1514 | bottom: "inception_4e/3x3_reduce" 1515 | top: "inception_4e/3x3_reduce" 1516 | } 1517 | layer { 1518 | name: "inception_4e/3x3" 1519 | type: "Convolution" 1520 | bottom: "inception_4e/3x3_reduce" 1521 | top: "inception_4e/3x3" 1522 | param { 1523 | lr_mult: 0 1524 | decay_mult: 0 1525 | } 1526 | param { 1527 | lr_mult: 0 1528 | decay_mult: 0 1529 | } 1530 | convolution_param { 1531 | num_output: 320 1532 | pad: 1 1533 | kernel_size: 3 1534 | weight_filler { 1535 | type: "xavier" 1536 | std: 0.03 1537 | } 1538 | bias_filler { 1539 | type: "constant" 1540 | value: 0.2 1541 | } 1542 | } 1543 | } 1544 | layer { 1545 | name: "inception_4e/relu_3x3" 1546 | type: "ReLU" 1547 | bottom: "inception_4e/3x3" 1548 | top: "inception_4e/3x3" 1549 | } 1550 | layer { 1551 | name: "inception_4e/5x5_reduce" 1552 | type: "Convolution" 1553 | bottom: "inception_4d/output" 1554 | top: "inception_4e/5x5_reduce" 1555 | param { 1556 | lr_mult: 0 1557 | decay_mult: 0 1558 | } 1559 | param { 1560 | lr_mult: 0 1561 | decay_mult: 0 1562 | } 1563 | convolution_param { 1564 | num_output: 32 1565 | kernel_size: 1 1566 | weight_filler { 1567 | type: "xavier" 1568 | std: 0.2 1569 | } 1570 | bias_filler { 1571 | type: "constant" 1572 | value: 0.2 1573 | } 1574 | } 1575 | } 1576 | layer { 1577 | name: "inception_4e/relu_5x5_reduce" 1578 | type: "ReLU" 1579 | bottom: "inception_4e/5x5_reduce" 1580 | top: "inception_4e/5x5_reduce" 1581 | } 1582 | layer { 1583 | name: "inception_4e/5x5" 1584 | type: "Convolution" 1585 | bottom: "inception_4e/5x5_reduce" 1586 | top: "inception_4e/5x5" 1587 | param { 1588 | lr_mult: 0 1589 | decay_mult: 0 1590 | } 1591 | param { 1592 | lr_mult: 0 1593 | decay_mult: 0 1594 | } 1595 | convolution_param { 1596 | num_output: 128 1597 | pad: 2 1598 | kernel_size: 5 1599 | weight_filler { 1600 | type: "xavier" 1601 | std: 0.03 1602 | } 1603 | bias_filler { 1604 | type: "constant" 1605 | value: 0.2 1606 | } 1607 | } 1608 | } 1609 | layer { 1610 | name: "inception_4e/relu_5x5" 1611 | type: "ReLU" 1612 | bottom: "inception_4e/5x5" 1613 | top: "inception_4e/5x5" 1614 | } 1615 | layer { 1616 | name: "inception_4e/pool" 1617 | type: "Pooling" 1618 | bottom: "inception_4d/output" 1619 | top: "inception_4e/pool" 1620 | pooling_param { 1621 | pool: MAX 1622 | kernel_size: 3 1623 | stride: 1 1624 | pad: 1 1625 | } 1626 | } 1627 | layer { 1628 | name: "inception_4e/pool_proj" 1629 | type: "Convolution" 1630 | bottom: "inception_4e/pool" 1631 | top: "inception_4e/pool_proj" 1632 | param { 1633 | lr_mult: 0 1634 
| decay_mult: 0 1635 | } 1636 | param { 1637 | lr_mult: 0 1638 | decay_mult: 0 1639 | } 1640 | convolution_param { 1641 | num_output: 128 1642 | kernel_size: 1 1643 | weight_filler { 1644 | type: "xavier" 1645 | std: 0.1 1646 | } 1647 | bias_filler { 1648 | type: "constant" 1649 | value: 0.2 1650 | } 1651 | } 1652 | } 1653 | layer { 1654 | name: "inception_4e/relu_pool_proj" 1655 | type: "ReLU" 1656 | bottom: "inception_4e/pool_proj" 1657 | top: "inception_4e/pool_proj" 1658 | } 1659 | layer { 1660 | name: "inception_4e/output" 1661 | type: "Concat" 1662 | bottom: "inception_4e/1x1" 1663 | bottom: "inception_4e/3x3" 1664 | bottom: "inception_4e/5x5" 1665 | bottom: "inception_4e/pool_proj" 1666 | top: "inception_4e/output" 1667 | } 1668 | layer { 1669 | name: "pool4/3x3_s2" 1670 | type: "Pooling" 1671 | bottom: "inception_4e/output" 1672 | top: "pool4/3x3_s2" 1673 | pooling_param { 1674 | pool: MAX 1675 | kernel_size: 3 1676 | stride: 2 1677 | } 1678 | } 1679 | layer { 1680 | name: "inception_5a/1x1" 1681 | type: "Convolution" 1682 | bottom: "pool4/3x3_s2" 1683 | top: "inception_5a/1x1" 1684 | param { 1685 | lr_mult: 0 1686 | decay_mult: 0 1687 | } 1688 | param { 1689 | lr_mult: 0 1690 | decay_mult: 0 1691 | } 1692 | convolution_param { 1693 | num_output: 256 1694 | kernel_size: 1 1695 | weight_filler { 1696 | type: "xavier" 1697 | std: 0.03 1698 | } 1699 | bias_filler { 1700 | type: "constant" 1701 | value: 0.2 1702 | } 1703 | } 1704 | } 1705 | layer { 1706 | name: "inception_5a/relu_1x1" 1707 | type: "ReLU" 1708 | bottom: "inception_5a/1x1" 1709 | top: "inception_5a/1x1" 1710 | } 1711 | layer { 1712 | name: "inception_5a/3x3_reduce" 1713 | type: "Convolution" 1714 | bottom: "pool4/3x3_s2" 1715 | top: "inception_5a/3x3_reduce" 1716 | param { 1717 | lr_mult: 0 1718 | decay_mult: 0 1719 | } 1720 | param { 1721 | lr_mult: 0 1722 | decay_mult: 0 1723 | } 1724 | convolution_param { 1725 | num_output: 160 1726 | kernel_size: 1 1727 | weight_filler { 1728 | type: "xavier" 1729 | std: 0.09 1730 | } 1731 | bias_filler { 1732 | type: "constant" 1733 | value: 0.2 1734 | } 1735 | } 1736 | } 1737 | layer { 1738 | name: "inception_5a/relu_3x3_reduce" 1739 | type: "ReLU" 1740 | bottom: "inception_5a/3x3_reduce" 1741 | top: "inception_5a/3x3_reduce" 1742 | } 1743 | layer { 1744 | name: "inception_5a/3x3" 1745 | type: "Convolution" 1746 | bottom: "inception_5a/3x3_reduce" 1747 | top: "inception_5a/3x3" 1748 | param { 1749 | lr_mult: 0 1750 | decay_mult: 0 1751 | } 1752 | param { 1753 | lr_mult: 0 1754 | decay_mult: 0 1755 | } 1756 | convolution_param { 1757 | num_output: 320 1758 | pad: 1 1759 | kernel_size: 3 1760 | weight_filler { 1761 | type: "xavier" 1762 | std: 0.03 1763 | } 1764 | bias_filler { 1765 | type: "constant" 1766 | value: 0.2 1767 | } 1768 | } 1769 | } 1770 | layer { 1771 | name: "inception_5a/relu_3x3" 1772 | type: "ReLU" 1773 | bottom: "inception_5a/3x3" 1774 | top: "inception_5a/3x3" 1775 | } 1776 | layer { 1777 | name: "inception_5a/5x5_reduce" 1778 | type: "Convolution" 1779 | bottom: "pool4/3x3_s2" 1780 | top: "inception_5a/5x5_reduce" 1781 | param { 1782 | lr_mult: 0 1783 | decay_mult: 0 1784 | } 1785 | param { 1786 | lr_mult: 0 1787 | decay_mult: 0 1788 | } 1789 | convolution_param { 1790 | num_output: 32 1791 | kernel_size: 1 1792 | weight_filler { 1793 | type: "xavier" 1794 | std: 0.2 1795 | } 1796 | bias_filler { 1797 | type: "constant" 1798 | value: 0.2 1799 | } 1800 | } 1801 | } 1802 | layer { 1803 | name: "inception_5a/relu_5x5_reduce" 1804 | type: "ReLU" 1805 | bottom: 
"inception_5a/5x5_reduce" 1806 | top: "inception_5a/5x5_reduce" 1807 | } 1808 | layer { 1809 | name: "inception_5a/5x5" 1810 | type: "Convolution" 1811 | bottom: "inception_5a/5x5_reduce" 1812 | top: "inception_5a/5x5" 1813 | param { 1814 | lr_mult: 0 1815 | decay_mult: 0 1816 | } 1817 | param { 1818 | lr_mult: 0 1819 | decay_mult: 0 1820 | } 1821 | convolution_param { 1822 | num_output: 128 1823 | pad: 2 1824 | kernel_size: 5 1825 | weight_filler { 1826 | type: "xavier" 1827 | std: 0.03 1828 | } 1829 | bias_filler { 1830 | type: "constant" 1831 | value: 0.2 1832 | } 1833 | } 1834 | } 1835 | layer { 1836 | name: "inception_5a/relu_5x5" 1837 | type: "ReLU" 1838 | bottom: "inception_5a/5x5" 1839 | top: "inception_5a/5x5" 1840 | } 1841 | layer { 1842 | name: "inception_5a/pool" 1843 | type: "Pooling" 1844 | bottom: "pool4/3x3_s2" 1845 | top: "inception_5a/pool" 1846 | pooling_param { 1847 | pool: MAX 1848 | kernel_size: 3 1849 | stride: 1 1850 | pad: 1 1851 | } 1852 | } 1853 | layer { 1854 | name: "inception_5a/pool_proj" 1855 | type: "Convolution" 1856 | bottom: "inception_5a/pool" 1857 | top: "inception_5a/pool_proj" 1858 | param { 1859 | lr_mult: 0 1860 | decay_mult: 0 1861 | } 1862 | param { 1863 | lr_mult: 0 1864 | decay_mult: 0 1865 | } 1866 | convolution_param { 1867 | num_output: 128 1868 | kernel_size: 1 1869 | weight_filler { 1870 | type: "xavier" 1871 | std: 0.1 1872 | } 1873 | bias_filler { 1874 | type: "constant" 1875 | value: 0.2 1876 | } 1877 | } 1878 | } 1879 | layer { 1880 | name: "inception_5a/relu_pool_proj" 1881 | type: "ReLU" 1882 | bottom: "inception_5a/pool_proj" 1883 | top: "inception_5a/pool_proj" 1884 | } 1885 | layer { 1886 | name: "inception_5a/output" 1887 | type: "Concat" 1888 | bottom: "inception_5a/1x1" 1889 | bottom: "inception_5a/3x3" 1890 | bottom: "inception_5a/5x5" 1891 | bottom: "inception_5a/pool_proj" 1892 | top: "inception_5a/output" 1893 | } 1894 | layer { 1895 | name: "inception_5b/1x1" 1896 | type: "Convolution" 1897 | bottom: "inception_5a/output" 1898 | top: "inception_5b/1x1" 1899 | param { 1900 | lr_mult: 0 1901 | decay_mult: 0 1902 | } 1903 | param { 1904 | lr_mult: 0 1905 | decay_mult: 0 1906 | } 1907 | convolution_param { 1908 | num_output: 384 1909 | kernel_size: 1 1910 | weight_filler { 1911 | type: "xavier" 1912 | std: 0.03 1913 | } 1914 | bias_filler { 1915 | type: "constant" 1916 | value: 0.2 1917 | } 1918 | } 1919 | } 1920 | layer { 1921 | name: "inception_5b/relu_1x1" 1922 | type: "ReLU" 1923 | bottom: "inception_5b/1x1" 1924 | top: "inception_5b/1x1" 1925 | } 1926 | layer { 1927 | name: "inception_5b/3x3_reduce" 1928 | type: "Convolution" 1929 | bottom: "inception_5a/output" 1930 | top: "inception_5b/3x3_reduce" 1931 | param { 1932 | lr_mult: 0 1933 | decay_mult: 0 1934 | } 1935 | param { 1936 | lr_mult: 0 1937 | decay_mult: 0 1938 | } 1939 | convolution_param { 1940 | num_output: 192 1941 | kernel_size: 1 1942 | weight_filler { 1943 | type: "xavier" 1944 | std: 0.09 1945 | } 1946 | bias_filler { 1947 | type: "constant" 1948 | value: 0.2 1949 | } 1950 | } 1951 | } 1952 | layer { 1953 | name: "inception_5b/relu_3x3_reduce" 1954 | type: "ReLU" 1955 | bottom: "inception_5b/3x3_reduce" 1956 | top: "inception_5b/3x3_reduce" 1957 | } 1958 | layer { 1959 | name: "inception_5b/3x3" 1960 | type: "Convolution" 1961 | bottom: "inception_5b/3x3_reduce" 1962 | top: "inception_5b/3x3" 1963 | param { 1964 | lr_mult: 0 1965 | decay_mult: 0 1966 | } 1967 | param { 1968 | lr_mult: 0 1969 | decay_mult: 0 1970 | } 1971 | convolution_param { 1972 | 
num_output: 384 1973 | pad: 1 1974 | kernel_size: 3 1975 | weight_filler { 1976 | type: "xavier" 1977 | std: 0.03 1978 | } 1979 | bias_filler { 1980 | type: "constant" 1981 | value: 0.2 1982 | } 1983 | } 1984 | } 1985 | layer { 1986 | name: "inception_5b/relu_3x3" 1987 | type: "ReLU" 1988 | bottom: "inception_5b/3x3" 1989 | top: "inception_5b/3x3" 1990 | } 1991 | layer { 1992 | name: "inception_5b/5x5_reduce" 1993 | type: "Convolution" 1994 | bottom: "inception_5a/output" 1995 | top: "inception_5b/5x5_reduce" 1996 | param { 1997 | lr_mult: 0 1998 | decay_mult: 0 1999 | } 2000 | param { 2001 | lr_mult: 0 2002 | decay_mult: 0 2003 | } 2004 | convolution_param { 2005 | num_output: 48 2006 | kernel_size: 1 2007 | weight_filler { 2008 | type: "xavier" 2009 | std: 0.2 2010 | } 2011 | bias_filler { 2012 | type: "constant" 2013 | value: 0.2 2014 | } 2015 | } 2016 | } 2017 | layer { 2018 | name: "inception_5b/relu_5x5_reduce" 2019 | type: "ReLU" 2020 | bottom: "inception_5b/5x5_reduce" 2021 | top: "inception_5b/5x5_reduce" 2022 | } 2023 | layer { 2024 | name: "inception_5b/5x5" 2025 | type: "Convolution" 2026 | bottom: "inception_5b/5x5_reduce" 2027 | top: "inception_5b/5x5" 2028 | param { 2029 | lr_mult: 0 2030 | decay_mult: 0 2031 | } 2032 | param { 2033 | lr_mult: 0 2034 | decay_mult: 0 2035 | } 2036 | convolution_param { 2037 | num_output: 128 2038 | pad: 2 2039 | kernel_size: 5 2040 | weight_filler { 2041 | type: "xavier" 2042 | std: 0.03 2043 | } 2044 | bias_filler { 2045 | type: "constant" 2046 | value: 0.2 2047 | } 2048 | } 2049 | } 2050 | layer { 2051 | name: "inception_5b/relu_5x5" 2052 | type: "ReLU" 2053 | bottom: "inception_5b/5x5" 2054 | top: "inception_5b/5x5" 2055 | } 2056 | layer { 2057 | name: "inception_5b/pool" 2058 | type: "Pooling" 2059 | bottom: "inception_5a/output" 2060 | top: "inception_5b/pool" 2061 | pooling_param { 2062 | pool: MAX 2063 | kernel_size: 3 2064 | stride: 1 2065 | pad: 1 2066 | } 2067 | } 2068 | layer { 2069 | name: "inception_5b/pool_proj" 2070 | type: "Convolution" 2071 | bottom: "inception_5b/pool" 2072 | top: "inception_5b/pool_proj" 2073 | param { 2074 | lr_mult: 0 2075 | decay_mult: 0 2076 | } 2077 | param { 2078 | lr_mult: 0 2079 | decay_mult: 0 2080 | } 2081 | convolution_param { 2082 | num_output: 128 2083 | kernel_size: 1 2084 | weight_filler { 2085 | type: "xavier" 2086 | std: 0.1 2087 | } 2088 | bias_filler { 2089 | type: "constant" 2090 | value: 0.2 2091 | } 2092 | } 2093 | } 2094 | layer { 2095 | name: "inception_5b/relu_pool_proj" 2096 | type: "ReLU" 2097 | bottom: "inception_5b/pool_proj" 2098 | top: "inception_5b/pool_proj" 2099 | } 2100 | layer { 2101 | name: "inception_5b/output" 2102 | type: "Concat" 2103 | bottom: "inception_5b/1x1" 2104 | bottom: "inception_5b/3x3" 2105 | bottom: "inception_5b/5x5" 2106 | bottom: "inception_5b/pool_proj" 2107 | top: "inception_5b/output" 2108 | } 2109 | layer { 2110 | name: "pool5/7x7_s1" 2111 | type: "Pooling" 2112 | bottom: "inception_5b/output" 2113 | top: "pool5/7x7_s1" 2114 | pooling_param { 2115 | pool: AVE 2116 | kernel_size: 7 2117 | stride: 1 2118 | } 2119 | } 2120 | layer { 2121 | name: "pool5/drop_7x7_s1" 2122 | type: "Dropout" 2123 | bottom: "pool5/7x7_s1" 2124 | top: "pool5/7x7_s1" 2125 | dropout_param { 2126 | dropout_ratio: 0.4 2127 | } 2128 | } 2129 | layer { 2130 | name: "loss3/classifier_indus" 2131 | type: "InnerProduct" 2132 | bottom: "pool5/7x7_s1" 2133 | top: "loss3/classifier_indus" 2134 | param { 2135 | lr_mult: 0 2136 | decay_mult: 0 2137 | } 2138 | param { 2139 | lr_mult: 0 
2140 | decay_mult: 0 2141 | } 2142 | inner_product_param { 2143 | num_output: 3 2144 | weight_filler { 2145 | type: "xavier" 2146 | } 2147 | bias_filler { 2148 | type: "constant" 2149 | value: 0 2150 | } 2151 | } 2152 | } 2153 | layer { 2154 | name: "prob" 2155 | type: "Softmax" 2156 | bottom: "loss3/classifier_indus" 2157 | top: "prob" 2158 | } 2159 | -------------------------------------------------------------------------------- /lib/models/text-notext/weights.caffemodel: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:25ee822afe55a12ddc9f8d3d4d39bb050cfa5b12caff3bcda22c4beb486f0488 3 | size 41272197 4 | -------------------------------------------------------------------------------- /lib/selectivesearch.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | ''' 3 | The MIT License (MIT) 4 | 5 | Copyright (c) 2015-2016 AlpacaDB 6 | Copyright (c) 2016 Oussama ENNAFII 7 | ''' 8 | 9 | import skimage.io 10 | import skimage.feature 11 | import skimage.color 12 | import skimage.transform 13 | import skimage.util 14 | import skimage.segmentation 15 | import numpy 16 | 17 | 18 | # "Selective Search for Object Recognition" by J.R.R. Uijlings et al. 19 | # 20 | # - Modified version with LBP extractor for texture vectorization 21 | 22 | 23 | def _generate_segments(im_orig, scale, sigma, min_size): 24 | """ 25 | segment smallest regions by the algorithm of Felzenszwalb and 26 | Huttenlocher 27 | """ 28 | 29 | # open the image 30 | im_mask = skimage.segmentation.felzenszwalb( 31 | skimage.util.img_as_float(im_orig), scale=scale, sigma=sigma, 32 | min_size=min_size) 33 | 34 | # merge mask channel to the image as a 4th channel 35 | im_orig = numpy.append( 36 | im_orig, numpy.zeros(im_orig.shape[:2])[:, :, numpy.newaxis], axis=2) 37 | im_orig[:, :, 3] = im_mask 38 | 39 | return im_orig 40 | 41 | 42 | def _sim_colour(r1, r2): 43 | """ 44 | calculate the sum of histogram intersection of colour 45 | """ 46 | return sum([min(a, b) for a, b in zip(r1["hist_c"], r2["hist_c"])]) 47 | 48 | 49 | def _sim_texture(r1, r2): 50 | """ 51 | calculate the sum of histogram intersection of texture 52 | """ 53 | return sum([min(a, b) for a, b in zip(r1["hist_t"], r2["hist_t"])]) 54 | 55 | 56 | def _sim_size(r1, r2, imsize): 57 | """ 58 | calculate the size similarity over the image 59 | """ 60 | return 1.0 - (r1["size"] + r2["size"]) / imsize 61 | 62 | 63 | def _sim_fill(r1, r2, imsize): 64 | """ 65 | calculate the fill similarity over the image 66 | """ 67 | bbsize = ( 68 | (max(r1["max_x"], r2["max_x"]) - min(r1["min_x"], r2["min_x"])) 69 | * (max(r1["max_y"], r2["max_y"]) - min(r1["min_y"], r2["min_y"])) 70 | ) 71 | return 1.0 - (bbsize - r1["size"] - r2["size"]) / imsize 72 | 73 | 74 | def _calc_sim(r1, r2, imsize): 75 | return (_sim_colour(r1, r2) + _sim_texture(r1, r2) 76 | + _sim_size(r1, r2, imsize) + _sim_fill(r1, r2, imsize)) 77 | 78 | 79 | def _calc_colour_hist(img): 80 | """ 81 | calculate colour histogram for each region 82 | 83 | the size of the output histogram will be BINS * COLOUR_CHANNELS(3) 84 | 85 | the number of bins is 25, the same as in [uijlings_ijcv2013_draft.pdf] 86 | 87 | extract HSV 88 | """ 89 | 90 | BINS = 25 91 | hist = numpy.array([]) 92 | 93 | for colour_channel in (0, 1, 2): 94 | 95 | # extracting one colour channel 96 | c = img[:, colour_channel] 97 | 98 | # calculate histogram for each colour and join to the result 99 | hist = numpy.concatenate(
100 | [hist] + [numpy.histogram(c, BINS, (0.0, 255.0))[0]]) 101 | 102 | # L1 normalize 103 | hist = hist / len(img) 104 | 105 | return hist 106 | 107 | 108 | def _calc_texture_gradient(img): 109 | """ 110 | calculate texture gradient for entire image 111 | 112 | The original SelectiveSearch algorithm proposed Gaussian derivative 113 | for 8 orientations, but we use LBP instead. 114 | 115 | output will be [height(*)][width(*)] 116 | """ 117 | ret = numpy.zeros((img.shape[0], img.shape[1], img.shape[2])) 118 | 119 | for colour_channel in (0, 1, 2): 120 | ret[:, :, colour_channel] = skimage.feature.local_binary_pattern( 121 | img[:, :, colour_channel], 8, 1.0) 122 | 123 | return ret 124 | 125 | 126 | def _calc_texture_hist(img): 127 | """ 128 | calculate texture histogram for each region 129 | 130 | calculate the histogram of gradient for each colour 131 | the size of output histogram will be 132 | BINS * ORIENTATIONS * COLOUR_CHANNELS(3) 133 | """ 134 | BINS = 10 135 | 136 | hist = numpy.array([]) 137 | 138 | for colour_channel in (0, 1, 2): 139 | 140 | # mask by the colour channel 141 | fd = img[:, colour_channel] 142 | 143 | # calculate histogram for each orientation and concatenate them all 144 | # and join to the result 145 | hist = numpy.concatenate( 146 | [hist] + [numpy.histogram(fd, BINS, (0.0, 1.0))[0]]) 147 | 148 | # L1 Normalize 149 | hist = hist / len(img) 150 | 151 | return hist 152 | 153 | 154 | def _extract_regions(img): 155 | 156 | R = {} 157 | 158 | # get hsv image 159 | hsv = skimage.color.rgb2hsv(img[:, :, :3]) 160 | 161 | # pass 1: count pixel positions 162 | for y, i in enumerate(img): 163 | 164 | for x, (r, g, b, l) in enumerate(i): 165 | 166 | # initialize a new region 167 | if l not in R: 168 | R[l] = { 169 | "min_x": 0xffff, "min_y": 0xffff, 170 | "max_x": 0, "max_y": 0, "labels": [l]} 171 | 172 | # bounding box 173 | if R[l]["min_x"] > x: 174 | R[l]["min_x"] = x 175 | if R[l]["min_y"] > y: 176 | R[l]["min_y"] = y 177 | if R[l]["max_x"] < x: 178 | R[l]["max_x"] = x 179 | if R[l]["max_y"] < y: 180 | R[l]["max_y"] = y 181 | 182 | # pass 2: calculate texture gradient 183 | tex_grad = _calc_texture_gradient(img) 184 | 185 | # pass 3: calculate colour histogram of each region 186 | for k, v in R.items(): 187 | 188 | # colour histogram 189 | masked_pixels = hsv[:, :, :][img[:, :, 3] == k] 190 | R[k]["size"] = len(masked_pixels) 191 | R[k]["hist_c"] = _calc_colour_hist(masked_pixels) 192 | 193 | # texture histogram 194 | R[k]["hist_t"] = _calc_texture_hist(tex_grad[:, :][img[:, :, 3] == k]) 195 | 196 | return R 197 | 198 | 199 | def _extract_neighbours(regions): 200 | 201 | def intersect(a, b): 202 | if (a["min_x"] < b["min_x"] < a["max_x"] 203 | and a["min_y"] < b["min_y"] < a["max_y"]) or ( 204 | a["min_x"] < b["max_x"] < a["max_x"] 205 | and a["min_y"] < b["max_y"] < a["max_y"]) or ( 206 | a["min_x"] < b["min_x"] < a["max_x"] 207 | and a["min_y"] < b["max_y"] < a["max_y"]) or ( 208 | a["min_x"] < b["max_x"] < a["max_x"] 209 | and a["min_y"] < b["min_y"] < a["max_y"]): 210 | return True 211 | return False 212 | 213 | R = list(regions.items()) 214 | 215 | neighbours = [] 216 | for cur, a in enumerate(R[:-1]): 217 | for b in R[int(cur) + 1:]: 218 | if intersect(a[1], b[1]): 219 | neighbours.append((a, b)) 220 | 221 | return neighbours 222 | 223 |
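# Editor's sketch (not part of the upstream module; hypothetical data,
# safe to delete): how the corner-inside test in _extract_neighbours
# decides that two regions are neighbours.
def _demo_extract_neighbours():
    regions = {
        0: {"min_x": 0, "min_y": 0, "max_x": 10, "max_y": 10},
        1: {"min_x": 5, "min_y": 5, "max_x": 15, "max_y": 15},    # overlaps 0
        2: {"min_x": 20, "min_y": 20, "max_x": 30, "max_y": 30},  # disjoint
    }
    # corner (5, 5) of region 1 lies strictly inside region 0 on both
    # axes, so only the (0, 1) pair qualifies
    pairs = {(a[0], b[0]) for a, b in _extract_neighbours(regions)}
    assert pairs == {(0, 1)}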
230 | "max_y": max(r1["max_y"], r2["max_y"]), 231 | "size": new_size, 232 | "hist_c": ( 233 | r1["hist_c"] * r1["size"] + r2["hist_c"] * r2["size"]) / new_size, 234 | "hist_t": ( 235 | r1["hist_t"] * r1["size"] + r2["hist_t"] * r2["size"]) / new_size, 236 | "labels": r1["labels"] + r2["labels"] 237 | } 238 | return rt 239 | 240 | 241 | def selective_search( 242 | im_orig, scale=1.0, sigma=0.8, min_size=50): 243 | '''Selective Search 244 | 245 | Parameters 246 | ---------- 247 | im_orig : ndarray 248 | Input image 249 | scale : int 250 | Free parameter. Higher means larger clusters in felzenszwalb segmentation. Inverse relation with num pixels. 251 | sigma : float 252 | Width of Gaussian kernel for felzenszwalb segmentation. 253 | min_size : int 254 | Minimum component size for felzenszwalb segmentation. 255 | Returns 256 | ------- 257 | img : ndarray 258 | image with region label 259 | region label is stored in the 4th value of each pixel [r,g,b,(region)] 260 | regions : array of dict 261 | [ 262 | { 263 | 'rect': (left, top, right, bottom), 264 | 'labels': [...] 265 | }, 266 | ... 267 | ] 268 | ''' 269 | assert im_orig.shape[2] == 3, "3ch image is expected" 270 | 271 | # load image and get smallest regions 272 | # region label is stored in the 4th value of each pixel [r,g,b,(region)] 273 | img = _generate_segments(im_orig, scale, sigma, min_size) 274 | 275 | if img is None: 276 | return None, {} 277 | 278 | imsize = img.shape[0] * img.shape[1] 279 | R = _extract_regions(img) 280 | 281 | # extract neighbouring information 282 | neighbours = _extract_neighbours(R) 283 | 284 | # calculate initial similarities 285 | S = {} 286 | for (ai, ar), (bi, br) in neighbours: 287 | S[(ai, bi)] = _calc_sim(ar, br, imsize) 288 | 289 | # hierarchal search 290 | while S != {}: 291 | 292 | # get highest similarity 293 | i, j = sorted(list(S.items()), key=lambda tup: tup[1])[-1][0] 294 | 295 | # merge corresponding regions 296 | t = max(R.keys()) + 1.0 297 | R[t] = _merge_regions(R[i], R[j]) 298 | 299 | # mark similarities for regions to be removed 300 | key_to_delete = [] 301 | for k, v in S.items(): 302 | if (i in k) or (j in k): 303 | key_to_delete.append(k) 304 | 305 | # remove old similarities of related regions 306 | for k in key_to_delete: 307 | del S[k] 308 | 309 | # calculate similarity set with the new region 310 | for k in filter(lambda a: a != (i, j), key_to_delete): 311 | n = k[1] if k[0] in (i, j) else k[0] 312 | S[(t, n)] = _calc_sim(R[t], R[n], imsize) 313 | 314 | regions = [] 315 | for k, r in R.items(): 316 | regions.append({ 317 | 'rect': ( 318 | r['min_x'], r['min_y'], 319 | r['max_x'] - r['min_x'], r['max_y'] - r['min_y']), 320 | 'size': r['size'], 321 | 'labels': r['labels'] 322 | }) 323 | 324 | return img, regions 325 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | python-json-logger==0.1.5 2 | -------------------------------------------------------------------------------- /set_env.sh: -------------------------------------------------------------------------------- 1 | # environment variables 2 | 3 | # paths 4 | export CAFFE_PATH=/root/caffe/build/tools/caffe 5 | export TEXT_NOTEXT_MODELS_DIR=./lib/models/text-notext 6 | export JAR_NOJAR_MODELS_DIR=./lib/models/jar-nojar 7 | 8 | # gpu computations switch (1-> GPU Computations and 0-> CPU Computations) 9 | export IS_GPU=1 10 | 11 | # set the logger level (10 -> DEBUG, 20 -> INFO) 12 | export 
LOG_LEVEL=10 13 | 14 | # suppress caffe logs 15 | # export GLOG_minloglevel=2 16 | -------------------------------------------------------------------------------- /stages/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/stages/__init__.py -------------------------------------------------------------------------------- /stages/region_proposal/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/stages/region_proposal/__init__.py -------------------------------------------------------------------------------- /stages/region_proposal/extract_seal.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import cv2 4 | import matplotlib.pyplot as plt 5 | import numpy as np 6 | import scipy.misc 7 | from PIL import Image, ImageChops 8 | from scipy import ndimage 9 | 10 | from helpers.temp import TemporaryFile 11 | 12 | 13 | def trim(im): 14 | 15 | bg = Image.new(im.mode, im.size, im.getpixel((0, 0))) 16 | diff = ImageChops.difference(im, bg) 17 | diff = ImageChops.add(diff, diff, 2.0, -100) 18 | bbox = diff.getbbox() 19 | if bbox: 20 | return im.crop(bbox) 21 | 22 | 23 | def auto_canny(image, sigma=0.33): 24 | 25 | # compute the median of the single channel pixel intensities 26 | v = np.median(image) 27 | 28 | # apply automatic Canny edge detection using the computed median 29 | lower = int(max(0, (1.0 - sigma) * v)) 30 | upper = int(min(255, (1.0 + sigma) * v)) 31 | edged = cv2.Canny(image, lower, upper) 32 | 33 | return edged 34 | 35 | 36 | def crop_white(image_path): 37 | 38 | threshold = 250 39 | 40 | while True: 41 | image_sci = scipy.misc.imread(image_path) 42 | 43 | image_g = ndimage.gaussian_filter(image_sci, 3.0) 44 | labeled, _ = ndimage.label(image_g > threshold) 45 | 46 | temp_conv = TemporaryFile(".png") 47 | 48 | plt.imsave(temp_conv.name, labeled) 49 | image_cv = cv2.imread(temp_conv.name) 50 | temp_conv.cleanup() 51 | 52 | gray = cv2.cvtColor(image_cv, cv2.COLOR_BGR2GRAY) 53 | blurred = cv2.GaussianBlur(gray, (7, 7), 0) 54 | auto = auto_canny(blurred) 55 | 56 | _, cnts, _ = cv2.findContours(auto.copy(), cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE) 57 | screenCnt = sorted(cnts, key=cv2.contourArea, reverse=True)[0] 58 | 59 | x, y, w, h = cv2.boundingRect(screenCnt) 60 | if w * h > (image_sci.shape[0] * image_sci.shape[1]) * 0.60: 61 | temp_crop = TemporaryFile(".tif") 62 | plt.imsave(temp_crop.name, image_sci[y:y + h, x:x + w]) 63 | 64 | image_pil = Image.open(temp_crop.name) 65 | temp_crop.cleanup() 66 | output = trim(image_pil) 67 | if output is not None: 68 | temp_output = TemporaryFile(".tif") 69 | output.save(temp_output.name) 70 | return temp_output 71 | elif threshold == 200: 72 | image_pil = Image.open(image_path) 73 | output = trim(image_pil) 74 | if output is not None: 75 | temp_output = TemporaryFile(".tif") 76 | output.save(temp_output.name) 77 | return temp_output 78 | else: 79 | threshold = 200 80 |
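A hedged aside on auto_canny above: both Canny thresholds are derived from the median intensity of the input, so one sigma setting adapts to differently exposed seal photographs. A quick arithmetic check (the gray value is made up for illustration):

import numpy as np

patch = np.full((10, 10), 120, dtype=np.uint8)  # hypothetical gray patch
v = np.median(patch)
lower = int(max(0, (1.0 - 0.33) * v))           # 80
upper = int(min(255, (1.0 + 0.33) * v))         # 159
assert (lower, upper) == (80, 159)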
-------------------------------------------------------------------------------- /stages/region_proposal/region_grouping.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | from helpers import logger 4 | 5 | LOGGER = logger.create_logger(__name__) 6 | 7 | 8 | def mean_rect(r): 9 | return (min([i[0] for i in r]), min([i[1] for i in r]), max([i[0] + i[2] for i in r]) - min([i[0] for i in r]), max([i[1] + i[3] for i in r]) - min([i[1] for i in r])) 10 | 11 | 12 | def extend_rect(r): 13 | return (min([i[0] for i in r]), min([i[1] for i in r]), max([i[0] + i[2] for i in r]) - min([i[0] for i in r]), max([i[3] for i in r])) 14 | 15 | 16 | def merge(candidates, width, height): 17 | merged_candidates = set() 18 | processed = set() 19 | 20 | threshold = int(((width + height) / 2) * (0.14)) 21 | for x, y, w, h in candidates: 22 | if (x, y, w, h) not in processed: 23 | group = set() 24 | group.add((x, y, w, h)) 25 | for x1, y1, w1, h1 in candidates: 26 | if abs(x1 - x) <= threshold and abs(y1 - y) <= threshold and abs(w1 - w) <= threshold and abs(h1 - h) <= threshold: 27 | group.add((x1, y1, w1, h1)) 28 | processed.add((x1, y1, w1, h1)) 29 | merged_candidates.add(mean_rect(group)) 30 | 31 | return merged_candidates 32 | 33 | 34 | def contains_remove(merged_candidates): 35 | refined_merged_candidates = set() 36 | for x, y, w, h in merged_candidates: 37 | is_contained = False 38 | merged_candidates_copy = set(merged_candidates) 39 | merged_candidates_copy.remove((x, y, w, h)) 40 | for x1, y1, w1, h1 in merged_candidates_copy: 41 | if x1 >= x and y1 >= y and x1 + w1 <= x + w and y1 + h1 <= y + h: 42 | is_contained = True 43 | break 44 | 45 | if not is_contained: 46 | refined_merged_candidates.add((x, y, w, h)) 47 | 48 | return refined_merged_candidates 49 | 50 |
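# Editor's sketch (hypothetical boxes, safe to delete): on a 100x100
# image the grouping threshold above is int(((100 + 100) / 2) * 0.14) =
# 14 pixels, so two candidates that differ by only a few pixels in every
# coordinate collapse into their bounding union.
def _demo_merge():
    candidates = {(10, 10, 40, 40), (12, 11, 41, 39)}
    merged = merge(candidates, 100, 100)
    assert merged == {(10, 10, 43, 40)}
    # nothing is nested inside anything else, so contains_remove keeps it
    assert contains_remove(merged) == {(10, 10, 43, 40)}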
51 | def draw_superbox(refined_merged_candidates, old_superboxes=[]): 52 | no_overlap = [] 53 | draw_superbox_candidates = [] 54 | 55 | superboxes = set() 56 | 57 | if not old_superboxes: 58 | draw_superbox_candidates = refined_merged_candidates 59 | else: 60 | draw_superbox_candidates = old_superboxes 61 | 62 | base_list = list(draw_superbox_candidates) 63 | base_set = set(draw_superbox_candidates) 64 | 65 | # (x1,y1) top-left coord, (x2,y2) bottom-right coord, (w,h) size 66 | while base_list: 67 | x1, y1, w1, h1 = base_list[0] 68 | 69 | if len(base_list) == 1: # super box 70 | superboxes.add((x1, y1, w1, h1)) 71 | 72 | base_list.remove((x1, y1, w1, h1)) 73 | 74 | overlap = set() 75 | base_set.remove((x1, y1, w1, h1)) 76 | for x2, y2, w2, h2 in base_set: 77 | a = {'x1': x1, 'y1': y1, 'x2': x1 + w1, 'y2': y1 + h1, 'w': w1, 'h': h1} 78 | b = {'x1': x2, 'y1': y2, 'x2': x2 + w2, 'y2': y2 + h2, 'w': w2, 'h': h2} 79 | 80 | # overlap between A and B 81 | area_a = a['w'] * a['h'] 82 | area_b = b['w'] * b['h'] 83 | area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \ 84 | np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])]) 85 | 86 | # area_union = area_a + area_b - area_intersection 87 | # overlap_ab = float(area_intersection) / float(area_union) 88 | 89 | overlap_a = float(area_intersection) / float(area_a) 90 | overlap_b = float(area_intersection) / float(area_b) 91 | 92 | if overlap_a >= 0.40 or overlap_b >= 0.40: 93 | overlap.add((b['x1'], b['y1'], b['w'], b['h'])) 94 | 95 | if overlap: # overlap 96 | base_set = base_set - overlap 97 | base_list = [bl for bl in base_list if bl not in overlap] 98 | overlap.add((a['x1'], a['y1'], a['w'], a['h'])) 99 | 100 | superboxes.add((min([i[0] for i in overlap]), min([i[1] for i in overlap]), max([i[0] + i[2] for i in overlap]) - 101 | min([i[0] for i in overlap]), max([i[1] + i[3] for i in overlap]) - min([i[1] for i in overlap]))) 102 | 103 | no_overlap.append(False) 104 | else: # no overlap 105 | superboxes.add((x1, y1, w1, h1)) 106 | no_overlap.append(True) 107 | 108 | if all(no_overlap): 109 | return superboxes 110 | else: 111 | superboxes = draw_superbox(refined_merged_candidates, superboxes) 112 | return superboxes 113 | 114 | 115 | def extend_superbox(superboxes, width, height): 116 | extended_superboxes = set() 117 | processed = set() 118 | 119 | threshold = ((width + height) / 2) * (0.06) 120 | for x, y, w, h in superboxes: 121 | if (x, y, w, h) not in processed: 122 | group = set() 123 | 124 | group.add((x, y, w, h)) 125 | for x1, y1, w1, h1 in superboxes: 126 | if abs(y1 - y) <= threshold and abs(h1 - h) <= threshold: 127 | group.add((x1, y1, w1, h1)) 128 | processed.add((x1, y1, w1, h1)) 129 | 130 | extended_superboxes.add(extend_rect(group)) 131 | 132 | return extended_superboxes 133 | 134 | 135 | def group_candidate_regions(candidates, width, height): 136 | merged_candidates = merge(candidates, width, height) 137 | LOGGER.info(merged_candidates) 138 | refined_merged_candidates = contains_remove(merged_candidates) 139 | LOGGER.info(refined_merged_candidates) 140 | superboxes = draw_superbox(refined_merged_candidates) 141 | LOGGER.info(superboxes) 142 | extended_superboxes = extend_superbox(superboxes, width, height) 143 | LOGGER.info(extended_superboxes) 144 | 145 | return extended_superboxes 146 | -------------------------------------------------------------------------------- /stages/region_proposal/region_search.py: -------------------------------------------------------------------------------- 1 | import skimage.io 2 | import skimage.transform 3 | 4 | from helpers import logger 5 | from lib import selectivesearch 6 | 7 | LOGGER = logger.create_logger(__name__) 8 | 9 | 10 | def get_candidate_regions(image, width, height): 11 | LOGGER.info("Extracting the candidate regions ...") 12 | candidates = set() 13 | 14 | stage = 1 15 | for sc in [350, 450, 500]: 16 | for sig in [0.8]: 17 | for mins in [30, 60, 120]: 18 | img = skimage.io.imread(image.name)[:, :, :3] 19 | if not (height == len(img) and width == len(img[0])): 20 | img = skimage.transform.resize(img, (height, width)) 21 | 22 | _, regions = selectivesearch.selective_search( 23 | img, scale=sc, sigma=sig, min_size=mins) 24 | 25 | for r in regions: 26 | # excluding same rectangle (with different segments) 27 | if r['rect'] in candidates: 28 | continue 29 | 30 | # excluding regions smaller than 2000 pixels 31 | if r['size'] < 2000: # TODO: should not be hard-coded; determine from the image size 32 | continue 33 | 34 | # distorted rects 35 | _, _, w, h = r['rect'] 36 | if w / h > 1.2 or h / w > 1.2: 37 | continue 38 | 39 | # rects covering the entire seal image 40 | if w >= (img.shape[0] - 1) * (0.7) and h >= (img.shape[1] - 1) * (0.7): 41 | continue 42 | 43 | candidates.add(r['rect']) 44 | 45 | LOGGER.info("Stage " + str(stage) + " Complete.") 46 | stage += 1 47 | 48 | return candidates 49 |
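A hedged usage sketch for the stage above (the input path is illustrative; in the pipeline the handle comes from extract_seal as a TemporaryFile, and any object with a .name attribute works):

import skimage.io

from stages.region_proposal import region_search

seal = open("seal.jpg")  # stand-in for the pipeline's TemporaryFile
img = skimage.io.imread(seal.name)
height, width = img.shape[:2]

# nine selective-search passes (three scales x three min sizes), pooled
# into one de-duplicated set of (x, y, w, h) candidate rectangles
candidates = region_search.get_candidate_regions(seal, width, height)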
-------------------------------------------------------------------------------- /stages/symbol_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import skimage.io 5 | import skimage.transform 6 | 7 | import caffe 8 | from helpers import logger 9 | 10 | LOGGER = logger.create_logger(__name__) 11 | 12 | 13 | def get_symbol_images(symbols_dir): 14 | symbols = list() 15 | symbols_list = sorted(os.listdir(symbols_dir.name), key=lambda i: int(os.path.splitext(i)[0])) 16 | for filename in symbols_list: 17 | image_path = os.path.join(symbols_dir.name, filename) 18 | symbols.append([image_path, caffe.io.load_image(image_path, color=False)]) 19 | 20 | return symbols 21 | 22 | 23 | def get_symbol_classifications(symbols): 24 | if os.environ["IS_GPU"] == "1": 25 | caffe.set_device(0) 26 | caffe.set_mode_gpu() 27 | else: 28 | caffe.set_mode_cpu() 29 | 30 | classifier = caffe.Classifier(os.path.join(os.environ["JAR_NOJAR_MODELS_DIR"], "deploy.prototxt"), 31 | os.path.join(os.environ["JAR_NOJAR_MODELS_DIR"], "weights.caffemodel"), 32 | image_dims=[64, 64], 33 | raw_scale=255.0) 34 | 35 | LOGGER.info("Classifying " + str(len(symbols)) + " inputs.") 36 | 37 | predictions = classifier.predict([s[1] for s in symbols]) 38 | 39 | symbol_sequence = list() 40 | classes = np.array([0, 1]) 41 | 42 | for i, prediction in enumerate(predictions): 43 | idx = list((-prediction).argsort()) 44 | prediction = classes[np.array(idx)] 45 | 46 | if prediction[0] == 1: 47 | symbol_sequence.append([symbols[i], "jar"]) 48 | elif prediction[0] == 0: 49 | symbol_sequence.append([symbols[i], "no-jar"]) 50 | 51 | return symbol_sequence 52 | 53 | 54 | def process_symbols(symbols_dir): 55 | symbols = get_symbol_images(symbols_dir) 56 | symbol_sequence = get_symbol_classifications(symbols) 57 | 58 | return symbol_sequence 59 | -------------------------------------------------------------------------------- /stages/symbol_segmentation.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import skimage.color 5 | import skimage.io 6 | import skimage.morphology 7 | import skimage.transform 8 | from scipy import ndimage 9 | from skimage.filters import gaussian_filter, threshold_otsu 10 | 11 | from helpers.temp import TemporaryFile, TemporaryDirectory 12 | 13 | 14 | def extend_rect(r): 15 | return (min([i[0] for i in r]), min([i[1] for i in r]), max([i[0] + i[2] for i in r]) - min([i[0] for i in r]), max([i[1] + i[3] for i in r]) - min([i[1] for i in r])) 16 | 17 | 18 | def remove_contained_regions(candidates): 19 | refined_regions = set() 20 | for x, y, w, h in candidates: 21 | candidates_complement = set(candidates) 22 | candidates_complement.remove((x, y, w, h)) 23 | is_not_contained = [] 24 | for x1, y1, w1, h1 in candidates_complement: 25 | a = {'x1': x, 'y1': y, 'x2': x + w, 'y2': y + h, 'w': w, 'h': h} 26 | b = {'x1': x1, 'y1': y1, 'x2': x1 + w1, 'y2': y1 + h1, 'w': w1, 'h': h1} 27 | 28 | # overlap between a and b 29 | area_a = a['w'] * a['h'] 30 | area_b = b['w'] * b['h'] 31 | area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \ 32 | np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])]) 33 | 34 | area_union = area_a + area_b - area_intersection 35 | overlap_ab = float(area_intersection) / float(area_union) 36 | 37 | if overlap_ab > 0.0: 38 | if x1 <= x and y1 <= y and x1 + w1 >= x + w and y1 + h1 >= y + h: 39 | is_not_contained.append(False) 40 | else: 41 | is_not_contained.append(True) 42 | else: 43 | is_not_contained.append(True) 44 | 45 | if all(is_not_contained): 46 | refined_regions.add((x, y, w, h)) 47 | 48 | return refined_regions 49 | 50 |
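# Editor's worked example (hypothetical boxes, safe to delete) of the
# intersection/union arithmetic used in remove_contained_regions above
# and draw_superbox below.
def _demo_overlap():
    a = {'x1': 0, 'y1': 0, 'x2': 10, 'y2': 10, 'w': 10, 'h': 10}
    b = {'x1': 5, 'y1': 5, 'x2': 15, 'y2': 15, 'w': 10, 'h': 10}
    area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \
        np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])])
    area_union = a['w'] * a['h'] + b['w'] * b['h'] - area_intersection
    assert area_intersection == 25  # the 5x5 overlapping patch
    assert area_union == 175        # 100 + 100 - 25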
51 | def draw_superbox(refined_regions, old_superboxes=[]): 52 | no_overlap = [] 53 | draw_superbox_candidates = [] 54 | 55 | superboxes = set() 56 | 57 | if not old_superboxes: 58 | draw_superbox_candidates = refined_regions 59 | else: 60 | draw_superbox_candidates = old_superboxes 61 | 62 | base_list = list(draw_superbox_candidates) 63 | base_set = set(draw_superbox_candidates) 64 | 65 | # (x1,y1) top-left coord, (x2,y2) bottom-right coord, (w,h) size 66 | while base_list: 67 | x1, y1, w1, h1 = base_list[0] 68 | 69 | if len(base_list) == 1: # super box 70 | superboxes.add((x1, y1, w1, h1)) 71 | 72 | base_list.remove((x1, y1, w1, h1)) 73 | 74 | overlap = set() 75 | base_set.remove((x1, y1, w1, h1)) 76 | for x2, y2, w2, h2 in base_set: 77 | a = {'x1': x1, 'y1': y1, 'x2': x1 + w1, 'y2': y1 + h1, 'w': w1, 'h': h1} 78 | b = {'x1': x2, 'y1': y2, 'x2': x2 + w2, 'y2': y2 + h2, 'w': w2, 'h': h2} 79 | 80 | # overlap between A and B 81 | area_a = a['w'] * a['h'] 82 | area_b = b['w'] * b['h'] 83 | area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \ 84 | np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])]) 85 | 86 | # area_union = area_a + area_b - area_intersection 87 | # overlap_ab = float(area_intersection) / float(area_union) 88 | 89 | overlap_a = float(area_intersection) / float(area_a) 90 | overlap_b = float(area_intersection) / float(area_b) 91 | 92 | if overlap_a >= 0.15 or overlap_b >= 0.15: 93 | overlap.add((b['x1'], b['y1'], b['w'], b['h'])) 94 | 95 | if overlap: # overlap 96 | base_set = base_set - overlap 97 | base_list = [bl for bl in base_list if bl not in overlap] 98 | overlap.add((a['x1'], a['y1'], a['w'], a['h'])) 99 | 100 | superboxes.add((min([i[0] for i in overlap]), min([i[1] for i in overlap]), max([i[0] + i[2] for i in overlap]) - 101 | min([i[0] for i in overlap]), max([i[1] + i[3] for i in overlap]) - min([i[1] for i in overlap]))) 102 | 103 | no_overlap.append(False) 104 | else: # no overlap 105 | superboxes.add((x1, y1, w1, h1)) 106 | no_overlap.append(True) 107 | 108 | if all(no_overlap): 109 | return superboxes 110 | else: 111 | superboxes = draw_superbox(refined_regions, superboxes) 112 | return superboxes 113 | 114 | 115 | def extend_superbox(superboxes): 116 | extended_superboxes = set() 117 | processed = set() 118 | 119 | for x, y, w, h in superboxes: 120 | if (x, y, w, h) not in processed: 121 | group = set() 122 | 123 | group.add((x, y, w, h)) 124 | for x1, y1, w1, h1 in superboxes: 125 | if x1 >= x and (w1 + x1) <= w + x: 126 | group.add((x1, y1, w1, h1)) 127 | processed.add((x1, y1, w1, h1)) 128 | 129 | extended_superboxes.add(extend_rect(group)) 130 | 131 | return remove_contained_regions(extended_superboxes) 132 | 133 |
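# Editor's sketch (synthetic input, safe to delete): the essence of
# get_candidate_symbol_regions below -- binarise with Otsu's threshold so
# dark "ink" becomes foreground, label the connected components, and turn
# the blob slices into (x, y, w, h) boxes. Plain ndimage.label stands in
# here for the blur-then-label chain used in the real function.
def _demo_blob_boxes():
    gray = np.ones((60, 60))
    gray[20:40, 25:35] = 0.0               # one dark symbol on white
    binary = gray <= threshold_otsu(gray)  # ink pixels become True
    labels, _ = ndimage.label(binary)
    boxes = [(c2.start, c1.start, c2.stop - c2.start, c1.stop - c1.start)
             for c1, c2 in ndimage.find_objects(labels)]
    assert boxes == [(25, 20, 10, 20)]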
134 | def get_candidate_symbol_regions(image, text_regions, updated_width, updated_height): 135 | img = skimage.io.imread(image.name)[:, :, :3] 136 | if not (updated_height == len(img) and updated_width == len(img[0])): 137 | img = skimage.transform.resize(img, (updated_height, updated_width)) 138 | 139 | symbol_regions = dict() 140 | for x, y, w, h in text_regions: 141 | text_region_image = img[y: y + h, x: x + w] 142 | text_region_image_width = len(text_region_image[0]) 143 | text_region_image_height = len(text_region_image) 144 | 145 | text_region_gray_image = skimage.color.rgb2gray(text_region_image) 146 | text_region_binary_image = text_region_gray_image <= threshold_otsu(text_region_gray_image) 147 | 148 | temp = TemporaryFile(".png") 149 | skimage.io.imsave(temp.name, text_region_binary_image) 150 | text_region_binary_image = skimage.io.imread(temp.name) 151 | 152 | text_region_blurred_image = gaussian_filter(text_region_binary_image, sigma=3.5) 153 | text_region_blobs = text_region_blurred_image > text_region_blurred_image.mean() 154 | 155 | text_region_labels = skimage.morphology.label(text_region_blobs, neighbors=4) 156 | 157 | symbol_blobs = ndimage.find_objects(text_region_labels) 158 | candidate_symbol_regions = set() 159 | 160 | for c1, c2 in symbol_blobs: 161 | if (c2.stop - c2.start) * (c1.stop - c1.start) > (text_region_image.shape[0] * text_region_image.shape[1]) * (0.026): 162 | if (c2.stop - c2.start) * (c1.stop - c1.start) < (text_region_image.shape[0] * text_region_image.shape[1]) * (0.90): 163 | candidate_symbol_regions.add( 164 | (c2.start, c1.start, c2.stop - c2.start, c1.stop - c1.start)) 165 | 166 | symbol_regions[str((x, y, w, h))] = dict() 167 | symbol_regions[str((x, y, w, h))]["image"] = text_region_image 168 | symbol_regions[str((x, y, w, h))]["regions"] = candidate_symbol_regions 169 | symbol_regions[str((x, y, w, h))]["width"] = text_region_image_width 170 | symbol_regions[str((x, y, w, h))]["height"] = text_region_image_height 171 | 172 | return symbol_regions 173 | 174 | 175 | def process_candidate_symbol_regions(symbol_regions): 176 | for text_region in symbol_regions: 177 | candidate_symbol_regions = symbol_regions[text_region]["regions"] 178 | refined_regions = remove_contained_regions(candidate_symbol_regions) 179 | superboxes = draw_superbox(refined_regions) 180 | refined_extended_superboxes = extend_superbox(superboxes) 181 | 182 | symbol_regions[text_region]["refined_regions"] = refined_extended_superboxes 183 | 184 | return symbol_regions 185 | 186 | 187 | def get_symbols(image, text_regions, updated_width, updated_height): 188 | symbol_regions = get_candidate_symbol_regions(image, text_regions, updated_width, updated_height) 189 | symbol_regions = process_candidate_symbol_regions(symbol_regions) 190 | 191 | symbols = list() 192 | for text_region in symbol_regions: 193 | for x, y, w, h in symbol_regions[text_region]["refined_regions"]: 194 | symbols.append([(x, y, w, h), symbol_regions[text_region]["image"][y: y + h, x: x + w]]) 195 | 196 | # sort the symbols according to horizontal order 197 | symbols = sorted(symbols, key=lambda x: x[0][0]) 198 | 199 | # save all the symbols in a TemporaryDirectory 200 | symbols_dir = TemporaryDirectory() 201 | 202 | for i, symbol in enumerate(symbols): 203 | skimage.io.imsave(os.path.join(symbols_dir.name, str(i) + ".jpg"), symbol[1]) 204 | 205 | return symbols_dir 206 | -------------------------------------------------------------------------------- /stages/text_region_extraction/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tpsatish95/indus-script-ocr/3bc0c2f92c7e7926dab9efb1af29d92753d20672/stages/text_region_extraction/__init__.py -------------------------------------------------------------------------------- /stages/text_region_extraction/region_classification.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import numpy as np 4 | import skimage.io 5 | import skimage.transform 6 | 7 | import caffe 8 | from helpers.temp import TemporaryFile 9 | from helpers import logger 10 | 11 | LOGGER = logger.create_logger(__name__) 12 | 13 | 14 | def get_region_crops(image, grouped_regions, new_width, new_height): 15 | img = skimage.io.imread(image.name)[:, :, :3] 16 | if not (new_height == len(img) and new_width == len(img[0])): 17 | img = skimage.transform.resize(img, (new_height, new_width)) 18 | 19 | region_coords = list() 20 | region_crops = list() 21 | for x, y, w, h in grouped_regions: 22 | temp = TemporaryFile(".jpg") 23 | skimage.io.imsave(temp.name, img[y: y + h, x: x + w]) 24 | region_crops.append(caffe.io.load_image(temp.name)) 25 | region_coords.append((x, y, w, h)) 26 | 27 | return region_coords, region_crops 28 | 29 |
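# Editor's illustration (hypothetical softmax row, safe to delete) of the
# argsort decoding used in classify_regions below: sorting the negated
# probabilities ranks classes best-first; the code treats class 0 as
# no-text, 1 as text and 2 as a region holding both.
def _demo_argsort_decoding():
    classes = np.array([0, 1, 2])
    prediction = np.array([0.1, 0.7, 0.2])  # hypothetical softmax output
    idx = list((-prediction).argsort())     # [1, 2, 0]
    ranked = classes[np.array(idx)]
    assert ranked[0] == 1                   # top-1 class: text region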
30 | def get_predictions(region_crops): 31 | if os.environ["IS_GPU"] == "1": 32 | caffe.set_device(0) 33 | caffe.set_mode_gpu() 34 | else: 35 | caffe.set_mode_cpu() 36 | 37 | classifier = caffe.Classifier(os.path.join(os.environ["TEXT_NOTEXT_MODELS_DIR"], "deploy.prototxt"), 38 | os.path.join(os.environ["TEXT_NOTEXT_MODELS_DIR"], "weights.caffemodel"), 39 | mean=np.array([104, 117, 123], dtype='f4'), 40 | image_dims=[224, 224], 41 | raw_scale=255.0, 42 | channel_swap=[2, 1, 0]) 43 | 44 | LOGGER.info("Classifying " + str(len(region_crops)) + " inputs.") 45 | 46 | predictions = classifier.predict(region_crops) 47 | 48 | return predictions 49 | 50 | 51 | def classify_regions(region_coords, region_crops): 52 | text_regions = set() 53 | no_text_regions = set() 54 | both_regions = set() 55 | classes = np.array([0, 1, 2]) 56 | 57 | try: 58 | predictions = get_predictions(region_crops) 59 | for i, prediction in enumerate(predictions): 60 | idx = list((-prediction).argsort()) 61 | prediction = classes[np.array(idx)] 62 | 63 | if prediction[0] == 1 or prediction[0] == 2: 64 | text_regions.add(region_coords[i]) 65 | elif prediction[0] == 0: 66 | no_text_regions.add(region_coords[i]) 67 | if prediction[0] == 2: 68 | both_regions.add(region_coords[i]) 69 | except Exception: 70 | LOGGER.exception("Failed to classify regions!") 71 | 72 | return text_regions, no_text_regions, both_regions 73 | 74 | 75 | def process_regions(image, grouped_regions, new_width, new_height): 76 | region_coords, region_crops = \ 77 | get_region_crops(image, grouped_regions, new_width, new_height) 78 | text_regions, no_text_regions, both_regions = \ 79 | classify_regions(region_coords, region_crops) 80 | 81 | return text_regions, no_text_regions, both_regions 82 | -------------------------------------------------------------------------------- /stages/text_region_extraction/text_region_formulation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def extend_text_rect(l): 5 | return (min([i[0] for i in l]), min([i[1] for i in l]), max([i[0] + i[2] for i in l]) - min([i[0] for i in l]), max([i[3] for i in l])) 6 | 7 | 8 | def refine_text_regions(text_regions, width, height): 9 | refined_text_regions = set() 10 | processed = set() 11 | 12 | threshold = ((width + height) / 2) * (0.25) 13 | for x, y, w, h in text_regions: 14 | if (x, y, w, h) not in processed: 15 | group = set() 16 | group.add((x, y, w, h)) 17 | for x1, y1, w1, h1 in text_regions: 18 | if abs(y1 - y) <= threshold and abs(h1 - h) <= threshold: 19 | group.add((x1, y1, w1, h1)) 20 | processed.add((x1, y1, w1, h1)) 21 | refined_text_regions.add(extend_text_rect(group)) 22 | 23 | return refined_text_regions 24 | 25 | 26 | def trim_text_regions(refined_text_regions, no_text_regions, both_regions): 27 | trimmed_text_regions = set() 28 | unwanted_regions = no_text_regions.union(both_regions) 29 | 30 | for x, y, w, h in refined_text_regions: 31 | a = {'x1': x, 'y1': y, 'x2': x + w, 'y2': y + h, 'w': w, 'h': h} 32 | for x1, y1, w1, h1 in unwanted_regions: 33 | b = {'x1': x1, 'y1': y1, 'x2': x1 + w1, 'y2': y1 + h1, 'w': w1, 'h': h1} 34 | 35 | # overlap between a and b 36 | area_a = a['w'] * a['h'] 37 | area_b = b['w'] * b['h'] 38 | area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \ 39 | np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])]) 40 | 41 | area_union = area_a + area_b - area_intersection 42 | overlap_ab = float(area_intersection) / float(area_union) 43 | 44 |
is_overlap = False 45 | ax1, ay1, aw, ah = a['x1'], a['y1'], a['w'], a['h'] 46 | 47 | if overlap_ab > 0.0: 48 | if a['x1'] > b['x1'] and abs(b['x1'] + b['w'] - a['x1']) < a['w'] * 0.20: # b is left to a 49 | ax1 = b['x1'] + b['w'] 50 | is_overlap = True 51 | if a['y1'] < b['y1'] and abs(a['y1'] - b['y1']) > a['h'] * 0.70: # b is bottom to a 52 | ah = a['h'] - (a['y1'] + a['h'] - b['y1']) 53 | is_overlap = True 54 | # if a['y1'] > b['y1']: # b is top to a 55 | # ay1 = b['y1'] + b['h'] 56 | # if a['x1'] < b['x1']: # b is right to a 57 | # aw = a['w'] - (a['x1'] + a['w'] - b['x1']) 58 | # if a['y1'] < b['y1']: # b is bottom to a 59 | # ah = a['h'] - (a['y1'] + a['h'] - b['y1']) 60 | # REPLACE by Cohen Sutherland algo 61 | 62 | a['x1'], a['y1'], a['w'], a['h'] = ax1, ay1, aw, ah 63 | trimmed_text_regions.add((a['x1'], a['y1'], a['w'], a['h'])) 64 | 65 | if is_overlap: 66 | break 67 | 68 | trimmed_text_regions.add((a['x1'], a['y1'], a['w'], a['h'])) 69 | 70 | return trimmed_text_regions 71 | 72 | 73 | def extend_text_regions(refined_text_regions, both_regions): 74 | extended_text_regions = set() 75 | 76 | for x, y, w, h in refined_text_regions: 77 | a = {'x1': x, 'y1': y, 'x2': x + w, 'y2': y + h, 'w': w, 'h': h} 78 | for x1, y1, w1, h1 in both_regions: 79 | b = {'x1': x1, 'y1': y1, 'x2': x1 + w1, 'y2': y1 + h1, 'w': w1, 'h': h1} 80 | 81 | # overlap between a and b 82 | area_a = a['w'] * a['h'] 83 | area_b = b['w'] * b['h'] 84 | area_intersection = np.max([0, np.min([a['x2'], b['x2']]) - np.max([a['x1'], b['x1']])]) * \ 85 | np.max([0, np.min([a['y2'], b['y2']]) - np.max([a['y1'], b['y1']])]) 86 | 87 | area_union = area_a + area_b - area_intersection 88 | overlap_ab = float(area_intersection) / float(area_union) 89 | 90 | is_overlap = False 91 | ax1, ay1, aw, ah = a['x1'], a['y1'], a['w'], a['h'] 92 | if overlap_ab > 0.0: 93 | if a['x1'] > b['x1'] and abs(b['x1'] + b['w'] - a['x1']) < a['w'] * 0.20: # b is left to a 94 | ax1 = b['x1'] 95 | aw = a['x1'] + a['w'] - b['x1'] 96 | is_overlap = True 97 | # if a['y1'] < b['y1'] and abs(a['y1'] - b['y1']) > a['h']*0.70: # b is bottom to a 98 | # ah = a['h'] - (a['y1'] + a['h'] - b['y1']) 99 | # if a['y1'] > b['y1']: # b is top to a 100 | # ay1 = b['y1'] + b['h'] 101 | if a['x1'] < b['x1']: # b is right to a 102 | aw = b['x1'] + b['w'] - a['x1'] 103 | is_overlap = True 104 | # if a['y1'] < b['y1']: # b is bottom to a 105 | # ah = a['h'] - (a['y1'] + a['h'] - b['y1']) 106 | # REPLACE by Cohen Sutherland algo 107 | 108 | a['x1'], a['y1'], a['w'], a['h'] = ax1, ay1, aw, ah 109 | extended_text_regions.add((a['x1'], a['y1'], a['w'], a['h'])) 110 | if is_overlap: 111 | break 112 | extended_text_regions.add((a['x1'], a['y1'], a['w'], a['h'])) 113 | extended_text_regions = extended_text_regions - both_regions # CHANGE this line 114 | 115 | return extended_text_regions 116 | 117 | 118 | def process_regions(text_regions, no_text_regions, both_regions, width, height): 119 | refined_text_regions = refine_text_regions(text_regions, width, height) 120 | trimmed_text_regions = trim_text_regions(refined_text_regions, no_text_regions, both_regions) 121 | extended_text_regions = extend_text_regions(refined_text_regions, both_regions) 122 | 123 | return extended_text_regions 124 | --------------------------------------------------------------------------------
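Taken together, the stages above form the OCR pipeline. The actual wiring lives in app.py; the sketch below is a hedged reconstruction from the stage signatures, and the input path is illustrative:

import skimage.io

from stages.region_proposal import extract_seal, region_grouping, region_search
from stages.text_region_extraction import region_classification, text_region_formulation
from stages import symbol_classification, symbol_segmentation

seal = extract_seal.crop_white("seal_photo.jpg")  # illustrative input path
img = skimage.io.imread(seal.name)
height, width = img.shape[:2]

candidates = region_search.get_candidate_regions(seal, width, height)
grouped = region_grouping.group_candidate_regions(candidates, width, height)
text, no_text, both = region_classification.process_regions(seal, grouped, width, height)
text_regions = text_region_formulation.process_regions(text, no_text, both, width, height)
symbols_dir = symbol_segmentation.get_symbols(seal, text_regions, width, height)
sequence = symbol_classification.process_symbols(symbols_dir)  # [[symbol, "jar"/"no-jar"], ...]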