├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE.md ├── README.md ├── benchmarks └── bench.py ├── camera └── __init__.py ├── docs ├── NVIDIA_CLA_v1.0.1.pdf ├── README_images │ ├── pringle_kirkland.gif │ └── ruler.gif ├── images │ ├── Broadway.jpg │ ├── LAX-wayfinding-signage.jpg │ ├── LAX-wayfinding-terminal-signage.jpg │ ├── NYC_intersection.jpg │ ├── airport_amsterdam_signs.jpg │ ├── airport_precluded_signs.jpg │ ├── ariport_inside_signs.jpg │ ├── foods_shelf_close_up.jpg │ ├── storm_nyc.jpg │ ├── street_sign_with_shadwo.jpg │ └── streetname-BANNER-2.jpg └── labeled-images │ ├── labeled_Broadway.jpg │ ├── labeled_LAX-wayfinding-signage.jpg │ ├── labeled_LAX-wayfinding-terminal-signage.jpg │ ├── labeled_NYC_intersection.jpg │ ├── labeled_airport_amsterdam_signs.jpg │ ├── labeled_airport_precluded_signs.jpg │ ├── labeled_ariport_inside_signs.jpg │ ├── labeled_foods_shelf_close_up.jpg │ ├── labeled_storm_nyc.jpg │ ├── labeled_street_sign_with_shadwo.jpg │ └── labeled_streetname-BANNER-2.jpg ├── example_camera.py ├── example_images.py ├── models └── .gitkeep ├── requirements.txt └── video_capture.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # pytorch 141 | *.pth 142 | 143 | # assets 144 | *.pdf 145 | *.png 146 | *.jpg 147 | *.txt 148 | 149 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "EasyOCR"] 2 | path = EasyOCR 3 | url = https://github.com/akamboj2/EasyOCR 4 | [submodule "torch2trt"] 5 | path = torch2trt 6 | url = https://github.com/tomek-l/torch2trt.git 7 | branch = easyocr_trt8 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 
20 | 21 | ARG BASE_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.6.1-pth1.9-py3 22 | FROM ${BASE_IMAGE} 23 | 24 | ENV DEBIAN_FRONTEND=noninteractive 25 | 26 | #for video_capture.py 27 | RUN pip3 install --upgrade pip 28 | RUN pip3 install opencv-python 29 | RUN pip3 install traitlets 30 | RUN pip3 install scipy 31 | RUN pip3 install tifffile 32 | 33 | #for easyocr 34 | RUN pip3 install python-bidi 35 | ENV PYTHONIOENCODING=utf-8 36 | 37 | #for torch2trt 38 | RUN git clone --recursive -b jax-jp4.6.1-trt7 https://github.com/akamboj2/torch2trt.git torch2trt && \ 39 | cd torch2trt && \ 40 | python3 setup.py install && \ 41 | cd ../ && \ 42 | rm -rf torch2trt 43 | 44 | 45 | #for EAST: 46 | RUN apt-get update -y 47 | RUN apt-get install -y libgeos-dev 48 | RUN pip3 install shapely==1.6.4 49 | RUN pip3 install lanms==1.0.2 50 | 51 | #Run video capture.py 52 | # RUN git clone --recurse-submodules https://gitlab-master.nvidia.com/akamboj/scene-text-recognition.git 53 | # RUN cd EasyOCR && \ 54 | # python3 setup.py install && \ 55 | # cd ../ 56 | COPY . 
scene-text-recognition 57 | RUN cd scene-text-recognition/EasyOCR && \ 58 | pip3 install --upgrade pip && \ 59 | python3 setup.py install && \ 60 | cd ../ 61 | RUN python3 video_capture.py -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 NVIDIA CORPORATION 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 23 | 24 | The following open source license is used by the EasyOCR git submodule included in this project. 25 | 26 | Apache License 27 | Version 2.0, January 2004 28 | http://www.apache.org/licenses/ 29 | 30 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 31 | 32 | 1. Definitions. 33 | 34 | "License" shall mean the terms and conditions for use, reproduction, 35 | and distribution as defined by Sections 1 through 9 of this document. 
36 | 37 | "Licensor" shall mean the copyright owner or entity authorized by 38 | the copyright owner that is granting the License. 39 | 40 | "Legal Entity" shall mean the union of the acting entity and all 41 | other entities that control, are controlled by, or are under common 42 | control with that entity. For the purposes of this definition, 43 | "control" means (i) the power, direct or indirect, to cause the 44 | direction or management of such entity, whether by contract or 45 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 46 | outstanding shares, or (iii) beneficial ownership of such entity. 47 | 48 | "You" (or "Your") shall mean an individual or Legal Entity 49 | exercising permissions granted by this License. 50 | 51 | "Source" form shall mean the preferred form for making modifications, 52 | including but not limited to software source code, documentation 53 | source, and configuration files. 54 | 55 | "Object" form shall mean any form resulting from mechanical 56 | transformation or translation of a Source form, including but 57 | not limited to compiled object code, generated documentation, 58 | and conversions to other media types. 59 | 60 | "Work" shall mean the work of authorship, whether in Source or 61 | Object form, made available under the License, as indicated by a 62 | copyright notice that is included in or attached to the work 63 | (an example is provided in the Appendix below). 64 | 65 | "Derivative Works" shall mean any work, whether in Source or Object 66 | form, that is based on (or derived from) the Work and for which the 67 | editorial revisions, annotations, elaborations, or other modifications 68 | represent, as a whole, an original work of authorship. For the purposes 69 | of this License, Derivative Works shall not include works that remain 70 | separable from, or merely link (or bind by name) to the interfaces of, 71 | the Work and Derivative Works thereof. 
72 | 73 | "Contribution" shall mean any work of authorship, including 74 | the original version of the Work and any modifications or additions 75 | to that Work or Derivative Works thereof, that is intentionally 76 | submitted to Licensor for inclusion in the Work by the copyright owner 77 | or by an individual or Legal Entity authorized to submit on behalf of 78 | the copyright owner. For the purposes of this definition, "submitted" 79 | means any form of electronic, verbal, or written communication sent 80 | to the Licensor or its representatives, including but not limited to 81 | communication on electronic mailing lists, source code control systems, 82 | and issue tracking systems that are managed by, or on behalf of, the 83 | Licensor for the purpose of discussing and improving the Work, but 84 | excluding communication that is conspicuously marked or otherwise 85 | designated in writing by the copyright owner as "Not a Contribution." 86 | 87 | "Contributor" shall mean Licensor and any individual or Legal Entity 88 | on behalf of whom a Contribution has been received by Licensor and 89 | subsequently incorporated within the Work. 90 | 91 | 2. Grant of Copyright License. Subject to the terms and conditions of 92 | this License, each Contributor hereby grants to You a perpetual, 93 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 94 | copyright license to reproduce, prepare Derivative Works of, 95 | publicly display, publicly perform, sublicense, and distribute the 96 | Work and such Derivative Works in Source or Object form. 97 | 98 | 3. Grant of Patent License. 
Subject to the terms and conditions of 99 | this License, each Contributor hereby grants to You a perpetual, 100 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 101 | (except as stated in this section) patent license to make, have made, 102 | use, offer to sell, sell, import, and otherwise transfer the Work, 103 | where such license applies only to those patent claims licensable 104 | by such Contributor that are necessarily infringed by their 105 | Contribution(s) alone or by combination of their Contribution(s) 106 | with the Work to which such Contribution(s) was submitted. If You 107 | institute patent litigation against any entity (including a 108 | cross-claim or counterclaim in a lawsuit) alleging that the Work 109 | or a Contribution incorporated within the Work constitutes direct 110 | or contributory patent infringement, then any patent licenses 111 | granted to You under this License for that Work shall terminate 112 | as of the date such litigation is filed. 113 | 114 | 4. Redistribution. 
You may reproduce and distribute copies of the 115 | Work or Derivative Works thereof in any medium, with or without 116 | modifications, and in Source or Object form, provided that You 117 | meet the following conditions: 118 | 119 | (a) You must give any other recipients of the Work or 120 | Derivative Works a copy of this License; and 121 | 122 | (b) You must cause any modified files to carry prominent notices 123 | stating that You changed the files; and 124 | 125 | (c) You must retain, in the Source form of any Derivative Works 126 | that You distribute, all copyright, patent, trademark, and 127 | attribution notices from the Source form of the Work, 128 | excluding those notices that do not pertain to any part of 129 | the Derivative Works; and 130 | 131 | (d) If the Work includes a "NOTICE" text file as part of its 132 | distribution, then any Derivative Works that You distribute must 133 | include a readable copy of the attribution notices contained 134 | within such NOTICE file, excluding those notices that do not 135 | pertain to any part of the Derivative Works, in at least one 136 | of the following places: within a NOTICE text file distributed 137 | as part of the Derivative Works; within the Source form or 138 | documentation, if provided along with the Derivative Works; or, 139 | within a display generated by the Derivative Works, if and 140 | wherever such third-party notices normally appear. The contents 141 | of the NOTICE file are for informational purposes only and 142 | do not modify the License. You may add Your own attribution 143 | notices within Derivative Works that You distribute, alongside 144 | or as an addendum to the NOTICE text from the Work, provided 145 | that such additional attribution notices cannot be construed 146 | as modifying the License. 
147 | 148 | You may add Your own copyright statement to Your modifications and 149 | may provide additional or different license terms and conditions 150 | for use, reproduction, or distribution of Your modifications, or 151 | for any such Derivative Works as a whole, provided Your use, 152 | reproduction, and distribution of the Work otherwise complies with 153 | the conditions stated in this License. 154 | 155 | 5. Submission of Contributions. Unless You explicitly state otherwise, 156 | any Contribution intentionally submitted for inclusion in the Work 157 | by You to the Licensor shall be under the terms and conditions of 158 | this License, without any additional terms or conditions. 159 | Notwithstanding the above, nothing herein shall supersede or modify 160 | the terms of any separate license agreement you may have executed 161 | with Licensor regarding such Contributions. 162 | 163 | 6. Trademarks. This License does not grant permission to use the trade 164 | names, trademarks, service marks, or product names of the Licensor, 165 | except as required for reasonable and customary use in describing the 166 | origin of the Work and reproducing the content of the NOTICE file. 167 | 168 | 7. Disclaimer of Warranty. Unless required by applicable law or 169 | agreed to in writing, Licensor provides the Work (and each 170 | Contributor provides its Contributions) on an "AS IS" BASIS, 171 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 172 | implied, including, without limitation, any warranties or conditions 173 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 174 | PARTICULAR PURPOSE. You are solely responsible for determining the 175 | appropriateness of using or redistributing the Work and assume any 176 | risks associated with Your exercise of permissions under this License. 177 | 178 | 8. Limitation of Liability. 
In no event and under no legal theory, 179 | whether in tort (including negligence), contract, or otherwise, 180 | unless required by applicable law (such as deliberate and grossly 181 | negligent acts) or agreed to in writing, shall any Contributor be 182 | liable to You for damages, including any direct, indirect, special, 183 | incidental, or consequential damages of any character arising as a 184 | result of this License or out of the use or inability to use the 185 | Work (including but not limited to damages for loss of goodwill, 186 | work stoppage, computer failure or malfunction, or any and all 187 | other commercial damages or losses), even if such Contributor 188 | has been advised of the possibility of such damages. 189 | 190 | 9. Accepting Warranty or Additional Liability. While redistributing 191 | the Work or Derivative Works thereof, You may choose to offer, 192 | and charge a fee for, acceptance of support, warranty, indemnity, 193 | or other liability obligations and/or rights consistent with this 194 | License. However, in accepting such obligations, You may act only 195 | on Your own behalf and on Your sole responsibility, not on behalf 196 | of any other Contributor, and only if You agree to indemnify, 197 | defend, and hold each Contributor harmless for any liability 198 | incurred by, or claims asserted against, such Contributor by reason 199 | of your accepting any such warranty or additional liability. 200 | 201 | END OF TERMS AND CONDITIONS 202 | 203 | APPENDIX: How to apply the Apache License to your work. 204 | 205 | To apply the Apache License to your work, attach the following 206 | boilerplate notice, with the fields enclosed by brackets "[]" 207 | replaced with your own identifying information. (Don't include 208 | the brackets!) The text should be enclosed in the appropriate 209 | comment syntax for the file format. 
We also recommend that a 210 | file or class name and description of purpose be included on the 211 | same "printed page" as the copyright notice for easier 212 | identification within third-party archives. 213 | 214 | Copyright [yyyy] [name of copyright owner] 215 | 216 | Licensed under the Apache License, Version 2.0 (the "License"); 217 | you may not use this file except in compliance with the License. 218 | You may obtain a copy of the License at 219 | 220 | http://www.apache.org/licenses/LICENSE-2.0 221 | 222 | Unless required by applicable law or agreed to in writing, software 223 | distributed under the License is distributed on an "AS IS" BASIS, 224 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 225 | See the License for the specific language governing permissions and 226 | limitations under the License. 227 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scene Text Recognition 2 | 3 | Real-time scene text recognition accelerated with NVIDIA TensorRT 4 | 5 | https://user-images.githubusercontent.com/26127866/144195980-64935bb7-47d5-404f-8b89-ac99c013ca97.mp4 6 | 7 | ## Quickstart 8 | 9 | 1. Clone Repo 10 | ```bash 11 | git clone --recursive git@github.com:tomek-l/nv-scene-text-recognition.git 12 | ``` 13 | 14 | 2. Install pytorch, torchvision 15 | ```bash 16 | wget https://raw.githubusercontent.com/tomek-l/jetson-install-pytorch/master/install_torch_v1.9.sh 17 | bash install_torch_v1.9.sh 18 | pip3 install -r requirements.txt 19 | ``` 20 | 21 | 3. Install torch2trt 22 | 23 | Until this [PR](https://github.com/NVIDIA-AI-IOT/torch2trt/pull/627) is merged use Chitoku's [branch](https://github.com/chitoku/torch2trt/tree/jp4.6_tensorrt8) containing a fix for TensorRT 8. 24 | ```bash 25 | cd torch2trt 26 | sudo python3 setup.py install --plugins 27 | ``` 28 | 29 | 4. 
where workdir is the directory containing this cloned repo, or is the cloned repo. 47 | 48 | If you are using a realtime camera: 49 | ``` 50 | xhost + 51 | sudo docker run -it --rm -v ~/workdir:/workdir/ --runtime nvidia --network host -e DISPLAY=$DISPLAY --device /dev/video0:/dev/video0 scene-text-recognition 52 | ``` 53 | Where /dev/video0 is the correct device id mapped into the container. This can be found using:
87 | This program will store the Torch2trt state dictionaries in the torch2trt_models directory. 88 | 89 | #### 3. video_capture.py 90 | This program uses an attached USB camera to display a realtime video. The code will display bounding boxes around the text in the video and output the text in the terminal. Click on the video screen and type 'q' to terminate the program. 91 | After plugging in the USB camera, but before running the python file, check the device id, and make sure that is passed into 'cap = cv2.VideoCapture(0)' line. By default we assume it is zero; change the argument in cv2.VideoCapture() to the correct device id before running the program. The device id can be checked by doing:
The scene text recognition framework used here is a modified version of the EasyOCR open-source code [EasyOCR](https://github.com/JaidedAI/EasyOCR). 131 | 132 | Below are the sources of the default [detection](https://arxiv.org/abs/1904.01941) and [recognition](https://arxiv.org/abs/1507.05717) models:
143 | -------------------------------------------------------------------------------- /benchmarks/bench.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | """ 23 | import easyocr 24 | import torch 25 | import torch.nn as nn 26 | from torch2trt import torch2trt 27 | import time 28 | import os 29 | 30 | #torch2trt custom converters 31 | from torch2trt import * 32 | @tensorrt_converter('torch.Tensor.__hash__') 33 | @tensorrt_converter('torch.Tensor.get_device') 34 | @tensorrt_converter('torch.Tensor.data_ptr') 35 | @tensorrt_converter('torch.Tensor.is_complex') 36 | @tensorrt_converter('torch.is_grad_enabled') 37 | def suppress_warning(ctx): 38 | #none of these effect the computational path thus don't need converters 39 | pass 40 | 41 | @tensorrt_converter('torch.zeros') 42 | def convert_add(ctx): 43 | input_a = ctx.method_args[0] 44 | output = ctx.method_return 45 | output._trt = add_missing_trt_tensors(ctx.network, [output]) 46 | 47 | def profile(model,dummy_input): 48 | iters=50 49 | with torch.no_grad(): 50 | # warm up 51 | for _ in range(10): 52 | model(dummy_input) 53 | 54 | # throughput evaluate 55 | torch.cuda.current_stream().synchronize() 56 | t0 = time.time() 57 | for _ in range(iters): 58 | model(dummy_input) 59 | torch.cuda.current_stream().synchronize() 60 | t1 = time.time() 61 | throughput = 1.0 * iters / (t1 - t0) 62 | 63 | # latency evaluate 64 | torch.cuda.current_stream().synchronize() 65 | t0 = time.time() 66 | for _ in range(iters): 67 | model(dummy_input) 68 | torch.cuda.current_stream().synchronize() 69 | t1 = time.time() 70 | latency = round(1000.0 * (t1 - t0) / iters, 2) 71 | print("throughput: %.3f fps\t latency: %.3f ms"% (throughput,latency)) 72 | 73 | if __name__ == '__main__': 74 | 75 | reader = easyocr.Reader(['en'],gpu=True) # need to run only once to load model into memory 76 | 77 | if not os.path.exists('torch2trt_models'): 78 | os.makedirs('torch2trt_models') 79 | 80 | #detector: 81 | y = torch.ones((1, 3, 480, 640),dtype=torch.float).cuda() 82 | print("Detector:") 83 | print("Before Conversion:") 84 | profile(reader.detector, y) #throughput: 12.386 latency: 84.190 85 | 86 | if 
os.path.isfile('torch2trt_models/easyocr_detect.pth'): 87 | model_trt_detect = TRTModule() 88 | model_trt_detect.load_state_dict(torch.load('torch2trt_models/easyocr_detect.pth')) 89 | else: 90 | model_trt_detect = torch2trt(reader.detector, [y]) 91 | torch.save(model_trt_detect.state_dict(),'torch2trt_models/easyocr_detect.pth') 92 | 93 | print("After Conversion") 94 | profile(model_trt_detect, y) #throughput: 24.737 latency: 48.990 95 | 96 | 97 | #recognizer 98 | print("\nRecognizer:") 99 | x = torch.ones((1,1,64,320),dtype=torch.float).to('cuda') 100 | reader.recognizer.eval() 101 | print("Before Conversion:") 102 | profile(reader.recognizer, x) #throughput: 36.912 latency: 24.610 103 | 104 | if os.path.isfile('torch2trt_models/easyocr_recognize.pth'): 105 | model_trt_rec = TRTModule() 106 | model_trt_rec.load_state_dict(torch.load('torch2trt_models/easyocr_recognize.pth')) 107 | else: 108 | model_trt_rec = torch2trt(reader.detector, [y]) 109 | torch.save(model_trt_rec.state_dict(),'torch2trt_models/easyocr_recognize.pth') 110 | model_trt_rec = torch2trt(reader.recognizer, [x])#, use_onnx=True) 111 | 112 | print("After Conversion") 113 | profile(model_trt_rec,x) #throughput: 2296.110 latency: 0.450 114 | torch.save(model_trt_rec.state_dict(),'torch2trt_models/easyocr_recognize.pth') 115 | 116 | 117 | """ 118 | TODO: 119 | - benchmark again 120 | - input trt 121 | - look through slides' notes 122 | """ 123 | -------------------------------------------------------------------------------- /camera/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | The MIT License (MIT) 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | """ 20 | 21 | import sys 22 | import time 23 | import numpy as np 24 | from threading import Thread 25 | 26 | import gi 27 | 28 | gi.require_version("Gst", "1.0") 29 | from gi.repository import GObject, Gst 30 | 31 | Gst.init(None) 32 | 33 | 34 | def _sanitize(element) -> Gst.Element: 35 | """ 36 | Passthrough function which sure element is not `None` 37 | Returns `Gst.Element` or raises Error 38 | """ 39 | if element is None: 40 | raise Exception("Element is none!") 41 | else: 42 | return element 43 | 44 | 45 | def _make_element_safe(el_type: str, el_name=None) -> Gst.Element: 46 | """ 47 | Creates a gstremer element using el_type factory. 48 | Returns Gst.Element or throws an error if we fail. 
49 | This is to avoid `None` elements in our pipeline 50 | """ 51 | 52 | # name=None parameter asks Gstreamer to uniquely name the elements for us 53 | el = Gst.ElementFactory.make(el_type, name=el_name) 54 | 55 | if el is not None: 56 | return el 57 | else: 58 | print(f"Pipeline element is None!") 59 | raise NameError(f"Could not create element {el_type}") 60 | 61 | 62 | class Camera: 63 | def __init__(self, sensor_id, fps=None, shape_in=None, shape_out=None) -> None: 64 | 65 | # self._mainloop = GObject.MainLoop() # TODO: use GLib.MainLoop 66 | if any([fps, shape_in, shape_out]): 67 | self._pipeline = self._make_pipeline_with_resize( 68 | sensor_id, fps, shape_in, shape_out 69 | ) 70 | else: 71 | self._pipeline = self._make_pipeline(sensor_id) 72 | self._pipeline.set_state(Gst.State.PLAYING) 73 | self.wait_ready() 74 | 75 | def stop(self): 76 | self._pipeline.set_state(Gst.State.NULL) 77 | 78 | def _make_pipeline_with_resize( 79 | self, sensor_id, fps=None, shape_in=None, shape_out=None 80 | ): 81 | 82 | pipeline = _sanitize(Gst.Pipeline()) 83 | 84 | # Camera 85 | camera = _make_element_safe("nvarguscamerasrc") 86 | camera.set_property("sensor-id", sensor_id) 87 | 88 | # Input CF 89 | camera_cf = self._make_input_capsfilter(fps, shape_in) 90 | 91 | # nvvidconv 92 | conv = _make_element_safe("nvvidconv") 93 | 94 | # Output CF 95 | appsink_cf = self._make_output_capsfilter(shape_out) 96 | 97 | # Appsink 98 | self._appsink = appsink = _make_element_safe("appsink") 99 | 100 | # Add everything 101 | for el in [camera, camera_cf, conv, appsink_cf, appsink]: 102 | pipeline.add(el) 103 | 104 | camera.link(camera_cf) 105 | camera_cf.link(conv) 106 | conv.link(appsink_cf) 107 | appsink_cf.link(appsink) 108 | 109 | return pipeline 110 | 111 | def _make_pipeline(self, sensor_id): 112 | 113 | pipeline = _sanitize(Gst.Pipeline()) 114 | 115 | cam = _make_element_safe("nvarguscamerasrc") 116 | cam.set_property("sensor-id", sensor_id) 117 | 118 | conv = 
_make_element_safe("nvvidconv") 119 | 120 | cf = _make_element_safe("capsfilter") 121 | cf.set_property( 122 | "caps", Gst.Caps.from_string("video/x-raw, format=(string)RGBA") 123 | ) 124 | 125 | self._appsink = appsink = _make_element_safe("appsink") 126 | 127 | for el in [cam, conv, cf, appsink]: 128 | pipeline.add(el) 129 | 130 | cam.link(conv) 131 | conv.link(cf) 132 | cf.link(appsink) 133 | 134 | return pipeline 135 | 136 | @staticmethod 137 | def _make_input_capsfilter(fps, shape_in): 138 | 139 | caps_str = "video/x-raw(memory:NVMM), format=(string)NV12" 140 | 141 | if shape_in: 142 | W_in, H_in = shape_in 143 | caps_str += f", width=(int){W_in}, height=(int){H_in}" 144 | if fps: 145 | caps_str += f" framerate=(fraction){fps}/1" 146 | 147 | caps = Gst.Caps.from_string(caps_str) 148 | in_cf = _make_element_safe("capsfilter") 149 | in_cf.set_property("caps", caps) 150 | 151 | return in_cf 152 | 153 | @staticmethod 154 | def _make_output_capsfilter(shape_out): 155 | print(shape_out) 156 | if shape_out: 157 | W_out, H_out = shape_out 158 | caps = Gst.Caps.from_string( 159 | f"video/x-raw, width={W_out}, height={H_out}, format=(string)BGRx" 160 | ) 161 | else: 162 | caps = Gst.Caps.from_string("video/x-raw, format=(string)RGBA") 163 | 164 | cf = _make_element_safe("capsfilter") 165 | cf.set_property("caps", caps) 166 | return cf 167 | 168 | def read(self): 169 | """ 170 | Returns np.array or None 171 | """ 172 | sample = self._appsink.emit("pull-sample") 173 | if sample is None: 174 | return None 175 | buf = sample.get_buffer() 176 | caps_format = sample.get_caps().get_structure(0) 177 | W, H = caps_format.get_value("width"), caps_format.get_value("height") 178 | C = 4 # Earlier we converted to RGBA 179 | buf2 = buf.extract_dup(0, buf.get_size()) 180 | arr = np.ndarray(shape=(H, W, C), buffer=buf2, dtype=np.uint8) 181 | arr = arr[:, :, :3] # RGBA -> RGB 182 | return arr 183 | 184 | def running(self): 185 | _, state, _ = self._pipeline.get_state(1) 186 | return 
True if state == Gst.State.PLAYING else False 187 | 188 | def wait_ready(self): 189 | while not self.running(): 190 | time.sleep(0.1) 191 | 192 | 193 | class CameraThread(Thread): 194 | def __init__(self, sensor_id) -> None: 195 | 196 | super().__init__() 197 | self._camera = Camera(sensor_id) 198 | self._should_run = True 199 | self._image = self._camera.read() 200 | self.start() 201 | 202 | def run(self): 203 | while self._should_run: 204 | self._image = self._camera.read() 205 | 206 | @property 207 | def image(self): 208 | # NOTE: if we care about atomicity of reads, we can add a lock here 209 | return self._image 210 | 211 | def stop(self): 212 | # TODO: this should be threading.Event 213 | self._should_run = False 214 | self._camera.stop() 215 | 216 | 217 | if __name__ == "__main__": 218 | 219 | camera = Camera(0, shape_in=(1920, 1080), shape_out=(224, 224)) 220 | 221 | for _ in range(10): 222 | start = time.perf_counter() 223 | arr = camera.read() 224 | print( 225 | f"Latency: {time.perf_counter() - start} Image shape: {arr.shape} Image mean: {arr.mean()}" 226 | ) 227 | 228 | camera.stop() 229 | -------------------------------------------------------------------------------- /docs/NVIDIA_CLA_v1.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/NVIDIA_CLA_v1.0.1.pdf -------------------------------------------------------------------------------- /docs/README_images/pringle_kirkland.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/README_images/pringle_kirkland.gif -------------------------------------------------------------------------------- /docs/README_images/ruler.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/README_images/ruler.gif -------------------------------------------------------------------------------- /docs/images/Broadway.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/Broadway.jpg -------------------------------------------------------------------------------- /docs/images/LAX-wayfinding-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/LAX-wayfinding-signage.jpg -------------------------------------------------------------------------------- /docs/images/LAX-wayfinding-terminal-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/LAX-wayfinding-terminal-signage.jpg -------------------------------------------------------------------------------- /docs/images/NYC_intersection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/NYC_intersection.jpg -------------------------------------------------------------------------------- /docs/images/airport_amsterdam_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/airport_amsterdam_signs.jpg -------------------------------------------------------------------------------- 
/docs/images/airport_precluded_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/airport_precluded_signs.jpg -------------------------------------------------------------------------------- /docs/images/ariport_inside_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/ariport_inside_signs.jpg -------------------------------------------------------------------------------- /docs/images/foods_shelf_close_up.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/foods_shelf_close_up.jpg -------------------------------------------------------------------------------- /docs/images/storm_nyc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/storm_nyc.jpg -------------------------------------------------------------------------------- /docs/images/street_sign_with_shadwo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/street_sign_with_shadwo.jpg -------------------------------------------------------------------------------- /docs/images/streetname-BANNER-2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/streetname-BANNER-2.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_Broadway.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_Broadway.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_LAX-wayfinding-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_LAX-wayfinding-signage.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_LAX-wayfinding-terminal-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_LAX-wayfinding-terminal-signage.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_NYC_intersection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_NYC_intersection.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_airport_amsterdam_signs.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_airport_amsterdam_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_airport_precluded_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_airport_precluded_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_ariport_inside_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_ariport_inside_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_foods_shelf_close_up.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_foods_shelf_close_up.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_storm_nyc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_storm_nyc.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_street_sign_with_shadwo.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_street_sign_with_shadwo.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_streetname-BANNER-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_streetname-BANNER-2.jpg -------------------------------------------------------------------------------- /example_camera.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | """ 23 | import cv2 24 | import easyocr 25 | import time 26 | import threading 27 | 28 | from camera import Camera 29 | 30 | def put_boxes(result, arr, text=False): 31 | color = (0, 0, 255) 32 | imgHeight, imgWidth, _ = imageData.shape 33 | thick = 2 34 | font_scale = 1 35 | for res in result: 36 | top_left, btm_right = res[0][0], res[0][2] 37 | to_int = lambda items: [int(x) for x in items] 38 | top_left = to_int(top_left) 39 | btm_right = to_int(btm_right) 40 | 41 | label = res[1] 42 | 43 | # Draw BB 44 | cv2.rectangle(arr, top_left, btm_right, color, thick) 45 | 46 | # Draw text 47 | if text: 48 | cv2.putText(arr, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 49 | 50 | def main(): 51 | 52 | DISPLAY = False 53 | 54 | print("Setting up camera...") 55 | cam = Camera(0, shape_in=(1920, 1080), shape_out=(224, 224)) 56 | 57 | print("Loading model...") 58 | reader = easyocr.Reader(["en"], use_trt=True) 59 | 60 | for _ in range(1000): 61 | 62 | arr = cam.read() 63 | result = reader.readtext(arr, text_threshold=0.85) 64 | print(result) 65 | 66 | # Display the resulting frame 67 | if DISPLAY: 68 | cv2.imshow("Frame", frame) 69 | 70 | if cv2.waitKey(1) & 0xFF == ord("q"): 71 | break 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /example_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following 
conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | import argparse 24 | import os 25 | import easyocr 26 | import cv2 27 | 28 | if __name__ == '__main__': 29 | #Arg parse and setup 30 | parser = argparse.ArgumentParser(description="EasyOCR Label Images") 31 | parser.add_argument('image',type=str, help='path to input image or directory of images') 32 | parser.add_argument('-t', '--trt', default=False, type=bool, help='accelerates detection and recognition models by converting them to TensorRT') 33 | args = parser.parse_args() 34 | if os.path.isfile(args.image): 35 | images = [args.image] 36 | else: #if it's not a file, assume it's a directory of images 37 | images = [os.path.join(args.image, file) for file in filter(lambda x: not x.endswith('.ipynb_checkpoints'),os.listdir(args.image))] 38 | 39 | #intialize output directory 40 | out_directory = args.image.split('/')[-1].split('.')[0]+'-labeled-images' 41 | if not os.path.exists(out_directory): 42 | os.makedirs(out_directory) 43 | txt_file = open(out_directory+'/image_labels.txt','w') 44 | print('output directory:',out_directory) 45 | 46 | #load the scene text ocr models 47 | reader = easyocr.Reader(['en'], use_trt=args.trt) # need to run only once to load model into memory 48 | 49 | 50 | for image in images: 51 | #use cv2 to check if it is a valid image 52 | imageData = cv2.imread(image) 53 | if imageData is None: 
54 | print("reading image %s failed" % image) 55 | continue 56 | 57 | #perform inference and read the models 58 | print("on image",image) 59 | txt_file.write(image+'\n') 60 | result = reader.readtext(image) 61 | print('result',result, '\n') 62 | 63 | #draw bounding boxes and ouptut result to txt file 64 | color = (0,0,255) 65 | imageData = cv2.imread(image) 66 | imgHeight, imgWidth, _ = imageData.shape 67 | thick = 2 68 | font_scale = 1 69 | for res in result: 70 | top_left, btm_right = res[0][0],res[0][2] 71 | to_int = lambda items: [int(x) for x in items] 72 | top_left = to_int(top_left) 73 | btm_right = to_int(btm_right) 74 | label = res[1] 75 | cv2.rectangle(imageData,top_left, btm_right, color, thick) 76 | cv2.putText(imageData, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 77 | txt_file.write(str(res)+'\n') 78 | txt_file.write('\n') 79 | 80 | #write image and notify user 81 | check = cv2.imwrite(out_directory+"/labeled_"+image.split('/')[-1], imageData) 82 | if check: 83 | print("successfully wrote image:",out_directory+"/labeled_"+image.split('/')[-1]) 84 | else: 85 | print("failed to write image:",out_directory+"/labeled_"+image.split('/')[-1]) 86 | 87 | txt_file.close() 88 | -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/models/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.3 2 | opencv-python==4.5.4.58 3 | python-bidi==0.42 -------------------------------------------------------------------------------- /video_capture.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 
| 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | import cv2 24 | import numpy as np 25 | import easyocr 26 | import time 27 | import threading 28 | import queue 29 | 30 | def put_boxes(result,imageData): 31 | color = (0,0,255) 32 | imgHeight, imgWidth, _ = imageData.shape 33 | thick = 2 34 | font_scale = 1 35 | for res in result: 36 | top_left, btm_right = res[0][0],res[0][2] 37 | to_int = lambda items: [int(x) for x in items] 38 | top_left = to_int(top_left) 39 | btm_right = to_int(btm_right) 40 | 41 | label = res[1] 42 | 43 | print(label) 44 | 45 | cv2.rectangle(imageData,top_left, btm_right, color, thick) 46 | #cv2.putText(imageData, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 47 | if len(result)>0: 48 | print('\n') 49 | 50 | 51 | # Create a VideoCapture object and read from input file 52 | cap = cv2.VideoCapture(0)#'v4l2src device=/dev/video0 ! 
video/x-raw, width=(int)640, height=(int)480, framerate=(fraction)30/1 ! videoconvert ! video/x-raw, format=(string)BGR ! appsink',cv2.CAP_GSTREAMER) 53 | 54 | # Check if camera opened successfully 55 | if (cap.isOpened()== False): 56 | print("Error opening video file") 57 | 58 | 59 | reader = easyocr.Reader(['en'], use_trt=True) 60 | #if ur loading pth file for this, use 480x640! tis the frame.shape 61 | 62 | 63 | 64 | iters = 0 65 | time_labels = ['t_capture','t_infer', 't_box','t_show'] #[0.0009343624114990234, 1.134493112564087, 0.0008838176727294922, 0.0002529621124267578] 66 | avg_times = [0]*len(time_labels) 67 | times = [] 68 | result = [] 69 | q = queue.Queue() 70 | 71 | 72 | # Read until video is completed 73 | while(cap.isOpened()): 74 | times.append(time.time()) 75 | # Capture frame-by-frame 76 | ret, frame = cap.read() 77 | times.append(time.time()) 78 | #print('frame size', frame.shape) 79 | # print("num threads",threading.active_count()) 80 | if ret == True: 81 | if True:#iters%10==0: 82 | if threading.active_count()==1: 83 | #result = reader.readtext(frame) 84 | threading.Thread(target=lambda *f: q.put(reader.readtext(np.array(f),text_threshold=.85)),args=(frame)).start() 85 | if not q.empty(): 86 | result = q.get() 87 | times.append(time.time()) 88 | #print(result) 89 | put_boxes(result,frame) 90 | times.append(time.time()) 91 | # Display the resulting frame 92 | cv2.imshow('Frame', frame) 93 | times.append(time.time()) 94 | 95 | # Press Q on keyboard to exit 96 | if cv2.waitKey(25) & 0xFF == ord('q'): 97 | break 98 | 99 | # Break the loop 100 | else: 101 | break 102 | iters+=1 103 | 104 | #perform calculations 105 | calc_avg = lambda t1,t2,t_old:((t2-t1))#+t_old) #/(2 if avg_times[0]!=0 else 1) 106 | avg_times = list(map(calc_avg,times[:-1],times[1:],avg_times)) 107 | # print(avg_times,"fps:",1.0/(times[-1]-times[1])) 108 | times = [] 109 | 110 | 111 | # When everything done, release 112 | # the video capture object 113 | cap.release() 114 | 115 | 
# Closes all the frames 116 | cv2.destroyAllWindows() 117 | 118 | 119 | """ 120 | [0.0012748241424560547, 1.2281830310821533, 0.0008254051208496094, 0.00030159950256347656] fps: 0.8134644401776536 121 | Detection time: 0.2852518558502197 122 | Total detection time 0.29549670219421387 123 | Total Recogntion time 1.0111820697784424 124 | Members Mark 125 | out 126 | Kancmao 127 | Disinfecting 128 | WIPES 129 | Toallitas Desiniecanle 130 | Ws Cold & Flu Virus " 131 | Kills 91.93,d brtntl 132 | ORANGE SCEHT 133 | AroMa A Maant 134 | IuO BLCH 135 | Oounav 136 | (moh 137 | foga tuEA La 138 | Comat 139 | dL 0s Nisos 140 | quac4 141 | 78 WIPES 142 | ZoWKbo 7eWc 143 | rinimumw 1b 144 | Dn 145 | 917 146 | TaFe 147 | 148 | 149 | 150 | IDEA: 151 | have the read_text in a separate thread, and the rest in this thread. then video will be super smooth, but text will only show up choppy. 152 | """ 153 | 154 | --------------------------------------------------------------------------------