├── .gitignore ├── .gitmodules ├── Dockerfile ├── LICENSE.md ├── README.md ├── benchmarks └── bench.py ├── camera └── __init__.py ├── docs ├── NVIDIA_CLA_v1.0.1.pdf ├── README_images │ ├── pringle_kirkland.gif │ └── ruler.gif ├── images │ ├── Broadway.jpg │ ├── LAX-wayfinding-signage.jpg │ ├── LAX-wayfinding-terminal-signage.jpg │ ├── NYC_intersection.jpg │ ├── airport_amsterdam_signs.jpg │ ├── airport_precluded_signs.jpg │ ├── ariport_inside_signs.jpg │ ├── foods_shelf_close_up.jpg │ ├── storm_nyc.jpg │ ├── street_sign_with_shadwo.jpg │ └── streetname-BANNER-2.jpg └── labeled-images │ ├── labeled_Broadway.jpg │ ├── labeled_LAX-wayfinding-signage.jpg │ ├── labeled_LAX-wayfinding-terminal-signage.jpg │ ├── labeled_NYC_intersection.jpg │ ├── labeled_airport_amsterdam_signs.jpg │ ├── labeled_airport_precluded_signs.jpg │ ├── labeled_ariport_inside_signs.jpg │ ├── labeled_foods_shelf_close_up.jpg │ ├── labeled_storm_nyc.jpg │ ├── labeled_street_sign_with_shadwo.jpg │ └── labeled_streetname-BANNER-2.jpg ├── example_camera.py ├── example_images.py ├── models └── .gitkeep ├── requirements.txt └── video_capture.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # pytorch 141 | *.pth 142 | 143 | # assets 144 | *.pdf 145 | *.png 146 | *.jpg 147 | *.txt 148 | 149 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "EasyOCR"] 2 | path = EasyOCR 3 | url = https://github.com/akamboj2/EasyOCR 4 | [submodule "torch2trt"] 5 | path = torch2trt 6 | url = https://github.com/tomek-l/torch2trt.git 7 | branch = easyocr_trt8 8 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. 
2 | # 3 | # Permission is hereby granted, free of charge, to any person obtaining a 4 | # copy of this software and associated documentation files (the "Software"), 5 | # to deal in the Software without restriction, including without limitation 6 | # the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | # and/or sell copies of the Software, and to permit persons to whom the 8 | # Software is furnished to do so, subject to the following conditions: 9 | # 10 | # The above copyright notice and this permission notice shall be included in 11 | # all copies or substantial portions of the Software. 12 | # 13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 16 | # THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 18 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 19 | # DEALINGS IN THE SOFTWARE. 
20 | 21 | ARG BASE_IMAGE=nvcr.io/nvidia/l4t-pytorch:r32.6.1-pth1.9-py3 22 | FROM ${BASE_IMAGE} 23 | 24 | ENV DEBIAN_FRONTEND=noninteractive 25 | 26 | #for video_capture.py 27 | RUN pip3 install --upgrade pip 28 | RUN pip3 install opencv-python 29 | RUN pip3 install traitlets 30 | RUN pip3 install scipy 31 | RUN pip3 install tifffile 32 | 33 | #for easyocr 34 | RUN pip3 install python-bidi 35 | ENV PYTHONIOENCODING=utf-8 36 | 37 | #for torch2trt 38 | RUN git clone --recursive -b jax-jp4.6.1-trt7 https://github.com/akamboj2/torch2trt.git torch2trt && \ 39 | cd torch2trt && \ 40 | python3 setup.py install && \ 41 | cd ../ && \ 42 | rm -rf torch2trt 43 | 44 | 45 | #for EAST: 46 | RUN apt-get update -y 47 | RUN apt-get install -y libgeos-dev 48 | RUN pip3 install shapely==1.6.4 49 | RUN pip3 install lanms==1.0.2 50 | 51 | #Run video capture.py 52 | # RUN git clone --recurse-submodules https://gitlab-master.nvidia.com/akamboj/scene-text-recognition.git 53 | # RUN cd EasyOCR && \ 54 | # python3 setup.py install && \ 55 | # cd ../ 56 | COPY . 
scene-text-recognition 57 | RUN cd scene-text-recognition/EasyOCR && \ 58 | pip3 install --upgrade pip && \ 59 | python3 setup.py install && \ 60 | cd ../ 61 | RUN python3 video_capture.py -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 NVIDIA CORPORATION 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | 22 | 23 | 24 | The following open source license is used by the EasyOCR git submodule included in this project. 25 | 26 | Apache License 27 | Version 2.0, January 2004 28 | http://www.apache.org/licenses/ 29 | 30 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 31 | 32 | 1. Definitions. 33 | 34 | "License" shall mean the terms and conditions for use, reproduction, 35 | and distribution as defined by Sections 1 through 9 of this document. 
36 | 37 | "Licensor" shall mean the copyright owner or entity authorized by 38 | the copyright owner that is granting the License. 39 | 40 | "Legal Entity" shall mean the union of the acting entity and all 41 | other entities that control, are controlled by, or are under common 42 | control with that entity. For the purposes of this definition, 43 | "control" means (i) the power, direct or indirect, to cause the 44 | direction or management of such entity, whether by contract or 45 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 46 | outstanding shares, or (iii) beneficial ownership of such entity. 47 | 48 | "You" (or "Your") shall mean an individual or Legal Entity 49 | exercising permissions granted by this License. 50 | 51 | "Source" form shall mean the preferred form for making modifications, 52 | including but not limited to software source code, documentation 53 | source, and configuration files. 54 | 55 | "Object" form shall mean any form resulting from mechanical 56 | transformation or translation of a Source form, including but 57 | not limited to compiled object code, generated documentation, 58 | and conversions to other media types. 59 | 60 | "Work" shall mean the work of authorship, whether in Source or 61 | Object form, made available under the License, as indicated by a 62 | copyright notice that is included in or attached to the work 63 | (an example is provided in the Appendix below). 64 | 65 | "Derivative Works" shall mean any work, whether in Source or Object 66 | form, that is based on (or derived from) the Work and for which the 67 | editorial revisions, annotations, elaborations, or other modifications 68 | represent, as a whole, an original work of authorship. For the purposes 69 | of this License, Derivative Works shall not include works that remain 70 | separable from, or merely link (or bind by name) to the interfaces of, 71 | the Work and Derivative Works thereof. 
72 | 73 | "Contribution" shall mean any work of authorship, including 74 | the original version of the Work and any modifications or additions 75 | to that Work or Derivative Works thereof, that is intentionally 76 | submitted to Licensor for inclusion in the Work by the copyright owner 77 | or by an individual or Legal Entity authorized to submit on behalf of 78 | the copyright owner. For the purposes of this definition, "submitted" 79 | means any form of electronic, verbal, or written communication sent 80 | to the Licensor or its representatives, including but not limited to 81 | communication on electronic mailing lists, source code control systems, 82 | and issue tracking systems that are managed by, or on behalf of, the 83 | Licensor for the purpose of discussing and improving the Work, but 84 | excluding communication that is conspicuously marked or otherwise 85 | designated in writing by the copyright owner as "Not a Contribution." 86 | 87 | "Contributor" shall mean Licensor and any individual or Legal Entity 88 | on behalf of whom a Contribution has been received by Licensor and 89 | subsequently incorporated within the Work. 90 | 91 | 2. Grant of Copyright License. Subject to the terms and conditions of 92 | this License, each Contributor hereby grants to You a perpetual, 93 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 94 | copyright license to reproduce, prepare Derivative Works of, 95 | publicly display, publicly perform, sublicense, and distribute the 96 | Work and such Derivative Works in Source or Object form. 97 | 98 | 3. Grant of Patent License. 
Subject to the terms and conditions of 99 | this License, each Contributor hereby grants to You a perpetual, 100 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 101 | (except as stated in this section) patent license to make, have made, 102 | use, offer to sell, sell, import, and otherwise transfer the Work, 103 | where such license applies only to those patent claims licensable 104 | by such Contributor that are necessarily infringed by their 105 | Contribution(s) alone or by combination of their Contribution(s) 106 | with the Work to which such Contribution(s) was submitted. If You 107 | institute patent litigation against any entity (including a 108 | cross-claim or counterclaim in a lawsuit) alleging that the Work 109 | or a Contribution incorporated within the Work constitutes direct 110 | or contributory patent infringement, then any patent licenses 111 | granted to You under this License for that Work shall terminate 112 | as of the date such litigation is filed. 113 | 114 | 4. Redistribution. 
You may reproduce and distribute copies of the 115 | Work or Derivative Works thereof in any medium, with or without 116 | modifications, and in Source or Object form, provided that You 117 | meet the following conditions: 118 | 119 | (a) You must give any other recipients of the Work or 120 | Derivative Works a copy of this License; and 121 | 122 | (b) You must cause any modified files to carry prominent notices 123 | stating that You changed the files; and 124 | 125 | (c) You must retain, in the Source form of any Derivative Works 126 | that You distribute, all copyright, patent, trademark, and 127 | attribution notices from the Source form of the Work, 128 | excluding those notices that do not pertain to any part of 129 | the Derivative Works; and 130 | 131 | (d) If the Work includes a "NOTICE" text file as part of its 132 | distribution, then any Derivative Works that You distribute must 133 | include a readable copy of the attribution notices contained 134 | within such NOTICE file, excluding those notices that do not 135 | pertain to any part of the Derivative Works, in at least one 136 | of the following places: within a NOTICE text file distributed 137 | as part of the Derivative Works; within the Source form or 138 | documentation, if provided along with the Derivative Works; or, 139 | within a display generated by the Derivative Works, if and 140 | wherever such third-party notices normally appear. The contents 141 | of the NOTICE file are for informational purposes only and 142 | do not modify the License. You may add Your own attribution 143 | notices within Derivative Works that You distribute, alongside 144 | or as an addendum to the NOTICE text from the Work, provided 145 | that such additional attribution notices cannot be construed 146 | as modifying the License. 
147 | 148 | You may add Your own copyright statement to Your modifications and 149 | may provide additional or different license terms and conditions 150 | for use, reproduction, or distribution of Your modifications, or 151 | for any such Derivative Works as a whole, provided Your use, 152 | reproduction, and distribution of the Work otherwise complies with 153 | the conditions stated in this License. 154 | 155 | 5. Submission of Contributions. Unless You explicitly state otherwise, 156 | any Contribution intentionally submitted for inclusion in the Work 157 | by You to the Licensor shall be under the terms and conditions of 158 | this License, without any additional terms or conditions. 159 | Notwithstanding the above, nothing herein shall supersede or modify 160 | the terms of any separate license agreement you may have executed 161 | with Licensor regarding such Contributions. 162 | 163 | 6. Trademarks. This License does not grant permission to use the trade 164 | names, trademarks, service marks, or product names of the Licensor, 165 | except as required for reasonable and customary use in describing the 166 | origin of the Work and reproducing the content of the NOTICE file. 167 | 168 | 7. Disclaimer of Warranty. Unless required by applicable law or 169 | agreed to in writing, Licensor provides the Work (and each 170 | Contributor provides its Contributions) on an "AS IS" BASIS, 171 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 172 | implied, including, without limitation, any warranties or conditions 173 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 174 | PARTICULAR PURPOSE. You are solely responsible for determining the 175 | appropriateness of using or redistributing the Work and assume any 176 | risks associated with Your exercise of permissions under this License. 177 | 178 | 8. Limitation of Liability. 
In no event and under no legal theory, 179 | whether in tort (including negligence), contract, or otherwise, 180 | unless required by applicable law (such as deliberate and grossly 181 | negligent acts) or agreed to in writing, shall any Contributor be 182 | liable to You for damages, including any direct, indirect, special, 183 | incidental, or consequential damages of any character arising as a 184 | result of this License or out of the use or inability to use the 185 | Work (including but not limited to damages for loss of goodwill, 186 | work stoppage, computer failure or malfunction, or any and all 187 | other commercial damages or losses), even if such Contributor 188 | has been advised of the possibility of such damages. 189 | 190 | 9. Accepting Warranty or Additional Liability. While redistributing 191 | the Work or Derivative Works thereof, You may choose to offer, 192 | and charge a fee for, acceptance of support, warranty, indemnity, 193 | or other liability obligations and/or rights consistent with this 194 | License. However, in accepting such obligations, You may act only 195 | on Your own behalf and on Your sole responsibility, not on behalf 196 | of any other Contributor, and only if You agree to indemnify, 197 | defend, and hold each Contributor harmless for any liability 198 | incurred by, or claims asserted against, such Contributor by reason 199 | of your accepting any such warranty or additional liability. 200 | 201 | END OF TERMS AND CONDITIONS 202 | 203 | APPENDIX: How to apply the Apache License to your work. 204 | 205 | To apply the Apache License to your work, attach the following 206 | boilerplate notice, with the fields enclosed by brackets "[]" 207 | replaced with your own identifying information. (Don't include 208 | the brackets!) The text should be enclosed in the appropriate 209 | comment syntax for the file format. 
We also recommend that a 210 | file or class name and description of purpose be included on the 211 | same "printed page" as the copyright notice for easier 212 | identification within third-party archives. 213 | 214 | Copyright [yyyy] [name of copyright owner] 215 | 216 | Licensed under the Apache License, Version 2.0 (the "License"); 217 | you may not use this file except in compliance with the License. 218 | You may obtain a copy of the License at 219 | 220 | http://www.apache.org/licenses/LICENSE-2.0 221 | 222 | Unless required by applicable law or agreed to in writing, software 223 | distributed under the License is distributed on an "AS IS" BASIS, 224 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 225 | See the License for the specific language governing permissions and 226 | limitations under the License. 227 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scene Text Recognition 2 | 3 | Real-time scene text recognition accelerated with NVIDIA TensorRT 4 | 5 | https://user-images.githubusercontent.com/26127866/144195980-64935bb7-47d5-404f-8b89-ac99c013ca97.mp4 6 | 7 | ## Quickstart 8 | 9 | 1. Clone Repo 10 | ```bash 11 | git clone --recursive git@github.com:tomek-l/nv-scene-text-recognition.git 12 | ``` 13 | 14 | 2. Install pytorch, torchvision 15 | ```bash 16 | wget https://raw.githubusercontent.com/tomek-l/jetson-install-pytorch/master/install_torch_v1.9.sh 17 | bash install_torch_v1.9.sh 18 | pip3 install -r requirements.txt 19 | ``` 20 | 21 | 3. Install torch2trt 22 | 23 | Until this [PR](https://github.com/NVIDIA-AI-IOT/torch2trt/pull/627) is merged use Chitoku's [branch](https://github.com/chitoku/torch2trt/tree/jp4.6_tensorrt8) containing a fix for TensorRT 8. 24 | ```bash 25 | cd torch2trt 26 | sudo python3 setup.py install --plugins 27 | ``` 28 | 29 | 4. 
where workdir is the directory containing this cloned repo, or is the cloned repo. 47 | 48 | If you are using a realtime camera: 49 | ``` 50 | xhost + 51 | sudo docker run -it --rm -v ~/workdir:/workdir/ --runtime nvidia --network host -e DISPLAY=$DISPLAY --device /dev/video0:/dev/video0 scene-text-recognition 52 | ``` 53 | Where /dev/video0 is the correct device id mapped into the container. This can be found using:
87 | This program will store the Torch2trt state dictionaries in the torch2trt_models directory. 88 | 89 | #### 3. video_capture.py 90 | This program uses an attached USB camera to display a realtime video. The code will display bounding boxes around the text in the video and output the text in the terminal. Click on the video screen and type 'q' to terminate the program. 91 | After plugging in the USB camera, but before running the python file, check the device id, and make sure that is passed into 'cap = cv2.VideoCapture(0)' line. By default we assume it is zero; change the argument in cv2.VideoCapture() to the correct device id before running the program. The device id can be checked by doing:
The scene text recognition framework used here is a modified version of the EasyOCR open-source code [EasyOCR](https://github.com/JaidedAI/EasyOCR). 131 | 132 | Below are the sources of the default [detection](https://arxiv.org/abs/1904.01941) and [recognition](https://arxiv.org/abs/1507.05717) models:
143 | -------------------------------------------------------------------------------- /benchmarks/bench.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | """ 23 | import easyocr 24 | import torch 25 | import torch.nn as nn 26 | from torch2trt import torch2trt 27 | import time 28 | import os 29 | 30 | #torch2trt custom converters 31 | from torch2trt import * 32 | @tensorrt_converter('torch.Tensor.__hash__') 33 | @tensorrt_converter('torch.Tensor.get_device') 34 | @tensorrt_converter('torch.Tensor.data_ptr') 35 | @tensorrt_converter('torch.Tensor.is_complex') 36 | @tensorrt_converter('torch.is_grad_enabled') 37 | def suppress_warning(ctx): 38 | #none of these effect the computational path thus don't need converters 39 | pass 40 | 41 | @tensorrt_converter('torch.zeros') 42 | def convert_add(ctx): 43 | input_a = ctx.method_args[0] 44 | output = ctx.method_return 45 | output._trt = add_missing_trt_tensors(ctx.network, [output]) 46 | 47 | def profile(model,dummy_input): 48 | iters=50 49 | with torch.no_grad(): 50 | # warm up 51 | for _ in range(10): 52 | model(dummy_input) 53 | 54 | # throughput evaluate 55 | torch.cuda.current_stream().synchronize() 56 | t0 = time.time() 57 | for _ in range(iters): 58 | model(dummy_input) 59 | torch.cuda.current_stream().synchronize() 60 | t1 = time.time() 61 | throughput = 1.0 * iters / (t1 - t0) 62 | 63 | # latency evaluate 64 | torch.cuda.current_stream().synchronize() 65 | t0 = time.time() 66 | for _ in range(iters): 67 | model(dummy_input) 68 | torch.cuda.current_stream().synchronize() 69 | t1 = time.time() 70 | latency = round(1000.0 * (t1 - t0) / iters, 2) 71 | print("throughput: %.3f fps\t latency: %.3f ms"% (throughput,latency)) 72 | 73 | if __name__ == '__main__': 74 | 75 | reader = easyocr.Reader(['en'],gpu=True) # need to run only once to load model into memory 76 | 77 | if not os.path.exists('torch2trt_models'): 78 | os.makedirs('torch2trt_models') 79 | 80 | #detector: 81 | y = torch.ones((1, 3, 480, 640),dtype=torch.float).cuda() 82 | print("Detector:") 83 | print("Before Conversion:") 84 | profile(reader.detector, y) #throughput: 12.386 latency: 84.190 85 | 86 | if 
os.path.isfile('torch2trt_models/easyocr_detect.pth'): 87 | model_trt_detect = TRTModule() 88 | model_trt_detect.load_state_dict(torch.load('torch2trt_models/easyocr_detect.pth')) 89 | else: 90 | model_trt_detect = torch2trt(reader.detector, [y]) 91 | torch.save(model_trt_detect.state_dict(),'torch2trt_models/easyocr_detect.pth') 92 | 93 | print("After Conversion") 94 | profile(model_trt_detect, y) #throughput: 24.737 latency: 48.990 95 | 96 | 97 | #recognizer 98 | print("\nRecognizer:") 99 | x = torch.ones((1,1,64,320),dtype=torch.float).to('cuda') 100 | reader.recognizer.eval() 101 | print("Before Conversion:") 102 | profile(reader.recognizer, x) #throughput: 36.912 latency: 24.610 103 | 104 | if os.path.isfile('torch2trt_models/easyocr_recognize.pth'): 105 | model_trt_rec = TRTModule() 106 | model_trt_rec.load_state_dict(torch.load('torch2trt_models/easyocr_recognize.pth')) 107 | else: 108 | model_trt_rec = torch2trt(reader.detector, [y]) 109 | torch.save(model_trt_rec.state_dict(),'torch2trt_models/easyocr_recognize.pth') 110 | model_trt_rec = torch2trt(reader.recognizer, [x])#, use_onnx=True) 111 | 112 | print("After Conversion") 113 | profile(model_trt_rec,x) #throughput: 2296.110 latency: 0.450 114 | torch.save(model_trt_rec.state_dict(),'torch2trt_models/easyocr_recognize.pth') 115 | 116 | 117 | """ 118 | TODO: 119 | - benchmark again 120 | - input trt 121 | - look through slides' notes 122 | """ 123 | -------------------------------------------------------------------------------- /camera/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | """ 3 | The MIT License (MIT) 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | The above copyright notice and this permission notice shall be included in all 12 | copies or substantial portions of the Software. 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | """ 20 | 21 | import sys 22 | import time 23 | import numpy as np 24 | from threading import Thread 25 | 26 | import gi 27 | 28 | gi.require_version("Gst", "1.0") 29 | from gi.repository import GObject, Gst 30 | 31 | Gst.init(None) 32 | 33 | 34 | def _sanitize(element) -> Gst.Element: 35 | """ 36 | Passthrough function which sure element is not `None` 37 | Returns `Gst.Element` or raises Error 38 | """ 39 | if element is None: 40 | raise Exception("Element is none!") 41 | else: 42 | return element 43 | 44 | 45 | def _make_element_safe(el_type: str, el_name=None) -> Gst.Element: 46 | """ 47 | Creates a gstremer element using el_type factory. 48 | Returns Gst.Element or throws an error if we fail. 
49 | This is to avoid `None` elements in our pipeline 50 | """ 51 | 52 | # name=None parameter asks Gstreamer to uniquely name the elements for us 53 | el = Gst.ElementFactory.make(el_type, name=el_name) 54 | 55 | if el is not None: 56 | return el 57 | else: 58 | print(f"Pipeline element is None!") 59 | raise NameError(f"Could not create element {el_type}") 60 | 61 | 62 | class Camera: 63 | def __init__(self, sensor_id, fps=None, shape_in=None, shape_out=None) -> None: 64 | 65 | # self._mainloop = GObject.MainLoop() # TODO: use GLib.MainLoop 66 | if any([fps, shape_in, shape_out]): 67 | self._pipeline = self._make_pipeline_with_resize( 68 | sensor_id, fps, shape_in, shape_out 69 | ) 70 | else: 71 | self._pipeline = self._make_pipeline(sensor_id) 72 | self._pipeline.set_state(Gst.State.PLAYING) 73 | self.wait_ready() 74 | 75 | def stop(self): 76 | self._pipeline.set_state(Gst.State.NULL) 77 | 78 | def _make_pipeline_with_resize( 79 | self, sensor_id, fps=None, shape_in=None, shape_out=None 80 | ): 81 | 82 | pipeline = _sanitize(Gst.Pipeline()) 83 | 84 | # Camera 85 | camera = _make_element_safe("nvarguscamerasrc") 86 | camera.set_property("sensor-id", sensor_id) 87 | 88 | # Input CF 89 | camera_cf = self._make_input_capsfilter(fps, shape_in) 90 | 91 | # nvvidconv 92 | conv = _make_element_safe("nvvidconv") 93 | 94 | # Output CF 95 | appsink_cf = self._make_output_capsfilter(shape_out) 96 | 97 | # Appsink 98 | self._appsink = appsink = _make_element_safe("appsink") 99 | 100 | # Add everything 101 | for el in [camera, camera_cf, conv, appsink_cf, appsink]: 102 | pipeline.add(el) 103 | 104 | camera.link(camera_cf) 105 | camera_cf.link(conv) 106 | conv.link(appsink_cf) 107 | appsink_cf.link(appsink) 108 | 109 | return pipeline 110 | 111 | def _make_pipeline(self, sensor_id): 112 | 113 | pipeline = _sanitize(Gst.Pipeline()) 114 | 115 | cam = _make_element_safe("nvarguscamerasrc") 116 | cam.set_property("sensor-id", sensor_id) 117 | 118 | conv = 
_make_element_safe("nvvidconv") 119 | 120 | cf = _make_element_safe("capsfilter") 121 | cf.set_property( 122 | "caps", Gst.Caps.from_string("video/x-raw, format=(string)RGBA") 123 | ) 124 | 125 | self._appsink = appsink = _make_element_safe("appsink") 126 | 127 | for el in [cam, conv, cf, appsink]: 128 | pipeline.add(el) 129 | 130 | cam.link(conv) 131 | conv.link(cf) 132 | cf.link(appsink) 133 | 134 | return pipeline 135 | 136 | @staticmethod 137 | def _make_input_capsfilter(fps, shape_in): 138 | 139 | caps_str = "video/x-raw(memory:NVMM), format=(string)NV12" 140 | 141 | if shape_in: 142 | W_in, H_in = shape_in 143 | caps_str += f", width=(int){W_in}, height=(int){H_in}" 144 | if fps: 145 | caps_str += f" framerate=(fraction){fps}/1" 146 | 147 | caps = Gst.Caps.from_string(caps_str) 148 | in_cf = _make_element_safe("capsfilter") 149 | in_cf.set_property("caps", caps) 150 | 151 | return in_cf 152 | 153 | @staticmethod 154 | def _make_output_capsfilter(shape_out): 155 | print(shape_out) 156 | if shape_out: 157 | W_out, H_out = shape_out 158 | caps = Gst.Caps.from_string( 159 | f"video/x-raw, width={W_out}, height={H_out}, format=(string)BGRx" 160 | ) 161 | else: 162 | caps = Gst.Caps.from_string("video/x-raw, format=(string)RGBA") 163 | 164 | cf = _make_element_safe("capsfilter") 165 | cf.set_property("caps", caps) 166 | return cf 167 | 168 | def read(self): 169 | """ 170 | Returns np.array or None 171 | """ 172 | sample = self._appsink.emit("pull-sample") 173 | if sample is None: 174 | return None 175 | buf = sample.get_buffer() 176 | caps_format = sample.get_caps().get_structure(0) 177 | W, H = caps_format.get_value("width"), caps_format.get_value("height") 178 | C = 4 # Earlier we converted to RGBA 179 | buf2 = buf.extract_dup(0, buf.get_size()) 180 | arr = np.ndarray(shape=(H, W, C), buffer=buf2, dtype=np.uint8) 181 | arr = arr[:, :, :3] # RGBA -> RGB 182 | return arr 183 | 184 | def running(self): 185 | _, state, _ = self._pipeline.get_state(1) 186 | return 
True if state == Gst.State.PLAYING else False 187 | 188 | def wait_ready(self): 189 | while not self.running(): 190 | time.sleep(0.1) 191 | 192 | 193 | class CameraThread(Thread): 194 | def __init__(self, sensor_id) -> None: 195 | 196 | super().__init__() 197 | self._camera = Camera(sensor_id) 198 | self._should_run = True 199 | self._image = self._camera.read() 200 | self.start() 201 | 202 | def run(self): 203 | while self._should_run: 204 | self._image = self._camera.read() 205 | 206 | @property 207 | def image(self): 208 | # NOTE: if we care about atomicity of reads, we can add a lock here 209 | return self._image 210 | 211 | def stop(self): 212 | # TODO: this should be threading.Event 213 | self._should_run = False 214 | self._camera.stop() 215 | 216 | 217 | if __name__ == "__main__": 218 | 219 | camera = Camera(0, shape_in=(1920, 1080), shape_out=(224, 224)) 220 | 221 | for _ in range(10): 222 | start = time.perf_counter() 223 | arr = camera.read() 224 | print( 225 | f"Latency: {time.perf_counter() - start} Image shape: {arr.shape} Image mean: {arr.mean()}" 226 | ) 227 | 228 | camera.stop() 229 | -------------------------------------------------------------------------------- /docs/NVIDIA_CLA_v1.0.1.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/NVIDIA_CLA_v1.0.1.pdf -------------------------------------------------------------------------------- /docs/README_images/pringle_kirkland.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/README_images/pringle_kirkland.gif -------------------------------------------------------------------------------- /docs/README_images/ruler.gif: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/README_images/ruler.gif -------------------------------------------------------------------------------- /docs/images/Broadway.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/Broadway.jpg -------------------------------------------------------------------------------- /docs/images/LAX-wayfinding-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/LAX-wayfinding-signage.jpg -------------------------------------------------------------------------------- /docs/images/LAX-wayfinding-terminal-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/LAX-wayfinding-terminal-signage.jpg -------------------------------------------------------------------------------- /docs/images/NYC_intersection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/NYC_intersection.jpg -------------------------------------------------------------------------------- /docs/images/airport_amsterdam_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/airport_amsterdam_signs.jpg -------------------------------------------------------------------------------- 
/docs/images/airport_precluded_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/airport_precluded_signs.jpg -------------------------------------------------------------------------------- /docs/images/ariport_inside_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/ariport_inside_signs.jpg -------------------------------------------------------------------------------- /docs/images/foods_shelf_close_up.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/foods_shelf_close_up.jpg -------------------------------------------------------------------------------- /docs/images/storm_nyc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/storm_nyc.jpg -------------------------------------------------------------------------------- /docs/images/street_sign_with_shadwo.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/street_sign_with_shadwo.jpg -------------------------------------------------------------------------------- /docs/images/streetname-BANNER-2.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/images/streetname-BANNER-2.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_Broadway.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_Broadway.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_LAX-wayfinding-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_LAX-wayfinding-signage.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_LAX-wayfinding-terminal-signage.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_LAX-wayfinding-terminal-signage.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_NYC_intersection.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_NYC_intersection.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_airport_amsterdam_signs.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_airport_amsterdam_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_airport_precluded_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_airport_precluded_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_ariport_inside_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_ariport_inside_signs.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_foods_shelf_close_up.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_foods_shelf_close_up.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_storm_nyc.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_storm_nyc.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_street_sign_with_shadwo.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_street_sign_with_shadwo.jpg -------------------------------------------------------------------------------- /docs/labeled-images/labeled_streetname-BANNER-2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/docs/labeled-images/labeled_streetname-BANNER-2.jpg -------------------------------------------------------------------------------- /example_camera.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
22 | """ 23 | import cv2 24 | import easyocr 25 | import time 26 | import threading 27 | 28 | from camera import Camera 29 | 30 | def put_boxes(result, arr, text=False): 31 | color = (0, 0, 255) 32 | imgHeight, imgWidth, _ = imageData.shape 33 | thick = 2 34 | font_scale = 1 35 | for res in result: 36 | top_left, btm_right = res[0][0], res[0][2] 37 | to_int = lambda items: [int(x) for x in items] 38 | top_left = to_int(top_left) 39 | btm_right = to_int(btm_right) 40 | 41 | label = res[1] 42 | 43 | # Draw BB 44 | cv2.rectangle(arr, top_left, btm_right, color, thick) 45 | 46 | # Draw text 47 | if text: 48 | cv2.putText(arr, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 49 | 50 | def main(): 51 | 52 | DISPLAY = False 53 | 54 | print("Setting up camera...") 55 | cam = Camera(0, shape_in=(1920, 1080), shape_out=(224, 224)) 56 | 57 | print("Loading model...") 58 | reader = easyocr.Reader(["en"], use_trt=True) 59 | 60 | for _ in range(1000): 61 | 62 | arr = cam.read() 63 | result = reader.readtext(arr, text_threshold=0.85) 64 | print(result) 65 | 66 | # Display the resulting frame 67 | if DISPLAY: 68 | cv2.imshow("Frame", frame) 69 | 70 | if cv2.waitKey(1) & 0xFF == ord("q"): 71 | break 72 | 73 | if __name__ == "__main__": 74 | main() 75 | -------------------------------------------------------------------------------- /example_images.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 | 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following 
conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | import argparse 24 | import os 25 | import easyocr 26 | import cv2 27 | 28 | if __name__ == '__main__': 29 | #Arg parse and setup 30 | parser = argparse.ArgumentParser(description="EasyOCR Label Images") 31 | parser.add_argument('image',type=str, help='path to input image or directory of images') 32 | parser.add_argument('-t', '--trt', default=False, type=bool, help='accelerates detection and recognition models by converting them to TensorRT') 33 | args = parser.parse_args() 34 | if os.path.isfile(args.image): 35 | images = [args.image] 36 | else: #if it's not a file, assume it's a directory of images 37 | images = [os.path.join(args.image, file) for file in filter(lambda x: not x.endswith('.ipynb_checkpoints'),os.listdir(args.image))] 38 | 39 | #intialize output directory 40 | out_directory = args.image.split('/')[-1].split('.')[0]+'-labeled-images' 41 | if not os.path.exists(out_directory): 42 | os.makedirs(out_directory) 43 | txt_file = open(out_directory+'/image_labels.txt','w') 44 | print('output directory:',out_directory) 45 | 46 | #load the scene text ocr models 47 | reader = easyocr.Reader(['en'], use_trt=args.trt) # need to run only once to load model into memory 48 | 49 | 50 | for image in images: 51 | #use cv2 to check if it is a valid image 52 | imageData = cv2.imread(image) 53 | if imageData is None: 
54 | print("reading image %s failed" % image) 55 | continue 56 | 57 | #perform inference and read the models 58 | print("on image",image) 59 | txt_file.write(image+'\n') 60 | result = reader.readtext(image) 61 | print('result',result, '\n') 62 | 63 | #draw bounding boxes and ouptut result to txt file 64 | color = (0,0,255) 65 | imageData = cv2.imread(image) 66 | imgHeight, imgWidth, _ = imageData.shape 67 | thick = 2 68 | font_scale = 1 69 | for res in result: 70 | top_left, btm_right = res[0][0],res[0][2] 71 | to_int = lambda items: [int(x) for x in items] 72 | top_left = to_int(top_left) 73 | btm_right = to_int(btm_right) 74 | label = res[1] 75 | cv2.rectangle(imageData,top_left, btm_right, color, thick) 76 | cv2.putText(imageData, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 77 | txt_file.write(str(res)+'\n') 78 | txt_file.write('\n') 79 | 80 | #write image and notify user 81 | check = cv2.imwrite(out_directory+"/labeled_"+image.split('/')[-1], imageData) 82 | if check: 83 | print("successfully wrote image:",out_directory+"/labeled_"+image.split('/')[-1]) 84 | else: 85 | print("failed to write image:",out_directory+"/labeled_"+image.split('/')[-1]) 86 | 87 | txt_file.close() 88 | -------------------------------------------------------------------------------- /models/.gitkeep: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/NVIDIA-AI-IOT/scene-text-recognition/d3a832a340595efe20fea24d09f82461b79daa21/models/.gitkeep -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | numpy==1.19.3 2 | opencv-python==4.5.4.58 3 | python-bidi==0.42 -------------------------------------------------------------------------------- /video_capture.py: -------------------------------------------------------------------------------- 1 | """ 2 | The MIT License (MIT) 3 
| 4 | Copyright (c) 2021 NVIDIA CORPORATION 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy of 7 | this software and associated documentation files (the "Software"), to deal in 8 | the Software without restriction, including without limitation the rights to 9 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 10 | the Software, and to permit persons to whom the Software is furnished to do so, 11 | subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 18 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 19 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 20 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 21 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 22 | """ 23 | import cv2 24 | import numpy as np 25 | import easyocr 26 | import time 27 | import threading 28 | import queue 29 | 30 | def put_boxes(result,imageData): 31 | color = (0,0,255) 32 | imgHeight, imgWidth, _ = imageData.shape 33 | thick = 2 34 | font_scale = 1 35 | for res in result: 36 | top_left, btm_right = res[0][0],res[0][2] 37 | to_int = lambda items: [int(x) for x in items] 38 | top_left = to_int(top_left) 39 | btm_right = to_int(btm_right) 40 | 41 | label = res[1] 42 | 43 | print(label) 44 | 45 | cv2.rectangle(imageData,top_left, btm_right, color, thick) 46 | #cv2.putText(imageData, label, (top_left[0], top_left[1] - 12), 0, font_scale, color, thick) 47 | if len(result)>0: 48 | print('\n') 49 | 50 | 51 | # Create a VideoCapture object and read from input file 52 | cap = cv2.VideoCapture(0)#'v4l2src device=/dev/video0 ! 
video/x-raw, width=(int)640, height=(int)480, framerate=(fraction)30/1 ! videoconvert ! video/x-raw, format=(string)BGR ! appsink',cv2.CAP_GSTREAMER) 53 | 54 | # Check if camera opened successfully 55 | if (cap.isOpened()== False): 56 | print("Error opening video file") 57 | 58 | 59 | reader = easyocr.Reader(['en'], use_trt=True) 60 | #if ur loading pth file for this, use 480x640! tis the frame.shape 61 | 62 | 63 | 64 | iters = 0 65 | time_labels = ['t_capture','t_infer', 't_box','t_show'] #[0.0009343624114990234, 1.134493112564087, 0.0008838176727294922, 0.0002529621124267578] 66 | avg_times = [0]*len(time_labels) 67 | times = [] 68 | result = [] 69 | q = queue.Queue() 70 | 71 | 72 | # Read until video is completed 73 | while(cap.isOpened()): 74 | times.append(time.time()) 75 | # Capture frame-by-frame 76 | ret, frame = cap.read() 77 | times.append(time.time()) 78 | #print('frame size', frame.shape) 79 | # print("num threads",threading.active_count()) 80 | if ret == True: 81 | if True:#iters%10==0: 82 | if threading.active_count()==1: 83 | #result = reader.readtext(frame) 84 | threading.Thread(target=lambda *f: q.put(reader.readtext(np.array(f),text_threshold=.85)),args=(frame)).start() 85 | if not q.empty(): 86 | result = q.get() 87 | times.append(time.time()) 88 | #print(result) 89 | put_boxes(result,frame) 90 | times.append(time.time()) 91 | # Display the resulting frame 92 | cv2.imshow('Frame', frame) 93 | times.append(time.time()) 94 | 95 | # Press Q on keyboard to exit 96 | if cv2.waitKey(25) & 0xFF == ord('q'): 97 | break 98 | 99 | # Break the loop 100 | else: 101 | break 102 | iters+=1 103 | 104 | #perform calculations 105 | calc_avg = lambda t1,t2,t_old:((t2-t1))#+t_old) #/(2 if avg_times[0]!=0 else 1) 106 | avg_times = list(map(calc_avg,times[:-1],times[1:],avg_times)) 107 | # print(avg_times,"fps:",1.0/(times[-1]-times[1])) 108 | times = [] 109 | 110 | 111 | # When everything done, release 112 | # the video capture object 113 | cap.release() 114 | 115 | 
# Closes all the frames 116 | cv2.destroyAllWindows() 117 | 118 | 119 | """ 120 | [0.0012748241424560547, 1.2281830310821533, 0.0008254051208496094, 0.00030159950256347656] fps: 0.8134644401776536 121 | Detection time: 0.2852518558502197 122 | Total detection time 0.29549670219421387 123 | Total Recogntion time 1.0111820697784424 124 | Members Mark 125 | out 126 | Kancmao 127 | Disinfecting 128 | WIPES 129 | Toallitas Desiniecanle 130 | Ws Cold & Flu Virus " 131 | Kills 91.93,d brtntl 132 | ORANGE SCEHT 133 | AroMa A Maant 134 | IuO BLCH 135 | Oounav 136 | (moh 137 | foga tuEA La 138 | Comat 139 | dL 0s Nisos 140 | quac4 141 | 78 WIPES 142 | ZoWKbo 7eWc 143 | rinimumw 1b 144 | Dn 145 | 917 146 | TaFe 147 | 148 | 149 | 150 | IDEA: 151 | have the read_text in a separate thread, and the rest in this thread. then video will be super smooth, but text will only show up choppy. 152 | """ 153 | 154 | --------------------------------------------------------------------------------