├── .gitignore ├── LICENSE ├── README.md ├── dependency_licenses └── tesseract_LICENSES └── src ├── Ruleset.ruleset ├── TesserNet.Example.ImageSharp ├── Program.cs ├── TesserNet.Example.ImageSharp.csproj └── img.png ├── TesserNet.Example.SkiaSharp ├── Program.cs ├── TesserNet.Example.SkiaSharp.csproj └── img.png ├── TesserNet.Example.System.Drawing ├── Program.cs ├── TesserNet.Example.System.Drawing.csproj └── img.png ├── TesserNet.ImageSharp ├── ImageSharpTesseractExtensions.cs └── TesserNet.ImageSharp.csproj ├── TesserNet.SkiaSharp ├── SkiaSharpTesseractExtensions.cs └── TesserNet.SkiaSharp.csproj ├── TesserNet.System.Drawing ├── SystemDrawingTesseractExtensions.cs └── TesserNet.System.Drawing.csproj ├── TesserNet.Tests ├── ImageLoader.cs ├── Resources │ └── img.png ├── TesserNet.Tests.csproj └── Tests.cs ├── TesserNet.sln ├── TesserNet ├── ITesseract.cs ├── Internal │ ├── LazyQueue.cs │ ├── Loader.cs │ ├── PlatformNames.cs │ ├── TesseractApi.cs │ ├── UnixTesseractApi.cs │ ├── Utf8Helper.cs │ └── WindowsTesseractApi.cs ├── OcrEngineMode.cs ├── PageSegmentation.cs ├── Resources.zip ├── TesserNet.csproj ├── Tesseract.cs ├── TesseractBase.cs ├── TesseractException.cs ├── TesseractOptions.cs └── TesseractPool.cs └── stylecop.json /.gitignore: -------------------------------------------------------------------------------- 1 | ## Ignore Visual Studio temporary files, build results, and 2 | ## files generated by popular Visual Studio add-ons. 
3 | ## 4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 5 | 6 | # User-specific files 7 | *.rsuser 8 | *.suo 9 | *.user 10 | *.userosscache 11 | *.sln.docstates 12 | 13 | # User-specific files (MonoDevelop/Xamarin Studio) 14 | *.userprefs 15 | 16 | # Mono auto generated files 17 | mono_crash.* 18 | 19 | # Build results 20 | [Dd]ebug/ 21 | [Dd]ebugPublic/ 22 | [Rr]elease/ 23 | [Rr]eleases/ 24 | x64/ 25 | x86/ 26 | [Aa][Rr][Mm]/ 27 | [Aa][Rr][Mm]64/ 28 | bld/ 29 | [Bb]in/ 30 | [Oo]bj/ 31 | [Ll]og/ 32 | [Ll]ogs/ 33 | 34 | # Visual Studio 2015/2017 cache/options directory 35 | .vs/ 36 | # Uncomment if you have tasks that create the project's static files in wwwroot 37 | #wwwroot/ 38 | 39 | # Visual Studio 2017 auto generated files 40 | Generated\ Files/ 41 | 42 | # MSTest test Results 43 | [Tt]est[Rr]esult*/ 44 | [Bb]uild[Ll]og.* 45 | 46 | # NUnit 47 | *.VisualState.xml 48 | TestResult.xml 49 | nunit-*.xml 50 | 51 | # Build Results of an ATL Project 52 | [Dd]ebugPS/ 53 | [Rr]eleasePS/ 54 | dlldata.c 55 | 56 | # Benchmark Results 57 | BenchmarkDotNet.Artifacts/ 58 | 59 | # .NET Core 60 | project.lock.json 61 | project.fragment.lock.json 62 | artifacts/ 63 | 64 | # StyleCop 65 | StyleCopReport.xml 66 | 67 | # Files built by Visual Studio 68 | *_i.c 69 | *_p.c 70 | *_h.h 71 | *.ilk 72 | *.meta 73 | *.obj 74 | *.iobj 75 | *.pch 76 | *.pdb 77 | *.ipdb 78 | *.pgc 79 | *.pgd 80 | *.rsp 81 | *.sbr 82 | *.tlb 83 | *.tli 84 | *.tlh 85 | *.tmp 86 | *.tmp_proj 87 | *_wpftmp.csproj 88 | *.log 89 | *.vspscc 90 | *.vssscc 91 | .builds 92 | *.pidb 93 | *.svclog 94 | *.scc 95 | 96 | # Chutzpah Test files 97 | _Chutzpah* 98 | 99 | # Visual C++ cache files 100 | ipch/ 101 | *.aps 102 | *.ncb 103 | *.opendb 104 | *.opensdf 105 | *.sdf 106 | *.cachefile 107 | *.VC.db 108 | *.VC.VC.opendb 109 | 110 | # Visual Studio profiler 111 | *.psess 112 | *.vsp 113 | *.vspx 114 | *.sap 115 | 116 | # Visual Studio Trace Files 117 | *.e2e 118 | 119 | # TFS 
2012 Local Workspace 120 | $tf/ 121 | 122 | # Guidance Automation Toolkit 123 | *.gpState 124 | 125 | # ReSharper is a .NET coding add-in 126 | _ReSharper*/ 127 | *.[Rr]e[Ss]harper 128 | *.DotSettings.user 129 | 130 | # TeamCity is a build add-in 131 | _TeamCity* 132 | 133 | # DotCover is a Code Coverage Tool 134 | *.dotCover 135 | 136 | # AxoCover is a Code Coverage Tool 137 | .axoCover/* 138 | !.axoCover/settings.json 139 | 140 | # Visual Studio code coverage results 141 | *.coverage 142 | *.coveragexml 143 | 144 | # NCrunch 145 | _NCrunch_* 146 | .*crunch*.local.xml 147 | nCrunchTemp_* 148 | 149 | # MightyMoose 150 | *.mm.* 151 | AutoTest.Net/ 152 | 153 | # Web workbench (sass) 154 | .sass-cache/ 155 | 156 | # Installshield output folder 157 | [Ee]xpress/ 158 | 159 | # DocProject is a documentation generator add-in 160 | DocProject/buildhelp/ 161 | DocProject/Help/*.HxT 162 | DocProject/Help/*.HxC 163 | DocProject/Help/*.hhc 164 | DocProject/Help/*.hhk 165 | DocProject/Help/*.hhp 166 | DocProject/Help/Html2 167 | DocProject/Help/html 168 | 169 | # Click-Once directory 170 | publish/ 171 | 172 | # Publish Web Output 173 | *.[Pp]ublish.xml 174 | *.azurePubxml 175 | # Note: Comment the next line if you want to checkin your web deploy settings, 176 | # but database connection strings (with potential passwords) will be unencrypted 177 | *.pubxml 178 | *.publishproj 179 | 180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to 181 | # checkin your Azure Web App publish settings, but sensitive information contained 182 | # in these scripts will be unencrypted 183 | PublishScripts/ 184 | 185 | # NuGet Packages 186 | *.nupkg 187 | # NuGet Symbol Packages 188 | *.snupkg 189 | # The packages folder can be ignored because of Package Restore 190 | **/[Pp]ackages/* 191 | # except build/, which is used as an MSBuild target. 
192 | !**/[Pp]ackages/build/ 193 | # Uncomment if necessary however generally it will be regenerated when needed 194 | #!**/[Pp]ackages/repositories.config 195 | # NuGet v3's project.json files produces more ignorable files 196 | *.nuget.props 197 | *.nuget.targets 198 | 199 | # Microsoft Azure Build Output 200 | csx/ 201 | *.build.csdef 202 | 203 | # Microsoft Azure Emulator 204 | ecf/ 205 | rcf/ 206 | 207 | # Windows Store app package directories and files 208 | AppPackages/ 209 | BundleArtifacts/ 210 | Package.StoreAssociation.xml 211 | _pkginfo.txt 212 | *.appx 213 | *.appxbundle 214 | *.appxupload 215 | 216 | # Visual Studio cache files 217 | # files ending in .cache can be ignored 218 | *.[Cc]ache 219 | # but keep track of directories ending in .cache 220 | !?*.[Cc]ache/ 221 | 222 | # Others 223 | ClientBin/ 224 | ~$* 225 | *~ 226 | *.dbmdl 227 | *.dbproj.schemaview 228 | *.jfm 229 | *.pfx 230 | *.publishsettings 231 | orleans.codegen.cs 232 | 233 | # Including strong name files can present a security risk 234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 235 | #*.snk 236 | 237 | # Since there are multiple workflows, uncomment next line to ignore bower_components 238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 239 | #bower_components/ 240 | 241 | # RIA/Silverlight projects 242 | Generated_Code/ 243 | 244 | # Backup & report files from converting an old project file 245 | # to a newer Visual Studio version. 
Backup files are not needed, 246 | # because we have git ;-) 247 | _UpgradeReport_Files/ 248 | Backup*/ 249 | UpgradeLog*.XML 250 | UpgradeLog*.htm 251 | ServiceFabricBackup/ 252 | *.rptproj.bak 253 | 254 | # SQL Server files 255 | *.mdf 256 | *.ldf 257 | *.ndf 258 | 259 | # Business Intelligence projects 260 | *.rdl.data 261 | *.bim.layout 262 | *.bim_*.settings 263 | *.rptproj.rsuser 264 | *- [Bb]ackup.rdl 265 | *- [Bb]ackup ([0-9]).rdl 266 | *- [Bb]ackup ([0-9][0-9]).rdl 267 | 268 | # Microsoft Fakes 269 | FakesAssemblies/ 270 | 271 | # GhostDoc plugin setting file 272 | *.GhostDoc.xml 273 | 274 | # Node.js Tools for Visual Studio 275 | .ntvs_analysis.dat 276 | node_modules/ 277 | 278 | # Visual Studio 6 build log 279 | *.plg 280 | 281 | # Visual Studio 6 workspace options file 282 | *.opt 283 | 284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 285 | *.vbw 286 | 287 | # Visual Studio LightSwitch build output 288 | **/*.HTMLClient/GeneratedArtifacts 289 | **/*.DesktopClient/GeneratedArtifacts 290 | **/*.DesktopClient/ModelManifest.xml 291 | **/*.Server/GeneratedArtifacts 292 | **/*.Server/ModelManifest.xml 293 | _Pvt_Extensions 294 | 295 | # Paket dependency manager 296 | .paket/paket.exe 297 | paket-files/ 298 | 299 | # FAKE - F# Make 300 | .fake/ 301 | 302 | # CodeRush personal settings 303 | .cr/personal 304 | 305 | # Python Tools for Visual Studio (PTVS) 306 | __pycache__/ 307 | *.pyc 308 | 309 | # Cake - Uncomment if you are using it 310 | # tools/** 311 | # !tools/packages.config 312 | 313 | # Tabs Studio 314 | *.tss 315 | 316 | # Telerik's JustMock configuration file 317 | *.jmconfig 318 | 319 | # BizTalk build output 320 | *.btp.cs 321 | *.btm.cs 322 | *.odx.cs 323 | *.xsd.cs 324 | 325 | # OpenCover UI analysis results 326 | OpenCover/ 327 | 328 | # Azure Stream Analytics local run output 329 | ASALocalRun/ 330 | 331 | # MSBuild Binary and Structured Log 332 | *.binlog 333 | 334 | # NVidia Nsight GPU debugger 
configuration file 335 | *.nvuser 336 | 337 | # MFractors (Xamarin productivity tool) working folder 338 | .mfractor/ 339 | 340 | # Local History for Visual Studio 341 | .localhistory/ 342 | 343 | # BeatPulse healthcheck temp database 344 | healthchecksdb 345 | 346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 347 | MigrationBackup/ 348 | 349 | # Ionide (cross platform F# VS Code tools) working folder 350 | .ionide/ 351 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![NuGet](https://img.shields.io/nuget/v/TesserNet.svg)](https://www.nuget.org/packages/TesserNet/) 2 | Buy Me A Coffee 3 | 4 | # TesserNet 5 | TesserNet provides high level bindings for Tesseract in .NET. 6 | The library comes with all required native libraries and a trained English model, meaning you don't need any additional setup to get the library up and running! 7 | Additionally, the library provides a simple Tesseract instance pooling system (through the `TesseractPool` class) so you can carelessly make asynchronous OCR invocations. 8 | 9 | ## Limitations 10 | Windows is currently the only version that doesn't require installing extra dependencies. 11 | For Linux distributions it is necessary to install `tesseract-ocr`. 12 | For distributions that use `apt` as the package manager (e.g. Ubuntu, Debian, Raspbian) this can be done using `sudo apt-get install tesseract-ocr`. 13 | Linux support is new and experimental. 
Problems might arise due to `tesseract-ocr` not being available or because the found version is too old. 14 | iOS is currently not yet supported. 15 | 16 | ## Downloads 17 | [TesserNet](https://www.nuget.org/packages/TesserNet/) 18 | [TesserNet for System.Drawing](https://www.nuget.org/packages/TesserNet.System.Drawing/) 19 | [TesserNet for ImageSharp](https://www.nuget.org/packages/TesserNet.ImageSharp/) 20 | [TesserNet for SkiaSharp](https://www.nuget.org/packages/TesserNet.SkiaSharp/) 21 | 22 | ## License 23 | This product includes [Leptonica](http://leptonica.com/), which is available under a "BSD 2-clause" license. 24 | This product includes [Tesseract](https://github.com/tesseract-ocr/tesseract), which is available under an "Apache Version 2.0" license. 25 | 26 | ## Usage 27 | **When using on Linux, make sure `tesseract-ocr` has been installed on your system.** 28 | 29 | There are a few example projects available for you to try out in the `src` directory. 30 | Note that the `TesserNet.Example.System.Drawing` example uses .NET Framework, 31 | meaning it will only run on Windows. 32 | 33 | To start off, one first needs to add the following import: 34 | ```cs 35 | using TesserNet; 36 | ``` 37 | 38 | One can then create a `Tesseract` instance: 39 | ```cs 40 | Tesseract tesseract = new Tesseract(); 41 | ``` 42 | 43 | With that instance one can now perform OCR. 
44 | ```cs 45 | string result = tesseract.Read(...); 46 | ``` 47 | 48 | By default, the following `Read` methods are provided: 49 | ```cs 50 | string Read(byte[] data, int width, int height, int bytesPerPixel); 51 | string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight); 52 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel); 53 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight); 54 | ``` 55 | 56 | Additionally, if one prefers to use System.Drawing, ImageSharp or SkiaSharp, it is possible to also add a dependency to 57 | [TesserNet.System.Drawing](https://www.nuget.org/packages/TesserNet.System.Drawing/), 58 | [TesserNet.ImageSharp](https://www.nuget.org/packages/TesserNet.ImageSharp/) or 59 | [TesserNet.SkiaSharp](https://www.nuget.org/packages/TesserNet.SkiaSharp/) respectively. 60 | Adding any of these dependencies adds the following `Read` methods: 61 | ```cs 62 | string Read(Image image); 63 | string Read(Image image, Rectangle rectangle); 64 | Task<string> ReadAsync(Image image); 65 | Task<string> ReadAsync(Image image, Rectangle rectangle); 66 | ``` 67 | 68 | Furthermore, when trying to use concurrency, it might be useful to have a look at the `TesseractPool` class: 69 | ```cs 70 | TesseractPool pool = new TesseractPool(); 71 | ``` 72 | 73 | The `TesseractPool` class provides a pooling mechanism for running the OCR on multiple `Tesseract` instances, without having to manually deal with all the different instances. 
74 | The class has the following methods: 75 | ```cs 76 | string Read(byte[] data, int width, int height, int bytesPerPixel); 77 | string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight); 78 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel); 79 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight); 80 | ``` 81 | 82 | And when any of the aforementioned image processing bridging libraries is present: 83 | ```cs 84 | string Read(Image image); 85 | string Read(Image image, Rectangle rectangle); 86 | Task<string> ReadAsync(Image image); 87 | Task<string> ReadAsync(Image image, Rectangle rectangle); 88 | ``` 89 | -------------------------------------------------------------------------------- /dependency_licenses/tesseract_LICENSES: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 
23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. 
For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. 
If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. 
You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. 
(Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /src/Ruleset.ruleset: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /src/TesserNet.Example.ImageSharp/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.IO; 3 | using System.Reflection; 4 | using SixLabors.ImageSharp; 5 | 6 | namespace TesserNet.Example.ImageSharp 7 | { 8 | public static class Program 9 | { 10 | public static void Main() 11 | { 12 | Stream stream = 
Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.ImageSharp.img.png"); 13 | Image image = Image.Load(stream); 14 | Tesseract tesseract = new Tesseract(); 15 | 16 | Console.WriteLine(tesseract.Read(image).Trim()); 17 | 18 | stream.Dispose(); 19 | image.Dispose(); 20 | tesseract.Dispose(); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/TesserNet.Example.ImageSharp/TesserNet.Example.ImageSharp.csproj: -------------------------------------------------------------------------------- 1 | 2 | 3 | Exe 4 | netcoreapp3.1 5 | AnyCPU;x64;x86 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/TesserNet.Example.ImageSharp/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.ImageSharp/img.png -------------------------------------------------------------------------------- /src/TesserNet.Example.SkiaSharp/Program.cs: -------------------------------------------------------------------------------- 1 | using SkiaSharp; 2 | using System; 3 | using System.IO; 4 | using System.Reflection; 5 | 6 | namespace TesserNet.Example.SkiaSharp 7 | { 8 | public static class Program 9 | { 10 | public static void Main() 11 | { 12 | Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.SkiaSharp.img.png"); 13 | SKBitmap image = SKBitmap.Decode(stream); 14 | Tesseract tesseract = new Tesseract(); 15 | 16 | Console.WriteLine(tesseract.Read(image).Trim()); 17 | 18 | stream.Dispose(); 19 | image.Dispose(); 20 | tesseract.Dispose(); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/TesserNet.Example.SkiaSharp/TesserNet.Example.SkiaSharp.csproj:
-------------------------------------------------------------------------------- 1 |  2 | 3 | Exe 4 | netcoreapp3.1 5 | AnyCPU;x64;x86 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/TesserNet.Example.SkiaSharp/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.SkiaSharp/img.png -------------------------------------------------------------------------------- /src/TesserNet.Example.System.Drawing/Program.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Drawing; 3 | using System.IO; 4 | using System.Reflection; 5 | 6 | namespace TesserNet.Example.System.Drawing 7 | { 8 | public static class Program 9 | { 10 | public static void Main() 11 | { 12 | Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.System.Drawing.img.png"); 13 | Image image = Image.FromStream(stream); 14 | Tesseract tesseract = new Tesseract(); 15 | 16 | Console.WriteLine(tesseract.Read(image).Trim()); 17 | 18 | stream.Dispose(); 19 | image.Dispose(); 20 | tesseract.Dispose(); 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/TesserNet.Example.System.Drawing/TesserNet.Example.System.Drawing.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | Exe 4 | net48 5 | AnyCPU;x64;x86 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /src/TesserNet.Example.System.Drawing/img.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.System.Drawing/img.png -------------------------------------------------------------------------------- /src/TesserNet.ImageSharp/ImageSharpTesseractExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Threading.Tasks; 3 | using SixLabors.ImageSharp; 4 | using SixLabors.ImageSharp.PixelFormats; 5 | 6 | namespace TesserNet 7 | { 8 | /// 9 | /// Provides extension methods for the class. 10 | /// 11 | public static class ImageSharpTesseractExtensions 12 | { 13 | /// 14 | /// Performs OCR on the given image. 15 | /// 16 | /// The tesseract instance. 17 | /// The image. 18 | /// The found text as a UTF8 string. 19 | public static string Read(this ITesseract tesseract, Image image) 20 | => tesseract.Read(image, new Rectangle(-1, -1, -1, -1)); 21 | 22 | /// 23 | /// Performs OCR on a rectangle inside the given image. 24 | /// 25 | /// The tesseract instance. 26 | /// The image. 27 | /// The rectangle to perform OCR in. 28 | /// The found text as a UTF8 string. 29 | public static string Read(this ITesseract tesseract, Image image, Rectangle rectangle) 30 | { 31 | if (tesseract is null) 32 | { 33 | throw new ArgumentNullException(nameof(tesseract)); 34 | } 35 | 36 | if (image is null) 37 | { 38 | throw new ArgumentNullException(nameof(image)); 39 | } 40 | 41 | if (image is not Image bmp) 42 | { 43 | bmp = image.CloneAs(); 44 | } 45 | 46 | try 47 | { 48 | // Keep the pixel buffer pinned until the OCR call has returned; a pointer taken inside a fixed block is not valid after the block ends. 49 | using System.Buffers.MemoryHandle pixels = PinPixels(bmp); 50 | return tesseract.Read(ToPointer(pixels), image.Width, image.Height, 4, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height); 51 | } 52 | finally 53 | { 54 | if (bmp != image) 55 | { 56 | bmp.Dispose(); 57 | } 58 | } 59 | } 60 | 61 | /// 62 | /// Performs OCR on the given image. 63 | /// 64 | /// The tesseract instance. 65 | /// The image. 66 | /// The found text as a UTF8 string.
67 | public static Task ReadAsync(this ITesseract tesseract, Image image) 68 | => tesseract.ReadAsync(image, new Rectangle(-1, -1, -1, -1)); 69 | 70 | /// 71 | /// Performs OCR on a rectangle inside the given image. 72 | /// 73 | /// The tesseract instance. 74 | /// The image. 75 | /// The rectangle to perform OCR in. 76 | /// The found text as a UTF8 string. 77 | public static Task ReadAsync(this ITesseract tesseract, Image image, Rectangle rectangle) 78 | { 79 | if (tesseract is null) 80 | { 81 | throw new ArgumentNullException(nameof(tesseract)); 82 | } 83 | 84 | if (image is null) 85 | { 86 | throw new ArgumentNullException(nameof(image)); 87 | } 88 | 89 | if (image is not Image bmp) 90 | { 91 | bmp = image.CloneAs(); 92 | } 93 | 94 | System.Buffers.MemoryHandle pixels = default; 95 | Task resultTask; 96 | 97 | try 98 | { 99 | pixels = PinPixels(bmp); 100 | resultTask = tesseract.ReadAsync(ToPointer(pixels), image.Width, image.Height, 4, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height); 101 | } 102 | catch 103 | { 104 | // The read never started, so release the pin (and a converted copy) here. 105 | pixels.Dispose(); 106 | 107 | if (bmp != image) 108 | { 109 | bmp.Dispose(); 110 | } 111 | 112 | throw; 113 | } 114 | 115 | // Unpin only after the OCR task has finished with the pixel buffer. 116 | return resultTask.ContinueWith(r => 117 | { 118 | pixels.Dispose(); 119 | 120 | if (bmp != image) 121 | { 122 | bmp.Dispose(); 123 | } 124 | 125 | return r.Result; 126 | }); 127 | } 128 | 129 | // Pins the single contiguous pixel buffer of the image; the caller must dispose the handle to unpin it. 130 | private static System.Buffers.MemoryHandle PinPixels(Image image) 131 | { 132 | if (!image.DangerousTryGetSinglePixelMemory(out Memory memory)) 133 | { 134 | throw new TesseractException($"Could not get image pixels."); 135 | } 136 | 137 | return memory.Pin(); 138 | } 139 | 140 | // Exposes the address of a pinned buffer as an IntPtr for the native call. 141 | private static unsafe IntPtr ToPointer(System.Buffers.MemoryHandle handle) 142 | => new IntPtr(handle.Pointer); 143 | } 144 | } 145 | -------------------------------------------------------------------------------- /src/TesserNet.ImageSharp/TesserNet.ImageSharp.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | netstandard2.0 4 | ../Ruleset.ruleset 5 | bin/$(AssemblyName).xml 6 | 9 7 | enable 8 | true 9 | true 10 | 11 | Wesley Baartman 12 | https://github.com/CptWesley/TesserNet 13 | https://github.com/CptWesley/TesserNet 14 | Apache-2.0 15 | Provides extension methods for TesserNet when using ImageSharp.
16 | tesseract ocr bindings optical character recognition tesseract-ocr imagesharp 17 | $(Version) 18 | $(Version) 19 | 0.8.0 20 | 21 | README.md 22 | AnyCPU;x64;x86 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | all 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/TesserNet.SkiaSharp/SkiaSharpTesseractExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Threading.Tasks; 3 | using SkiaSharp; 4 | 5 | namespace TesserNet 6 | { 7 | /// 8 | /// Provides extension methods for the class. 9 | /// 10 | public static class SkiaSharpTesseractExtensions 11 | { 12 | /// 13 | /// Performs OCR on the given image. 14 | /// 15 | /// The tesseract instance. 16 | /// The image. 17 | /// The found text as a UTF8 string. 18 | public static string Read(this ITesseract tesseract, SKBitmap image) 19 | => tesseract.Read(image, new SKRect(-1, -1, -1, -1)); 20 | 21 | /// 22 | /// Performs OCR on a rectangle inside the given image. 23 | /// 24 | /// The tesseract instance. 25 | /// The image. 26 | /// The rectangle to perform OCR in. 27 | /// The found text as a UTF8 string. 28 | public static string Read(this ITesseract tesseract, SKBitmap image, SKRect rectangle) 29 | { 30 | if (tesseract is null) 31 | { 32 | throw new ArgumentNullException(nameof(tesseract)); 33 | } 34 | 35 | if (image is null) 36 | { 37 | throw new ArgumentNullException(nameof(image)); 38 | } 39 | 40 | IntPtr data = BitmapToBytes(image); 41 | return tesseract.Read(data, image.Width, image.Height, 4, (int)rectangle.Left, (int)rectangle.Top, (int)rectangle.Width, (int)rectangle.Height); 42 | } 43 | 44 | /// 45 | /// Performs OCR on the given image. 46 | /// 47 | /// The tesseract instance. 48 | /// The image. 49 | /// The found text as a UTF8 string. 
50 | public static Task ReadAsync(this ITesseract tesseract, SKBitmap image) 51 | => tesseract.ReadAsync(image, new SKRect(-1, -1, -1, -1)); 52 | 53 | /// 54 | /// Performs OCR on a rectangle inside the given image. 55 | /// 56 | /// The tesseract instance. 57 | /// The image. 58 | /// The rectangle to perform OCR in. 59 | /// The found text as a UTF8 string. 60 | public static Task ReadAsync(this ITesseract tesseract, SKBitmap image, SKRect rectangle) 61 | { 62 | if (tesseract is null) 63 | { 64 | throw new ArgumentNullException(nameof(tesseract)); 65 | } 66 | 67 | if (image is null) 68 | { 69 | throw new ArgumentNullException(nameof(image)); 70 | } 71 | 72 | IntPtr data = BitmapToBytes(image); 73 | return tesseract.ReadAsync(data, image.Width, image.Height, 4, (int)rectangle.Left, (int)rectangle.Top, (int)rectangle.Width, (int)rectangle.Height); 74 | } 75 | 76 | /// 77 | /// Performs OCR on the given image. 78 | /// 79 | /// The tesseract instance. 80 | /// The image. 81 | /// The found text as a UTF8 string. 82 | public static string Read(this ITesseract tesseract, SKImage image) 83 | => tesseract.Read(image, new SKRect(-1, -1, -1, -1)); 84 | 85 | /// 86 | /// Performs OCR on a rectangle inside the given image. 87 | /// 88 | /// The tesseract instance. 89 | /// The image. 90 | /// The rectangle to perform OCR in. 91 | /// The found text as a UTF8 string. 92 | public static string Read(this ITesseract tesseract, SKImage image, SKRect rectangle) 93 | { 94 | // The temporary bitmap is owned by this method; dispose it once the synchronous read is done. 95 | using SKBitmap bmp = SKBitmap.FromImage(image); 96 | return tesseract.Read(bmp, rectangle); 97 | } 98 | 99 | /// 100 | /// Performs OCR on the given image. 101 | /// 102 | /// The tesseract instance. 103 | /// The image. 104 | /// The found text as a UTF8 string. 105 | public static Task ReadAsync(this ITesseract tesseract, SKImage image) 106 | => tesseract.ReadAsync(image, new SKRect(-1, -1, -1, -1)); 107 | 108 | /// 109 | /// Performs OCR on a rectangle inside the given image. 110 | /// 111 | /// The tesseract instance. 112 | /// The image.
113 | /// The rectangle to perform OCR in. 114 | /// The found text as a UTF8 string. 115 | public static Task ReadAsync(this ITesseract tesseract, SKImage image, SKRect rectangle) 116 | { 117 | SKBitmap bmp = SKBitmap.FromImage(image); 118 | Task resultTask; 119 | 120 | try 121 | { 122 | resultTask = tesseract.ReadAsync(bmp, rectangle); 123 | } 124 | catch 125 | { 126 | // The read never started, so the temporary bitmap has to be released here. 127 | bmp.Dispose(); 128 | throw; 129 | } 130 | 131 | // Keep the temporary bitmap referenced until the OCR task has finished with its pixels, then release it. 132 | return resultTask.ContinueWith(r => 133 | { 134 | bmp.Dispose(); 135 | 136 | // GetAwaiter().GetResult() rethrows the original exception, so awaiting callers see the same exception type as before. 137 | return r.GetAwaiter().GetResult(); 138 | }); 139 | } 140 | 141 | private static unsafe IntPtr BitmapToBytes(SKBitmap bmp) 142 | { 143 | fixed (byte* ptr = bmp.GetPixelSpan()) 144 | { 145 | return new IntPtr(ptr); 146 | } 147 | } 148 | } 149 | } 150 | -------------------------------------------------------------------------------- /src/TesserNet.SkiaSharp/TesserNet.SkiaSharp.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | netstandard2.0 4 | ../Ruleset.ruleset 5 | bin/$(AssemblyName).xml 6 | 9 7 | enable 8 | true 9 | true 10 | 11 | Wesley Baartman 12 | https://github.com/CptWesley/TesserNet 13 | https://github.com/CptWesley/TesserNet 14 | Apache-2.0 15 | Provides extension methods for TesserNet when using SkiaSharp. 16 | tesseract ocr bindings optical character recognition tesseract-ocr skia sharp skiasharp 17 | $(Version) 18 | $(Version) 19 | 0.8.0 20 | 21 | README.md 22 | AnyCPU;x64;x86 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | all 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | -------------------------------------------------------------------------------- /src/TesserNet.System.Drawing/SystemDrawingTesseractExtensions.cs: -------------------------------------------------------------------------------- 1 | using System; 2 | using System.Diagnostics.CodeAnalysis; 3 | using System.Drawing; 4 | using System.Drawing.Imaging; 5 | using System.Threading.Tasks; 6 | 7 | namespace TesserNet 8 | { 9 | /// 10 | /// Provides extension methods for the class. 11 | /// 12 | public static class SystemDrawingTesseractExtensions 13 | { 14 | /// 15 | /// Performs OCR on the given image. 16 | /// 17 | /// The tesseract instance. 18 | /// The image. 19 | /// The found text as a UTF8 string.
20 | public static string Read(this ITesseract tesseract, Image image) 21 | => tesseract.Read(image, new Rectangle(-1, -1, -1, -1)); 22 | 23 | /// 24 | /// Performs OCR on a rectangle inside the given image. 25 | /// 26 | /// The tesseract instance. 27 | /// The image. 28 | /// The rectangle to perform OCR in. 29 | /// The found text as a UTF8 string. 30 | [SuppressMessage("Reliability", "CA2000", Justification = "Bitmap is disposed if new one was created.")] 31 | public static string Read(this ITesseract tesseract, Image image, Rectangle rectangle) 32 | { 33 | if (tesseract is null) 34 | { 35 | throw new ArgumentNullException(nameof(tesseract)); 36 | } 37 | 38 | if (image is null) 39 | { 40 | throw new ArgumentNullException(nameof(image)); 41 | } 42 | 43 | if (image is not Bitmap bmp) 44 | { 45 | bmp = new Bitmap(image); 46 | } 47 | 48 | // Scan0 is only valid while the bits are locked, so keep the lock for the whole OCR call. 49 | BitmapData bmpData = LockPixels(bmp); 50 | try 51 | { 52 | int bpp = Image.GetPixelFormatSize(bmp.PixelFormat) / 8; 53 | return tesseract.Read(bmpData.Scan0, bmp.Width, bmp.Height, bpp, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height); 54 | } 55 | finally 56 | { 57 | ReleasePixels(bmp, bmpData, bmp != image); 58 | } 59 | } 60 | 61 | /// 62 | /// Performs OCR on the given image. 63 | /// 64 | /// The tesseract instance. 65 | /// The image. 66 | /// The found text as a UTF8 string. 67 | public static Task ReadAsync(this ITesseract tesseract, Image image) 68 | => tesseract.ReadAsync(image, new Rectangle(-1, -1, -1, -1)); 69 | 70 | /// 71 | /// Performs OCR on a rectangle inside the given image. 72 | /// 73 | /// The tesseract instance. 74 | /// The image. 75 | /// The rectangle to perform OCR in. 76 | /// The found text as a UTF8 string.
77 | [SuppressMessage("Reliability", "CA2000", Justification = "Bitmap is disposed if new one was created.")] 78 | public static Task ReadAsync(this ITesseract tesseract, Image image, Rectangle rectangle) 79 | { 80 | if (tesseract is null) 81 | { 82 | throw new ArgumentNullException(nameof(tesseract)); 83 | } 84 | 85 | if (image is null) 86 | { 87 | throw new ArgumentNullException(nameof(image)); 88 | } 89 | 90 | if (image is not Bitmap bmp) 91 | { 92 | bmp = new Bitmap(image); 93 | } 94 | 95 | bool ownsBitmap = bmp != image; 96 | BitmapData bmpData = LockPixels(bmp); 97 | 98 | // Use the pixel format of the bitmap that is actually read (not a hard-coded 4), matching the synchronous overload. 99 | int bpp = Image.GetPixelFormatSize(bmp.PixelFormat) / 8; 100 | Task resultTask; 101 | 102 | try 103 | { 104 | resultTask = tesseract.ReadAsync(bmpData.Scan0, bmp.Width, bmp.Height, bpp, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height); 105 | } 106 | catch 107 | { 108 | // The read never started, so nobody else will release the lock. 109 | ReleasePixels(bmp, bmpData, ownsBitmap); 110 | throw; 111 | } 112 | 113 | // Unlock (and dispose a converted copy) only after the OCR task has finished with the pixel buffer. 114 | return resultTask.ContinueWith(r => 115 | { 116 | ReleasePixels(bmp, bmpData, ownsBitmap); 117 | return r.Result; 118 | }); 119 | } 120 | 121 | // Locks the whole bitmap for reading; the returned data and its Scan0 pointer must be released with ReleasePixels. 122 | private static BitmapData LockPixels(Bitmap image) 123 | => image.LockBits(new Rectangle(0, 0, image.Width, image.Height), ImageLockMode.ReadOnly, image.PixelFormat); 124 | 125 | // Unlocks the bitmap and disposes it when it is a temporary copy created by this class. 126 | private static void ReleasePixels(Bitmap image, BitmapData data, bool dispose) 127 | { 128 | image.UnlockBits(data); 129 | 130 | if (dispose) 131 | { 132 | image.Dispose(); 133 | } 134 | } 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /src/TesserNet.System.Drawing/TesserNet.System.Drawing.csproj: -------------------------------------------------------------------------------- 1 | ﻿ 2 | 3 | netstandard2.0 4 | ../Ruleset.ruleset 5 | bin/$(AssemblyName).xml 6 | 9 7 | enable 8 | true 9 | 10 | Wesley Baartman 11 | https://github.com/CptWesley/TesserNet 12 | https://github.com/CptWesley/TesserNet 13 | Apache-2.0 14 | Provides extension methods for TesserNet when using System.Drawing.
15 | tesseract ocr bindings optical character recognition tesseract-ocr system drawing 16 | $(Version) 17 | $(Version) 18 | 0.8.0 19 | 20 | README.md 21 | AnyCPU;x64;x86 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | all 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /src/TesserNet.Tests/ImageLoader.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using System.Reflection; 3 | 4 | namespace TesserNet.Tests 5 | { 6 | /// 7 | /// Used to load images. 8 | /// 9 | internal static class ImageLoader 10 | { 11 | /// 12 | /// Loads an image as a stream. 13 | /// 14 | /// The filename. 15 | /// The stream. 16 | public static Stream LoadStream(string fileName) 17 | { 18 | Assembly asm = Assembly.GetExecutingAssembly(); 19 | return asm.GetManifestResourceStream($"TesserNet.Tests.Resources.{fileName}"); 20 | } 21 | 22 | /// 23 | /// Loads an image as a byte array. 24 | /// 25 | /// The filename. 26 | /// The byte array.
27 | public static byte[] LoadByteArray(string fileName) 28 | { 29 | using MemoryStream ms = new MemoryStream(); 30 | using Stream s = LoadStream(fileName); 31 | s.CopyTo(ms); 32 | return ms.ToArray(); 33 | } 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/TesserNet.Tests/Resources/img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Tests/Resources/img.png -------------------------------------------------------------------------------- /src/TesserNet.Tests/TesserNet.Tests.csproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | netcoreapp3.1 4 | false 5 | ../Ruleset.ruleset 6 | bin/$(AssemblyName).xml 7 | true 8 | AnyCPU;x64;x86 9 | 10 | 11 | 12 | true 13 | opencover 14 | ./bin/ 15 | [TesserNet]* 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | all 28 | runtime; build; native; contentfiles; analyzers; buildtransitive 29 | 30 | 31 | all 32 | runtime; build; native; contentfiles; analyzers; buildtransitive 33 | 34 | 35 | all 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/TesserNet.Tests/Tests.cs: -------------------------------------------------------------------------------- 1 | using System.IO; 2 | using SkiaSharp; 3 | using Xunit; 4 | using static AssertNet.Assertions; 5 | 6 | namespace TesserNet.Tests 7 | { 8 | /// 9 | /// Contains simple tests. 10 | /// 11 | public static class Tests 12 | { 13 | private const string FileName1 = "img.png"; 14 | private const string FileContent1 = "Hello world!"; 15 | private static readonly ITesseract Tess = new Tesseract(); 16 | 17 | /// 18 | /// Checks that the ImageSharp implementation works for simple image. 
19 | /// 20 | [Fact] 21 | public static void ImageSharp() 22 | { 23 | using Stream s = ImageLoader.LoadStream(FileName1); 24 | using var img = SixLabors.ImageSharp.Image.Load(s); 25 | AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1); 26 | } 27 | 28 | /// 29 | /// Checks that the SkiaSharp implementation works for simple image. 30 | /// 31 | [Fact] 32 | public static void SkiaSharp() 33 | { 34 | using Stream s = ImageLoader.LoadStream(FileName1); 35 | using var img = SKBitmap.Decode(s); 36 | AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1); 37 | } 38 | 39 | /// 40 | /// Checks that the System.Drawing implementation works for simple image. 41 | /// 42 | [Fact] 43 | public static void SystemDrawing() 44 | { 45 | using Stream s = ImageLoader.LoadStream(FileName1); 46 | using var img = System.Drawing.Image.FromStream(s); 47 | AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/TesserNet.sln: -------------------------------------------------------------------------------- 1 | ﻿ 2 | Microsoft Visual Studio Solution File, Format Version 12.00 3 | # Visual Studio Version 17 4 | VisualStudioVersion = 17.3.32811.315 5 | MinimumVisualStudioVersion = 10.0.40219.1 6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet", "TesserNet\TesserNet.csproj", "{98D03E85-6685-40E1-B2FC-173636F955CD}" 7 | EndProject 8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Tests", "TesserNet.Tests\TesserNet.Tests.csproj", "{2E467823-D21D-4856-B643-6A8712AC3F0E}" 9 | EndProject 10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.System.Drawing", "TesserNet.System.Drawing\TesserNet.System.Drawing.csproj", "{B70F5950-175A-42BD-A164-EE7FE2977D3D}" 11 | EndProject 12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.System.Drawing", "TesserNet.Example.System.Drawing\TesserNet.Example.System.Drawing.csproj",
"{DA52B301-D81D-4AD1-9BD7-EB567D1FB176}" 13 | EndProject 14 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.ImageSharp", "TesserNet.ImageSharp\TesserNet.ImageSharp.csproj", "{AE1BA00D-D8E1-4726-A969-C54E02D8985C}" 15 | EndProject 16 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.ImageSharp", "TesserNet.Example.ImageSharp\TesserNet.Example.ImageSharp.csproj", "{271E16DD-D0EF-43A4-846D-7FC0C84867B6}" 17 | EndProject 18 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.SkiaSharp", "TesserNet.SkiaSharp\TesserNet.SkiaSharp.csproj", "{0FC0748B-73DA-4897-B55A-907F5E296BC2}" 19 | EndProject 20 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.SkiaSharp", "TesserNet.Example.SkiaSharp\TesserNet.Example.SkiaSharp.csproj", "{DD580635-ACCF-4462-B7C2-197EE5354CE2}" 21 | EndProject 22 | Global 23 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 24 | Debug|Any CPU = Debug|Any CPU 25 | Debug|x64 = Debug|x64 26 | Debug|x86 = Debug|x86 27 | Release|Any CPU = Release|Any CPU 28 | Release|x64 = Release|x64 29 | Release|x86 = Release|x86 30 | EndGlobalSection 31 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 32 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 33 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|Any CPU.Build.0 = Debug|Any CPU 34 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x64.ActiveCfg = Debug|x64 35 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x64.Build.0 = Debug|x64 36 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x86.ActiveCfg = Debug|x86 37 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x86.Build.0 = Debug|x86 38 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|Any CPU.ActiveCfg = Release|Any CPU 39 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|Any CPU.Build.0 = Release|Any CPU 40 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x64.ActiveCfg = Release|x64 41 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x64.Build.0 = 
Release|x64 42 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x86.ActiveCfg = Release|x86 43 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x86.Build.0 = Release|x86 44 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 45 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|Any CPU.Build.0 = Debug|Any CPU 46 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x64.ActiveCfg = Debug|x64 47 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x64.Build.0 = Debug|x64 48 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x86.ActiveCfg = Debug|x86 49 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x86.Build.0 = Debug|x86 50 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|Any CPU.ActiveCfg = Release|Any CPU 51 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|Any CPU.Build.0 = Release|Any CPU 52 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x64.ActiveCfg = Release|x64 53 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x64.Build.0 = Release|x64 54 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x86.ActiveCfg = Release|x86 55 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x86.Build.0 = Release|x86 56 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 57 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|Any CPU.Build.0 = Debug|Any CPU 58 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x64.ActiveCfg = Debug|x64 59 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x64.Build.0 = Debug|x64 60 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x86.ActiveCfg = Debug|x86 61 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x86.Build.0 = Debug|x86 62 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|Any CPU.ActiveCfg = Release|Any CPU 63 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|Any CPU.Build.0 = Release|Any CPU 64 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x64.ActiveCfg = Release|x64 65 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x64.Build.0 = Release|x64 66 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x86.ActiveCfg = 
Release|x86 67 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x86.Build.0 = Release|x86 68 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 69 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|Any CPU.Build.0 = Debug|Any CPU 70 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x64.ActiveCfg = Debug|x64 71 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x64.Build.0 = Debug|x64 72 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x86.ActiveCfg = Debug|x86 73 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x86.Build.0 = Debug|x86 74 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|Any CPU.ActiveCfg = Release|Any CPU 75 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|Any CPU.Build.0 = Release|Any CPU 76 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x64.ActiveCfg = Release|x64 77 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x64.Build.0 = Release|x64 78 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x86.ActiveCfg = Release|x86 79 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x86.Build.0 = Release|x86 80 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 81 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|Any CPU.Build.0 = Debug|Any CPU 82 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x64.ActiveCfg = Debug|x64 83 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x64.Build.0 = Debug|x64 84 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x86.ActiveCfg = Debug|x86 85 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x86.Build.0 = Debug|x86 86 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|Any CPU.ActiveCfg = Release|Any CPU 87 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|Any CPU.Build.0 = Release|Any CPU 88 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x64.ActiveCfg = Release|x64 89 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x64.Build.0 = Release|x64 90 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x86.ActiveCfg = Release|x86 91 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x86.Build.0 = 
Release|x86 92 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 93 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|Any CPU.Build.0 = Debug|Any CPU 94 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x64.ActiveCfg = Debug|x64 95 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x64.Build.0 = Debug|x64 96 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x86.ActiveCfg = Debug|x86 97 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x86.Build.0 = Debug|x86 98 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|Any CPU.ActiveCfg = Release|Any CPU 99 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|Any CPU.Build.0 = Release|Any CPU 100 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x64.ActiveCfg = Release|x64 101 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x64.Build.0 = Release|x64 102 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x86.ActiveCfg = Release|x86 103 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x86.Build.0 = Release|x86 104 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU 105 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|Any CPU.Build.0 = Debug|Any CPU 106 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x64.ActiveCfg = Debug|x64 107 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x64.Build.0 = Debug|x64 108 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x86.ActiveCfg = Debug|x86 109 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x86.Build.0 = Debug|x86 110 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|Any CPU.ActiveCfg = Release|Any CPU 111 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|Any CPU.Build.0 = Release|Any CPU 112 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x64.ActiveCfg = Release|x64 113 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x64.Build.0 = Release|x64 114 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x86.ActiveCfg = Release|x86 115 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x86.Build.0 = Release|x86 116 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|Any 
CPU.ActiveCfg = Debug|Any CPU
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|Any CPU.Build.0 = Debug|Any CPU
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x64.ActiveCfg = Debug|x64
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x64.Build.0 = Debug|x64
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x86.ActiveCfg = Debug|x86
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x86.Build.0 = Debug|x86
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|Any CPU.ActiveCfg = Release|Any CPU
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|Any CPU.Build.0 = Release|Any CPU
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x64.ActiveCfg = Release|x64
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x64.Build.0 = Release|x64
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x86.ActiveCfg = Release|x86
		{DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x86.Build.0 = Release|x86
	EndGlobalSection
	GlobalSection(SolutionProperties) = preSolution
		HideSolutionNode = FALSE
	EndGlobalSection
	GlobalSection(ExtensibilityGlobals) = postSolution
		SolutionGuid = {543B2CD5-02AB-4379-9D7F-045CA7223CC4}
	EndGlobalSection
EndGlobal

--------------------------------------------------------------------------------
/src/TesserNet/ITesseract.cs:
--------------------------------------------------------------------------------
using System;
using System.Threading.Tasks;

namespace TesserNet
{
    /// <summary>
    /// Interface for Tesseract instances.
    /// </summary>
    public unsafe interface ITesseract : IDisposable
    {
        /// <summary>
        /// Gets or sets the options.
        /// </summary>
        TesseractOptions Options { get; set; }

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(byte[] data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(Memory<byte> data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(Span<byte> data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(byte* data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>The found text as a UTF8 string.</returns>
        string Read(IntPtr data, int width, int height, int bytesPerPixel);

        /// <summary>
        /// Performs OCR on the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel);

        ///
        /// Performs OCR on a rectangle inside the given image.
        ///
        /// The bytes of the image.
        /// The width of the image.
        /// The height of the image.
        /// The number of bytes per pixel.
        /// The X coordinate of the rectangle.
/// The Y coordinate of the rectangle.
        /// The width of the rectangle.
        /// The height of the rectangle.
        /// The found text as a UTF8 string.
        string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

        /// <summary>
        /// Performs OCR on a rectangle inside the given image.
        /// </summary>
        /// <param name="data">The bytes of the image.</param>
        /// <param name="width">The width of the image.</param>
        /// <param name="height">The height of the image.</param>
        /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
        /// <param name="rectX">The X coordinate of the rectangle.</param>
        /// <param name="rectY">The Y coordinate of the rectangle.</param>
        /// <param name="rectWidth">The width of the rectangle.</param>
        /// <param name="rectHeight">The height of the rectangle.</param>
        /// <returns>A task yielding the found text as a UTF8 string.</returns>
        Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/LazyQueue.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Threading;
using System.Threading.Tasks;

namespace TesserNet.Internal
{
    /// <summary>
    /// Provides implementation for a simple asynchronous queue.
    /// Dequeue operations wait until an element has been enqueued.
    /// </summary>
    /// <typeparam name="T">Type of elements stored in the queue.</typeparam>
    internal class LazyQueue<T> : IDisposable
    {
        private readonly Queue<T> queue = new Queue<T>();

        // Binary semaphore guarding every mutation of the underlying queue.
        private readonly SemaphoreSlim mutation = new SemaphoreSlim(1, 1);

        // Counts the elements that are ready to be dequeued; starts at zero.
        private readonly SemaphoreSlim availability = new SemaphoreSlim(0);
        private bool isDisposed;

        /// <summary>
        /// Gets the count.
        /// </summary>
        /// <remarks>Read directly from the underlying queue without taking the mutation semaphore.</remarks>
        public int Count => queue.Count;

        /// <summary>
        /// Enqueues a value asynchronously.
        /// </summary>
        /// <param name="value">The value to enqueue.</param>
        /// <returns>A task that performs the enqueueing.</returns>
        public async Task EnqueueAsync(T value)
        {
            await mutation.WaitAsync().ConfigureAwait(false);
            EnqueueInternal(value);
        }

        /// <summary>
        /// Enqueues a value synchronously.
        /// </summary>
        /// <param name="value">The value to enqueue.</param>
        public void Enqueue(T value)
        {
            mutation.Wait();
            EnqueueInternal(value);
        }

        /// <summary>
        /// Dequeues a value asynchronously.
        /// </summary>
        /// <returns>A task which awaits a value to dequeue.</returns>
        public async Task<T> DequeueAsync()
        {
            // Wait for an element first, then for exclusive access to the queue.
            await availability.WaitAsync().ConfigureAwait(false);
            await mutation.WaitAsync().ConfigureAwait(false);
            return DequeueInternal();
        }

        /// <summary>
        /// Dequeues a value synchronously, blocking until one is available.
        /// </summary>
        /// <returns>The value to dequeue.</returns>
        public T Dequeue()
        {
            availability.Wait();
            mutation.Wait();
            return DequeueInternal();
        }

        /// <inheritdoc/>
        public void Dispose()
        {
            Dispose(true);
            GC.SuppressFinalize(this);
        }

        /// <summary>
        /// Releases unmanaged and - optionally - managed resources.
        /// </summary>
        /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
        protected virtual void Dispose(bool disposing)
        {
            if (isDisposed)
            {
                return;
            }

            isDisposed = true;

            if (disposing)
            {
                availability.Dispose();
                mutation.Dispose();
            }
        }

        /// <summary>
        /// Adds the value to the queue. The caller must already hold <see cref="mutation"/>;
        /// it is released here, after which one waiting consumer is signalled.
        /// </summary>
        /// <param name="value">The value to enqueue.</param>
        private void EnqueueInternal(T value)
        {
            try
            {
                queue.Enqueue(value);
            }
            finally
            {
                // Release even when the enqueue throws, otherwise the queue stays locked forever.
                mutation.Release();
            }

            availability.Release();
        }

        /// <summary>
        /// Removes the next value from the queue. The caller must already hold <see cref="mutation"/>;
        /// it is released here.
        /// </summary>
        /// <returns>The dequeued value.</returns>
        private T DequeueInternal()
        {
            try
            {
                return queue.Dequeue();
            }
            finally
            {
                // Release even when the dequeue throws, otherwise the queue stays locked forever.
                mutation.Release();
            }
        }
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/Loader.cs:
--------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.IO;
using System.IO.Compression;
using System.Linq;
using System.Reflection;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;

namespace TesserNet.Internal
{
    /// <summary>
    /// Provides functionality for loading the correct libraries into the runtime.
    /// </summary>
    internal static class Loader
    {
        /// <summary>
        /// Gets the temporary directory to which the files were unpacked.
        /// The path is built from the system temp path, the assembly version and the platform string.
        /// </summary>
        /// <returns>The temporary unpack directory.</returns>
        internal static string GetUnpackDirectory()
        {
            string temp = Path.GetTempPath();
            string version = Assembly.GetExecutingAssembly().GetName().Version!.ToString();
            string platform = GetPlatformString();
            return Path.Combine(temp, "tessernet", version, platform);
        }

        ///
        /// Loads the correct libraries into the runtime.
///
        internal static void Load()
        {
            Assembly assembly = Assembly.GetExecutingAssembly();

            // 'using' guarantees both the archive and its stream are released when unpacking throws.
            using Stream stream = assembly.GetManifestResourceStream("TesserNet.Resources.zip")!;
            using ZipArchive resources = new ZipArchive(stream);

            string platform = GetPlatformString();
            IEnumerable<ZipArchiveEntry> files = resources.ForPlatform(platform);
            EnsureCopied(files);
        }

        /// <summary>
        /// Gets the name of the resource folder matching the current operating system and process bitness.
        /// </summary>
        /// <returns>"w64" or "w86" on Windows, "linux" on Linux and "mac" for everything else.</returns>
        private static string GetPlatformString()
        {
            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return Environment.Is64BitProcess ? "w64" : "w86";
            }

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
            {
                return "linux";
            }

            return "mac";
        }

        /// <summary>
        /// Creates the unpack directory and copies (and loads) every given entry into it.
        /// </summary>
        /// <param name="entries">The archive entries to unpack.</param>
        private static void EnsureCopied(IEnumerable<ZipArchiveEntry> entries)
        {
            string tempRoot = GetUnpackDirectory();
            Directory.CreateDirectory(tempRoot);

            foreach (ZipArchiveEntry entry in entries)
            {
                CopyResource(tempRoot, entry);
            }
        }

        /// <summary>
        /// Extracts a single entry into the given directory when it is not there yet, and loads it
        /// into the process when it is a native library.
        /// </summary>
        /// <param name="path">The directory to extract to.</param>
        /// <param name="entry">The archive entry.</param>
        private static void CopyResource(string path, ZipArchiveEntry entry)
        {
            // Only the file name is used, so entries can never escape the unpack directory.
            string fileName = Path.GetFileName(entry.FullName);
            string filePath = Path.Combine(path, fileName);

            if (!File.Exists(filePath))
            {
                try
                {
                    entry.ExtractToFile(filePath, false);
                }
                catch (IOException) when (File.Exists(filePath))
                {
                    // Another thread or process unpacked the same file between the existence
                    // check and the extraction; the file is there, which is all that is needed.
                }
            }

            // NOTE(review): the handles returned by the load calls are ignored, so a failed load only
            // surfaces later as a failing P/Invoke. Files with a versioned name such as "x.so.4" have
            // the extension ".4" and are therefore not loaded here - confirm this is intended.
            string extension = Path.GetExtension(filePath);
            if (extension == ".dll" && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                NativeMethods.WindowsLoadLib(filePath);
            }
            else if (extension == ".so" || extension == ".dylib")
            {
                NativeMethods.UnixLoadLib(filePath);
            }
        }

        private static IEnumerable<ZipArchiveEntry> ForPlatform(this ZipArchive resources, string platform)
            => resources.Entries.Where(x =>
                (x.FullName.StartsWith($"{platform}/", StringComparison.InvariantCulture) && x.FullName.Length > platform.Length + 1)
                ||
(x.FullName.StartsWith("any/", StringComparison.InvariantCulture) && x.FullName.Length > 4));

        /// <summary>
        /// Native entry points used to load the unpacked libraries into the process.
        /// </summary>
        private static class NativeMethods
        {
            // Bound as Unicode (LoadLibraryW) so unpack paths containing characters outside the
            // ANSI code page - e.g. in the user name part of the temp path - still load.
            [SupportedOSPlatform(PlatformNames.Windows)]
            [DllImport("kernel32", CharSet = CharSet.Unicode, ExactSpelling = false, SetLastError = true, EntryPoint = "LoadLibrary")]
            public static extern IntPtr WindowsLoadLib([MarshalAs(UnmanagedType.LPWStr)] string lpFileName);

            // The default flags value of 2 is passed to dlopen as-is.
            // NOTE(review): presumably RTLD_NOW - confirm against the dlopen headers of the targeted platforms.
            [SupportedOSPlatform(PlatformNames.Linux)]
            [SupportedOSPlatform(PlatformNames.MacOS)]
            [DllImport("libdl", CharSet = CharSet.Ansi, ExactSpelling = false, SetLastError = true, EntryPoint = "dlopen")]
            public static extern IntPtr UnixLoadLib([MarshalAs(UnmanagedType.LPStr)] string filename, int flags = 2);
        }
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/PlatformNames.cs:
--------------------------------------------------------------------------------
namespace TesserNet.Internal;

/// <summary>
/// Contains platform names.
/// </summary>
internal static class PlatformNames
{
    /// <summary>
    /// The platform name for the Windows operating system.
    /// </summary>
    public const string Windows = "windows";

    /// <summary>
    /// The platform name for the Linux operating system.
    /// </summary>
    public const string Linux = "linux";

    /// <summary>
    /// The platform name for the MacOS operating system.
    /// </summary>
    public const string MacOS = "macos";
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/TesseractApi.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;

namespace TesserNet.Internal
{
    /// <summary>
    /// Provides an interface for the Tesseract API.
    /// </summary>
    internal abstract class TesseractApi
    {
        // Serializes the one-time unpacking so concurrent callers cannot unpack at the same time.
        private static readonly object unpackLock = new object();
        private static bool unpacked;

        /// <summary>
        /// Creates an instance of the Tesseract API for the current operating system.
        /// The native libraries are unpacked and loaded on the first call.
        /// </summary>
        /// <returns>A Tesseract API.</returns>
        public static TesseractApi Create()
        {
            lock (unpackLock)
            {
                if (!unpacked)
                {
                    Loader.Load();
                    unpacked = true;
                }
            }

            if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
            {
                return new WindowsTesseractApi();
            }

            return new UnixTesseractApi();
        }

        /// <summary>
        /// Creates an instance of API base.
        /// </summary>
        /// <returns>A handle for the base.</returns>
        public abstract IntPtr TessBaseAPICreate();

        /// <summary>
        /// Deletes an API base.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        public abstract void TessBaseAPIDelete(IntPtr handle);

        /// <summary>
        /// Sets the settings for the given API base. Can be executed multiple times to change settings in between runs.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="dataPath">The data path.</param>
        /// <param name="language">The language following ISO 639-2 specification.</param>
        /// <param name="oem">The OCR engine mode.</param>
        /// <param name="configs">The configs.</param>
        /// <param name="configSize">Size of the configuration.</param>
        /// <returns>A success code: zero if successful, non-zero if a problem has occurred.</returns>
        public abstract int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize);

        /// <summary>
        /// Sets the image to be processed next.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="data">The data.</param>
        /// <param name="width">The width.</param>
        /// <param name="height">The height.</param>
        /// <param name="bytesPerPixel">The bytes per pixel.</param>
        /// <param name="bytesPerLine">The bytes per line.</param>
        public abstract void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);

        /// <summary>
        /// Performs the OCR.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <returns>The found text on the image as a UTF8 string.</returns>
        public abstract string TessBaseAPIGetUTF8Text(IntPtr handle);

        /// <summary>
        /// Sets the source resolution.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="ppi">The pixels per inch.</param>
        public abstract void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);

        /// <summary>
        /// Takes a rectangle of the image for performing OCR.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="x">The x coordinate.</param>
        /// <param name="y">The y coordinate.</param>
        /// <param name="width">The width.</param>
        /// <param name="height">The height.</param>
        public abstract void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);

        /// <summary>
        /// Frees all image data and result data.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        public abstract void TessBaseAPIClear(IntPtr handle);

        /// <summary>
        /// Sets the segmentation mode.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="mode">The mode.</param>
        public abstract void TessBaseAPISetPageSegMode(IntPtr handle, int mode);

        /// <summary>
        /// Sets the value of a variable.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="key">The name of the variable.</param>
        /// <param name="value">The value.</param>
        /// <returns>Whether the operation was successful or not.</returns>
        public abstract bool TessBaseAPISetVariable(IntPtr handle, string key, string value);

        /// <summary>
        /// Sets the used config file.
        /// </summary>
        /// <param name="handle">The API base handle.</param>
        /// <param name="file">The name of, or the path to, the config file.</param>
        public abstract void TessBaseAPIReadConfigFile(IntPtr handle, string file);
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/UnixTesseractApi.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;

namespace TesserNet.Internal
{
    ///
    /// Unix implementation of the Tesseract API.
///
    ///
    internal class UnixTesseractApi : TesseractApi
    {
        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override IntPtr TessBaseAPICreate()
        {
            return NativeMethods.TessBaseAPICreate();
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPIDelete(IntPtr handle)
        {
            NativeMethods.TessBaseAPIDelete(handle);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override string TessBaseAPIGetUTF8Text(IntPtr handle)
        {
            // The native call hands back a pointer; ToUtf8String converts it to a managed string.
            IntPtr text = NativeMethods.TessBaseAPIGetUTF8Text(handle);
            return text.ToUtf8String();
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize)
        {
            return NativeMethods.TessBaseAPIInit1(handle, dataPath, language, oem, configs, configSize);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine)
        {
            NativeMethods.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, bytesPerLine);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPISetSourceResolution(IntPtr handle, int ppi)
        {
            NativeMethods.TessBaseAPISetSourceResolution(handle, ppi);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height)
        {
            NativeMethods.TessBaseAPISetRectangle(handle, x, y, width, height);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPIClear(IntPtr handle)
        {
            NativeMethods.TessBaseAPIClear(handle);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPISetPageSegMode(IntPtr handle, int mode)
        {
            NativeMethods.TessBaseAPISetPageSegMode(handle, mode);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override bool TessBaseAPISetVariable(IntPtr handle, string key, string value)
        {
            return NativeMethods.TessBaseAPISetVariable(handle, key, value);
        }

        /// <inheritdoc/>
        [SupportedOSPlatform(PlatformNames.Linux)]
        public override void TessBaseAPIReadConfigFile(IntPtr handle, string file)
        {
            NativeMethods.TessBaseAPIReadConfigFile(handle, file);
        }

        /// <summary>
        /// P/Invoke declarations bound to the native library named by <see cref="DllPath"/>.
        /// </summary>
        private static class NativeMethods
        {
            private const string DllPath = "libtesseract.so.4";

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr TessBaseAPICreate();

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPIDelete(IntPtr handle);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPIClear(IntPtr handle);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern int TessBaseAPIInit1(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string dataPath, [MarshalAs(UnmanagedType.LPStr)] string language, int oem, IntPtr configs, int configSize);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet =
CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern IntPtr TessBaseAPIGetUTF8Text(IntPtr handle);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPISetPageSegMode(IntPtr handle, int mode);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern bool TessBaseAPISetVariable(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);

            [SupportedOSPlatform(PlatformNames.Linux)]
            [DllImport(DllPath, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
            public static extern void TessBaseAPIReadConfigFile(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string file);
        }
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/Utf8Helper.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;
using System.Text;

namespace TesserNet.Internal
{
    /// <summary>
    /// Provides helpers for dealing with UTF8 strings.
    /// </summary>
    internal static class Utf8Helper
    {
        /// <summary>
        /// Reads a zero-terminated UTF8 string from a pointer and frees the pointer afterwards.
        /// </summary>
        /// <param name="ptr">The pointer to read from. The memory is released by this call and must not be used again.</param>
        /// <returns>The string at the pointer, or an empty string when the pointer is null.</returns>
        public static string ToUtf8String(this IntPtr ptr)
        {
            // A null pointer cannot be read; report "no text" instead of touching invalid memory.
            if (ptr == IntPtr.Zero)
            {
                return string.Empty;
            }

            try
            {
                int length = ptr.GetStringLength();
                byte[] bytes = new byte[length];
                Marshal.Copy(ptr, bytes, 0, length);
                return Encoding.UTF8.GetString(bytes);
            }
            finally
            {
                // NOTE(review): the buffer is handed out by the native Tesseract library, and
                // FreeHGlobal is only guaranteed to match memory obtained through AllocHGlobal.
                // Releasing it through the library's own text deallocator would be safer, but
                // that needs a new native entry point - confirm and follow up.
                Marshal.FreeHGlobal(ptr);
            }
        }

        /// <summary>
        /// Counts the bytes in front of the terminating zero byte.
        /// </summary>
        /// <param name="ptr">The pointer to the start of the string.</param>
        /// <returns>The length of the string in bytes, excluding the terminator.</returns>
        private static int GetStringLength(this IntPtr ptr)
        {
            int length = 0;
            while (Marshal.ReadByte(ptr, length) != 0)
            {
                length++;
            }

            return length;
        }
    }
}

--------------------------------------------------------------------------------
/src/TesserNet/Internal/WindowsTesseractApi.cs:
--------------------------------------------------------------------------------
using System;
using System.Runtime.InteropServices;
using System.Runtime.Versioning;

namespace TesserNet.Internal
{
    ///
    /// Windows implementation of the Tesseract API.
/// <summary>
/// Windows implementation of the Tesseract API: every member forwards directly to the
/// matching P/Invoke declaration in <see cref="NativeMethods"/>.
/// </summary>
/// <seealso cref="TesseractApi" />
internal class WindowsTesseractApi : TesseractApi
{
    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override IntPtr TessBaseAPICreate()
    {
        return NativeMethods.TessBaseAPICreate();
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIDelete(IntPtr handle)
    {
        NativeMethods.TessBaseAPIDelete(handle);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override string TessBaseAPIGetUTF8Text(IntPtr handle)
    {
        // The native call hands back a raw pointer; ToUtf8String decodes it.
        IntPtr text = NativeMethods.TessBaseAPIGetUTF8Text(handle);
        return text.ToUtf8String();
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize)
    {
        return NativeMethods.TessBaseAPIInit1(handle, dataPath, language, oem, configs, configSize);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine)
    {
        NativeMethods.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, bytesPerLine);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetSourceResolution(IntPtr handle, int ppi)
    {
        NativeMethods.TessBaseAPISetSourceResolution(handle, ppi);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height)
    {
        NativeMethods.TessBaseAPISetRectangle(handle, x, y, width, height);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIClear(IntPtr handle)
    {
        NativeMethods.TessBaseAPIClear(handle);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetPageSegMode(IntPtr handle, int mode)
    {
        NativeMethods.TessBaseAPISetPageSegMode(handle, mode);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override bool TessBaseAPISetVariable(IntPtr handle, string key, string value)
    {
        return NativeMethods.TessBaseAPISetVariable(handle, key, value);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIReadConfigFile(IntPtr handle, string file)
    {
        NativeMethods.TessBaseAPIReadConfigFile(handle, file);
    }

    /// <summary>
    /// Raw P/Invoke declarations, listed in the same order as the overrides above.
    /// </summary>
    private static class NativeMethods
    {
        // Name of the native library the runtime resolves for every import below.
        private const string LibraryName = "libtesseract500";

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr TessBaseAPICreate();

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPIDelete(IntPtr handle);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern IntPtr TessBaseAPIGetUTF8Text(IntPtr handle);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern int TessBaseAPIInit1(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string dataPath, [MarshalAs(UnmanagedType.LPStr)] string language, int oem, IntPtr configs, int configSize);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPIClear(IntPtr handle);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPISetPageSegMode(IntPtr handle, int mode);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern bool TessBaseAPISetVariable(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);

        [SupportedOSPlatform(PlatformNames.Windows)]
        [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
        public static extern void TessBaseAPIReadConfigFile(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string file);
    }
}
/// <summary>
/// Enum for the OCR setting to be used.
/// The numeric value is cast to <c>int</c> and handed to the native init call.
/// </summary>
public enum OcrEngineMode
{
    /// <summary>
    /// Only run the legacy Tesseract OCR.
    /// </summary>
    TesseractOnly = 0,

    /// <summary>
    /// Only run the new LSTM based OCR.
    /// </summary>
    LstmOnly = 1,

    /// <summary>
    /// Combine LSTM and the legacy Tesseract OCR.
    /// </summary>
    Combined = 2,

    /// <summary>
    /// The default setting (picks whatever is available).
    /// </summary>
    Default = 3,
}

/// <summary>
/// Indicates how page segmentation should be treated.
/// The numeric value is cast to <c>int</c> and handed to the native page-segmentation call.
/// </summary>
public enum PageSegmentation
{
    /// <summary>
    /// Orientation and script detection (OSD) only.
    /// </summary>
    Osd = 0,

    /// <summary>
    /// Automatic page segmentation with OSD.
    /// </summary>
    SegmentationOsd = 1,

    /// <summary>
    /// Automatic page segmentation, but no OSD, or OCR.
    /// </summary>
    Segmentation = 2,

    /// <summary>
    /// Fully automatic page segmentation, but no OSD. (Default).
    /// </summary>
    SegmentationOcr = 3,

    /// <summary>
    /// Assume a single column of text of variable sizes.
    /// </summary>
    Column = 4,

    /// <summary>
    /// Assume a single uniform block of vertically aligned text.
    /// </summary>
    VerticalBlock = 5,

    /// <summary>
    /// Assume a single uniform block of text.
    /// </summary>
    Block = 6,

    /// <summary>
    /// Treat the image as a single text line.
    /// </summary>
    Line = 7,

    /// <summary>
    /// Treat the image as a single word.
    /// </summary>
    Word = 8,

    /// <summary>
    /// Treat the image as a single word in a circle.
    /// </summary>
    WordCircle = 9,

    /// <summary>
    /// Treat the image as a single character.
    /// </summary>
    Character = 10,

    /// <summary>
    /// Sparse text. Find as much text as possible in no particular order.
    /// </summary>
    Sparse = 11,

    /// <summary>
    /// Sparse text with OSD.
    /// </summary>
    SparseOsd = 12,

    /// <summary>
    /// Raw line. Treat the image as a single text line, bypassing hacks that are Tesseract-specific.
    /// </summary>
    Raw = 13,
}
/// <summary>
/// Provides high level bindings for the Tesseract API.
/// Wraps a single native engine handle; all native calls are serialized through a private lock.
/// </summary>
public class Tesseract : TesseractBase
{
    private readonly TesseractApi api;
    private readonly IntPtr handle;
    private readonly object lck = new object();
    private bool isDisposed;

    // Snapshot of the options the native engine was last successfully initialized with.
    private TesseractOptions? lastOptions;

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class with default options.
    /// </summary>
    public Tesseract()
        : this(new TesseractOptions())
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class.
    /// </summary>
    /// <param name="options">Callback that customizes the default options; may be null.</param>
    public Tesseract(Action<TesseractOptions> options)
        : this()
    {
        options?.Invoke(Options);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class.
    /// </summary>
    /// <param name="options">The options.</param>
    public Tesseract(TesseractOptions options)
        : base(options)
    {
        api = TesseractApi.Create();
        handle = api.TessBaseAPICreate();
    }

    /// <summary>
    /// Finalizes an instance of the <see cref="Tesseract"/> class.
    /// </summary>
    ~Tesseract()
        => Dispose(false);

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the instance has been disposed.</exception>
    /// <exception cref="TesseractException">Thrown when initialization or any native call fails.</exception>
    public override string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(Tesseract));
        }

        lock (lck)
        {
            // Re-check under the lock: Dispose may have won the race for it.
            if (isDisposed)
            {
                throw new ObjectDisposedException(nameof(Tesseract));
            }

            EnsureInitialized();

            Guarded(
                "Error while setting subject image.",
                () => api.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, width * bytesPerPixel));

            Guarded(
                "Error while setting resolution.",
                () => api.TessBaseAPISetSourceResolution(handle, Options.PixelsPerInch));

            // A rectangle is only applied when all four values describe a real region;
            // the convenience overloads pass -1 for "whole image".
            if (rectX >= 0 && rectY >= 0 && rectWidth > 0 && rectHeight > 0)
            {
                Guarded(
                    "Error while setting a rectangle.",
                    () => api.TessBaseAPISetRectangle(handle, rectX, rectY, rectWidth, rectHeight));
            }

            string result;
            try
            {
                result = api.TessBaseAPIGetUTF8Text(handle);
            }
            catch (Exception ex)
            {
                throw new TesseractException("Error while performing OCR.", ex);
            }

            Guarded(
                "Error while clearing result data.",
                () => api.TessBaseAPIClear(handle));

            return result;
        }
    }

    /// <inheritdoc/>
    public override Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Task.Run(() => Read(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight));

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        if (isDisposed)
        {
            return;
        }

        lock (lck)
        {
            // Second check under the lock so two racing callers cannot both delete the handle.
            if (isDisposed)
            {
                return;
            }

            // Flip the flag first: even if the native delete throws, it is never retried.
            isDisposed = true;

            // The finalizer also runs when the constructor threw, in which case the
            // api or the handle may never have been assigned.
            if (api is not null && handle != IntPtr.Zero)
            {
                api.TessBaseAPIDelete(handle);
            }
        }
    }

    /// <summary>
    /// Runs a native call and wraps any failure in a <see cref="TesseractException"/>
    /// that keeps the original exception as its inner exception.
    /// </summary>
    private static void Guarded(string errorMessage, Action nativeCall)
    {
        try
        {
            nativeCall();
        }
        catch (Exception ex)
        {
            throw new TesseractException(errorMessage, ex);
        }
    }

    /// <summary>
    /// (Re)initializes the native engine when the current options differ from the ones
    /// used for the last successful initialization.
    /// </summary>
    private void EnsureInitialized()
    {
        if (Options.Equals(lastOptions))
        {
            return;
        }

        TesseractOptions snapshot = Options.Copy();

        // Forget the previous snapshot before touching the engine: if Init fails half-way
        // the engine state is unknown and the next Read must initialize again.
        lastOptions = null;
        Init();
        lastOptions = snapshot;
    }

    /// <summary>
    /// Pushes the current <see cref="TesseractBase.Options"/> into the native engine.
    /// </summary>
    private void Init()
    {
        int result = api.TessBaseAPIInit1(handle, Options.DataPath, Options.Language, (int)Options.EngineMode, IntPtr.Zero, 0);
        if (result != 0)
        {
            throw new TesseractException($"Error while initializing Tesseract with data file '{Path.Combine(Options.DataPath, $"{Options.Language}.traineddata")}'. It's possible the training data was not found or the data does not support the current OCR engine mode.");
        }

        Guarded(
            "Error while setting page segmentation mode.",
            () => api.TessBaseAPISetPageSegMode(handle, (int)Options.PageSegmentation));

        SetVariable(
            "tessedit_char_whitelist",
            string.IsNullOrWhiteSpace(Options.Whitelist) ? string.Empty : Options.Whitelist,
            "Setting whitelist unsuccesful.",
            "Error while setting whitelist.");

        SetVariable(
            "tessedit_char_blacklist",
            string.IsNullOrWhiteSpace(Options.Blacklist) ? string.Empty : Options.Blacklist,
            "Setting blacklist unsuccesful.",
            "Error while setting blacklist.");

        SetVariable(
            "classify_bln_numeric_mode",
            Options.Numeric ? "1" : "0",
            "Setting numeric mode unsuccesful.",
            "Error while setting numeric mode.");

        if (!string.IsNullOrWhiteSpace(Options.Config))
        {
            Guarded(
                $"Error while loading config: '{Options.Config}'.",
                () => api.TessBaseAPIReadConfigFile(handle, Options.Config));
        }
    }

    /// <summary>
    /// Sets one engine variable, distinguishing "the call threw" from "the engine rejected the value".
    /// </summary>
    /// <param name="key">Name of the engine variable.</param>
    /// <param name="value">Value to assign.</param>
    /// <param name="rejectedMessage">Message used when the native call returns <c>false</c>.</param>
    /// <param name="errorMessage">Message used when the native call throws.</param>
    private void SetVariable(string key, string value, string rejectedMessage, string errorMessage)
    {
        bool accepted;
        try
        {
            accepted = api.TessBaseAPISetVariable(handle, key, value);
        }
        catch (Exception ex)
        {
            throw new TesseractException(errorMessage, ex);
        }

        // Thrown outside the try so the specific message is not replaced by the generic one.
        if (!accepted)
        {
            throw new TesseractException(rejectedMessage);
        }
    }
}
/// <summary>
/// Abstract base class for Tesseract instances.
/// Implements every convenience overload in terms of the two abstract <see cref="IntPtr"/> based methods.
/// A rectangle of <c>-1, -1, -1, -1</c> is passed by the short overloads when no region is given.
/// </summary>
public unsafe abstract class TesseractBase : ITesseract
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractBase"/> class.
    /// </summary>
    /// <param name="options">The options.</param>
    public TesseractBase(TesseractOptions options)
        => Options = options;

    /// <inheritdoc/>
    public TesseractOptions Options { get; set; }

    /// <inheritdoc/>
    public abstract string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

    /// <inheritdoc/>
    public abstract Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

    /// <inheritdoc/>
    public string Read(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        // The call is synchronous, so pinning for the duration of the fixed block is sufficient.
        fixed (byte* ptr = data)
        {
            return Read(ptr, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        }
    }

    /// <inheritdoc/>
    /// <remarks>
    /// A span cannot be kept pinned after this method returns, so the pixels are copied
    /// into a managed array that stays pinned until the returned task completes.
    /// </remarks>
    public Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => ReadAsync(new Memory<byte>(data.ToArray()), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(byte[] data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read((Span<byte>)data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(Memory<byte> data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read(data.Span, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(Span<byte> data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte* data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read(new IntPtr(data), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(IntPtr data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => ReadAsync((Memory<byte>)data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    /// <remarks>
    /// The buffer is pinned until the returned task has completed, so the garbage collector
    /// cannot move it while the OCR is still reading from the raw pointer.
    /// </remarks>
    public Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        System.Buffers.MemoryHandle pin = data.Pin();
        try
        {
            Task<string> ocr = ReadAsync(new IntPtr(pin.Pointer), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

            // Release the pin once the OCR is finished, whether it succeeded, faulted or
            // was cancelled. 'await' is not available here because the class is an unsafe context.
            _ = ocr.ContinueWith(
                completed => pin.Dispose(),
                System.Threading.CancellationToken.None,
                TaskContinuationOptions.ExecuteSynchronously,
                TaskScheduler.Default);

            return ocr;
        }
        catch
        {
            // The task was never created, so nothing else will release the pin.
            pin.Dispose();
            throw;
        }
    }

    /// <inheritdoc/>
    public Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => ReadAsync(new IntPtr(data), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected abstract void Dispose(bool disposing);
}
/// <summary>
/// Exception thrown when something goes wrong with Tesseract execution.
/// </summary>
/// <seealso cref="Exception" />
public class TesseractException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    public TesseractException()
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    /// <param name="message">The message that describes the error.</param>
    public TesseractException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    /// <param name="message">The error message that explains the reason for the exception.</param>
    /// <param name="innerException">The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic) if no inner exception is specified.</param>
    public TesseractException(string message, Exception innerException)
        : base(message, innerException)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
    /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
    protected TesseractException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }
}

/// <summary>
/// Represents the options used for invoking Tesseract.
/// Two instances are equal when all nine properties are equal.
/// </summary>
public class TesseractOptions : IEquatable<TesseractOptions>
{
    /// <summary>
    /// Gets or sets the language.
    /// </summary>
    public string Language { get; set; } = "eng";

    /// <summary>
    /// Gets or sets the data path. Defaults to the <c>TESSDATA_PREFIX</c> environment
    /// variable, falling back to the loader's unpack directory.
    /// </summary>
    public string DataPath { get; set; } = Environment.GetEnvironmentVariable("TESSDATA_PREFIX") ?? Loader.GetUnpackDirectory();

    /// <summary>
    /// Gets or sets the engine mode.
    /// </summary>
    public OcrEngineMode EngineMode { get; set; } = OcrEngineMode.Default;

    /// <summary>
    /// Gets or sets the pixels per inch.
    /// </summary>
    public int PixelsPerInch { get; set; } = 70;

    /// <summary>
    /// Gets or sets the page segmentation option.
    /// </summary>
    public PageSegmentation PageSegmentation { get; set; } = PageSegmentation.Block;

    /// <summary>
    /// Gets or sets the whitelist.
    /// </summary>
    public string Whitelist { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the blacklist.
    /// </summary>
    public string Blacklist { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets a value indicating whether the thing we try to parse is numeric.
    /// </summary>
    public bool Numeric { get; set; }

    /// <summary>
    /// Gets or sets the configuration name or path.
    /// </summary>
    public string Config { get; set; } = string.Empty;

    /// <summary>
    /// Creates a copy of the options.
    /// </summary>
    /// <returns>A copy of the options.</returns>
    public TesseractOptions Copy()
        => new TesseractOptions
        {
            Language = this.Language,
            DataPath = this.DataPath,
            EngineMode = this.EngineMode,
            PixelsPerInch = this.PixelsPerInch,
            PageSegmentation = this.PageSegmentation,
            Whitelist = this.Whitelist,
            Blacklist = this.Blacklist,
            Numeric = this.Numeric,
            Config = this.Config,
        };

    /// <inheritdoc/>
    public override bool Equals(object? obj)
        => obj is TesseractOptions other && Equals(other);

    /// <inheritdoc/>
    public bool Equals(TesseractOptions? other)
    {
        if (other is null)
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        return string.Equals(Language, other.Language, StringComparison.Ordinal)
            && string.Equals(DataPath, other.DataPath, StringComparison.Ordinal)
            && EngineMode == other.EngineMode
            && PixelsPerInch == other.PixelsPerInch
            && PageSegmentation == other.PageSegmentation
            && string.Equals(Whitelist, other.Whitelist, StringComparison.Ordinal)
            && string.Equals(Blacklist, other.Blacklist, StringComparison.Ordinal)
            && Numeric == other.Numeric
            && string.Equals(Config, other.Config, StringComparison.Ordinal);
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        // The string properties have public setters, so a caller can still assign null;
        // Equals tolerates that, and the hash must not throw where Equals succeeds.
        unchecked
        {
            int hash = 17;
            hash = (hash * 31) + (Language?.GetHashCode() ?? 0);
            hash = (hash * 31) + (DataPath?.GetHashCode() ?? 0);
            hash = (hash * 31) + (int)EngineMode;
            hash = (hash * 31) + PixelsPerInch;
            hash = (hash * 31) + (int)PageSegmentation;
            hash = (hash * 31) + (Whitelist?.GetHashCode() ?? 0);
            hash = (hash * 31) + (Blacklist?.GetHashCode() ?? 0);
            hash = (hash * 31) + (Numeric ? 1 : 0);
            hash = (hash * 31) + (Config?.GetHashCode() ?? 0);
            return hash;
        }
    }
}
/// <summary>
/// Scheduler for easier management of multiple tesseract instances.
/// Instances are created on demand up to <see cref="MaxPoolSize"/>; idle instances sit in a
/// queue and are handed out again for later reads.
/// </summary>
public class TesseractPool : TesseractBase
{
    private const int DefaultMaxPoolSize = 6;

    // Idle instances ready to be handed out.
    private readonly LazyQueue<Tesseract> waiting = new LazyQueue<Tesseract>();

    // Every instance this pool has created and not yet killed.
    private readonly HashSet<Tesseract> tesseracts = new HashSet<Tesseract>();

    // Guards instance acquisition, creation and removal.
    private readonly SemaphoreSlim semaphore = new SemaphoreSlim(1);

    // Number of instances currently performing a read.
    private int busyCount;
    private int maxPoolSize;
    private bool isDisposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    public TesseractPool()
        : this(DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">Callback that customizes the default options; may be null.</param>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(Action<TesseractOptions> options, int maxPoolSize)
        : this(maxPoolSize)
    {
        options?.Invoke(Options);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">Callback that customizes the default options; may be null.</param>
    public TesseractPool(Action<TesseractOptions> options)
        : this(options, DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(int maxPoolSize)
        : this(new TesseractOptions(), maxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">The Tesseract options used for all spawned instances.</param>
    public TesseractPool(TesseractOptions options)
        : this(options, DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">The Tesseract options used for all spawned instances.</param>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(TesseractOptions options, int maxPoolSize)
        : base(options)
    {
        // Options is already assigned by the base constructor.
        this.maxPoolSize = maxPoolSize;
    }

    /// <summary>
    /// Gets or sets the maximum size of the pool.
    /// Lowering the value schedules removal of surplus instances in the background.
    /// </summary>
    public int MaxPoolSize
    {
        get => maxPoolSize;
        set => Resize(value);
    }

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the pool has been disposed.</exception>
    public override string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(TesseractPool));
        }

        semaphore.Wait();

        Tesseract tesseract;
        try
        {
            if (waiting.Count == 0 && tesseracts.Count < MaxPoolSize)
            {
                // Nothing idle and still room: grow the pool.
                tesseract = new Tesseract();
                tesseracts.Add(tesseract);
            }
            else
            {
                // Either an idle instance exists, or the pool is full and we have to
                // take the next one that is handed back.
                tesseract = waiting.Dequeue();
            }

            // Same bookkeeping as the async path so KillExcess sees synchronous reads too.
            Interlocked.Increment(ref busyCount);
            tesseract.Options = Options.Copy();
        }
        finally
        {
            semaphore.Release();
        }

        try
        {
            return tesseract.Read(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        }
        finally
        {
            // Hand the instance back even when OCR throws; otherwise the pool shrinks by
            // one on every failure until no instance is left to dequeue.
            Interlocked.Decrement(ref busyCount);
            waiting.Enqueue(tesseract);
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the pool has been disposed.</exception>
    public override async Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(TesseractPool));
        }

        await semaphore.WaitAsync().ConfigureAwait(false);

        Tesseract tesseract;
        try
        {
            if (waiting.Count == 0 && tesseracts.Count < MaxPoolSize)
            {
                tesseract = new Tesseract();
                tesseracts.Add(tesseract);
            }
            else
            {
                tesseract = await waiting.DequeueAsync().ConfigureAwait(false);
            }

            Interlocked.Increment(ref busyCount);
            tesseract.Options = Options.Copy();
        }
        finally
        {
            semaphore.Release();
        }

        try
        {
            return await tesseract.ReadAsync(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight).ConfigureAwait(false);
        }
        finally
        {
            // Runs for success and failure alike, so a faulted OCR task can no longer
            // strand the instance or leave busyCount permanently incremented.
            Interlocked.Decrement(ref busyCount);
            await waiting.EnqueueAsync(tesseract).ConfigureAwait(false);
        }
    }

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        if (isDisposed)
        {
            return;
        }

        isDisposed = true;

        if (disposing)
        {
            // Taken and deliberately never released: the semaphore is disposed below.
            semaphore.Wait();

            foreach (Tesseract tesseract in tesseracts)
            {
                tesseract.Dispose();
            }

            waiting.Dispose();
            semaphore.Dispose();
        }
    }

    /// <summary>
    /// Stores the new maximum and, unless the pool is disposed, starts trimming surplus instances.
    /// </summary>
    private void Resize(int size)
    {
        maxPoolSize = size;

        if (!isDisposed)
        {
            // Fire-and-forget: the setter must not block on the trim.
            _ = KillExcess();
        }
    }

    /// <summary>
    /// Disposes idle instances until busy plus idle instances fit within the maximum pool size.
    /// </summary>
    private async Task KillExcess()
    {
        await semaphore.WaitAsync().ConfigureAwait(false);

        try
        {
            while (busyCount + waiting.Count > maxPoolSize)
            {
                Tesseract tesseract = await waiting.DequeueAsync().ConfigureAwait(false);
                tesseracts.Remove(tesseract);
                tesseract.Dispose();
            }
        }
        finally
        {
            // Always release, otherwise one failure here blocks every later read.
            semaphore.Release();
        }
    }
}
11 | "orderingRules": { 12 | "usingDirectivesPlacement": "outsideNamespace", 13 | "elementOrder": [ "kind", "constant", "accessibility", "static", "readonly" ] 14 | } 15 | } 16 | } 17 | --------------------------------------------------------------------------------