├── .gitignore
├── LICENSE
├── README.md
├── dependency_licenses
└── tesseract_LICENSES
└── src
├── Ruleset.ruleset
├── TesserNet.Example.ImageSharp
├── Program.cs
├── TesserNet.Example.ImageSharp.csproj
└── img.png
├── TesserNet.Example.SkiaSharp
├── Program.cs
├── TesserNet.Example.SkiaSharp.csproj
└── img.png
├── TesserNet.Example.System.Drawing
├── Program.cs
├── TesserNet.Example.System.Drawing.csproj
└── img.png
├── TesserNet.ImageSharp
├── ImageSharpTesseractExtensions.cs
└── TesserNet.ImageSharp.csproj
├── TesserNet.SkiaSharp
├── SkiaSharpTesseractExtensions.cs
└── TesserNet.SkiaSharp.csproj
├── TesserNet.System.Drawing
├── SystemDrawingTesseractExtensions.cs
└── TesserNet.System.Drawing.csproj
├── TesserNet.Tests
├── ImageLoader.cs
├── Resources
│ └── img.png
├── TesserNet.Tests.csproj
└── Tests.cs
├── TesserNet.sln
├── TesserNet
├── ITesseract.cs
├── Internal
│ ├── LazyQueue.cs
│ ├── Loader.cs
│ ├── PlatformNames.cs
│ ├── TesseractApi.cs
│ ├── UnixTesseractApi.cs
│ ├── Utf8Helper.cs
│ └── WindowsTesseractApi.cs
├── OcrEngineMode.cs
├── PageSegmentation.cs
├── Resources.zip
├── TesserNet.csproj
├── Tesseract.cs
├── TesseractBase.cs
├── TesseractException.cs
├── TesseractOptions.cs
└── TesseractPool.cs
└── stylecop.json
/.gitignore:
--------------------------------------------------------------------------------
1 | ## Ignore Visual Studio temporary files, build results, and
2 | ## files generated by popular Visual Studio add-ons.
3 | ##
4 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
5 |
6 | # User-specific files
7 | *.rsuser
8 | *.suo
9 | *.user
10 | *.userosscache
11 | *.sln.docstates
12 |
13 | # User-specific files (MonoDevelop/Xamarin Studio)
14 | *.userprefs
15 |
16 | # Mono auto generated files
17 | mono_crash.*
18 |
19 | # Build results
20 | [Dd]ebug/
21 | [Dd]ebugPublic/
22 | [Rr]elease/
23 | [Rr]eleases/
24 | x64/
25 | x86/
26 | [Aa][Rr][Mm]/
27 | [Aa][Rr][Mm]64/
28 | bld/
29 | [Bb]in/
30 | [Oo]bj/
31 | [Ll]og/
32 | [Ll]ogs/
33 |
34 | # Visual Studio 2015/2017 cache/options directory
35 | .vs/
36 | # Uncomment if you have tasks that create the project's static files in wwwroot
37 | #wwwroot/
38 |
39 | # Visual Studio 2017 auto generated files
40 | Generated\ Files/
41 |
42 | # MSTest test Results
43 | [Tt]est[Rr]esult*/
44 | [Bb]uild[Ll]og.*
45 |
46 | # NUnit
47 | *.VisualState.xml
48 | TestResult.xml
49 | nunit-*.xml
50 |
51 | # Build Results of an ATL Project
52 | [Dd]ebugPS/
53 | [Rr]eleasePS/
54 | dlldata.c
55 |
56 | # Benchmark Results
57 | BenchmarkDotNet.Artifacts/
58 |
59 | # .NET Core
60 | project.lock.json
61 | project.fragment.lock.json
62 | artifacts/
63 |
64 | # StyleCop
65 | StyleCopReport.xml
66 |
67 | # Files built by Visual Studio
68 | *_i.c
69 | *_p.c
70 | *_h.h
71 | *.ilk
72 | *.meta
73 | *.obj
74 | *.iobj
75 | *.pch
76 | *.pdb
77 | *.ipdb
78 | *.pgc
79 | *.pgd
80 | *.rsp
81 | *.sbr
82 | *.tlb
83 | *.tli
84 | *.tlh
85 | *.tmp
86 | *.tmp_proj
87 | *_wpftmp.csproj
88 | *.log
89 | *.vspscc
90 | *.vssscc
91 | .builds
92 | *.pidb
93 | *.svclog
94 | *.scc
95 |
96 | # Chutzpah Test files
97 | _Chutzpah*
98 |
99 | # Visual C++ cache files
100 | ipch/
101 | *.aps
102 | *.ncb
103 | *.opendb
104 | *.opensdf
105 | *.sdf
106 | *.cachefile
107 | *.VC.db
108 | *.VC.VC.opendb
109 |
110 | # Visual Studio profiler
111 | *.psess
112 | *.vsp
113 | *.vspx
114 | *.sap
115 |
116 | # Visual Studio Trace Files
117 | *.e2e
118 |
119 | # TFS 2012 Local Workspace
120 | $tf/
121 |
122 | # Guidance Automation Toolkit
123 | *.gpState
124 |
125 | # ReSharper is a .NET coding add-in
126 | _ReSharper*/
127 | *.[Rr]e[Ss]harper
128 | *.DotSettings.user
129 |
130 | # TeamCity is a build add-in
131 | _TeamCity*
132 |
133 | # DotCover is a Code Coverage Tool
134 | *.dotCover
135 |
136 | # AxoCover is a Code Coverage Tool
137 | .axoCover/*
138 | !.axoCover/settings.json
139 |
140 | # Visual Studio code coverage results
141 | *.coverage
142 | *.coveragexml
143 |
144 | # NCrunch
145 | _NCrunch_*
146 | .*crunch*.local.xml
147 | nCrunchTemp_*
148 |
149 | # MightyMoose
150 | *.mm.*
151 | AutoTest.Net/
152 |
153 | # Web workbench (sass)
154 | .sass-cache/
155 |
156 | # Installshield output folder
157 | [Ee]xpress/
158 |
159 | # DocProject is a documentation generator add-in
160 | DocProject/buildhelp/
161 | DocProject/Help/*.HxT
162 | DocProject/Help/*.HxC
163 | DocProject/Help/*.hhc
164 | DocProject/Help/*.hhk
165 | DocProject/Help/*.hhp
166 | DocProject/Help/Html2
167 | DocProject/Help/html
168 |
169 | # Click-Once directory
170 | publish/
171 |
172 | # Publish Web Output
173 | *.[Pp]ublish.xml
174 | *.azurePubxml
175 | # Note: Comment the next line if you want to checkin your web deploy settings,
176 | # but database connection strings (with potential passwords) will be unencrypted
177 | *.pubxml
178 | *.publishproj
179 |
180 | # Microsoft Azure Web App publish settings. Comment the next line if you want to
181 | # checkin your Azure Web App publish settings, but sensitive information contained
182 | # in these scripts will be unencrypted
183 | PublishScripts/
184 |
185 | # NuGet Packages
186 | *.nupkg
187 | # NuGet Symbol Packages
188 | *.snupkg
189 | # The packages folder can be ignored because of Package Restore
190 | **/[Pp]ackages/*
191 | # except build/, which is used as an MSBuild target.
192 | !**/[Pp]ackages/build/
193 | # Uncomment if necessary however generally it will be regenerated when needed
194 | #!**/[Pp]ackages/repositories.config
195 | # NuGet v3's project.json files produces more ignorable files
196 | *.nuget.props
197 | *.nuget.targets
198 |
199 | # Microsoft Azure Build Output
200 | csx/
201 | *.build.csdef
202 |
203 | # Microsoft Azure Emulator
204 | ecf/
205 | rcf/
206 |
207 | # Windows Store app package directories and files
208 | AppPackages/
209 | BundleArtifacts/
210 | Package.StoreAssociation.xml
211 | _pkginfo.txt
212 | *.appx
213 | *.appxbundle
214 | *.appxupload
215 |
216 | # Visual Studio cache files
217 | # files ending in .cache can be ignored
218 | *.[Cc]ache
219 | # but keep track of directories ending in .cache
220 | !?*.[Cc]ache/
221 |
222 | # Others
223 | ClientBin/
224 | ~$*
225 | *~
226 | *.dbmdl
227 | *.dbproj.schemaview
228 | *.jfm
229 | *.pfx
230 | *.publishsettings
231 | orleans.codegen.cs
232 |
233 | # Including strong name files can present a security risk
234 | # (https://github.com/github/gitignore/pull/2483#issue-259490424)
235 | #*.snk
236 |
237 | # Since there are multiple workflows, uncomment next line to ignore bower_components
238 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
239 | #bower_components/
240 |
241 | # RIA/Silverlight projects
242 | Generated_Code/
243 |
244 | # Backup & report files from converting an old project file
245 | # to a newer Visual Studio version. Backup files are not needed,
246 | # because we have git ;-)
247 | _UpgradeReport_Files/
248 | Backup*/
249 | UpgradeLog*.XML
250 | UpgradeLog*.htm
251 | ServiceFabricBackup/
252 | *.rptproj.bak
253 |
254 | # SQL Server files
255 | *.mdf
256 | *.ldf
257 | *.ndf
258 |
259 | # Business Intelligence projects
260 | *.rdl.data
261 | *.bim.layout
262 | *.bim_*.settings
263 | *.rptproj.rsuser
264 | *- [Bb]ackup.rdl
265 | *- [Bb]ackup ([0-9]).rdl
266 | *- [Bb]ackup ([0-9][0-9]).rdl
267 |
268 | # Microsoft Fakes
269 | FakesAssemblies/
270 |
271 | # GhostDoc plugin setting file
272 | *.GhostDoc.xml
273 |
274 | # Node.js Tools for Visual Studio
275 | .ntvs_analysis.dat
276 | node_modules/
277 |
278 | # Visual Studio 6 build log
279 | *.plg
280 |
281 | # Visual Studio 6 workspace options file
282 | *.opt
283 |
284 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
285 | *.vbw
286 |
287 | # Visual Studio LightSwitch build output
288 | **/*.HTMLClient/GeneratedArtifacts
289 | **/*.DesktopClient/GeneratedArtifacts
290 | **/*.DesktopClient/ModelManifest.xml
291 | **/*.Server/GeneratedArtifacts
292 | **/*.Server/ModelManifest.xml
293 | _Pvt_Extensions
294 |
295 | # Paket dependency manager
296 | .paket/paket.exe
297 | paket-files/
298 |
299 | # FAKE - F# Make
300 | .fake/
301 |
302 | # CodeRush personal settings
303 | .cr/personal
304 |
305 | # Python Tools for Visual Studio (PTVS)
306 | __pycache__/
307 | *.pyc
308 |
309 | # Cake - Uncomment if you are using it
310 | # tools/**
311 | # !tools/packages.config
312 |
313 | # Tabs Studio
314 | *.tss
315 |
316 | # Telerik's JustMock configuration file
317 | *.jmconfig
318 |
319 | # BizTalk build output
320 | *.btp.cs
321 | *.btm.cs
322 | *.odx.cs
323 | *.xsd.cs
324 |
325 | # OpenCover UI analysis results
326 | OpenCover/
327 |
328 | # Azure Stream Analytics local run output
329 | ASALocalRun/
330 |
331 | # MSBuild Binary and Structured Log
332 | *.binlog
333 |
334 | # NVidia Nsight GPU debugger configuration file
335 | *.nvuser
336 |
337 | # MFractors (Xamarin productivity tool) working folder
338 | .mfractor/
339 |
340 | # Local History for Visual Studio
341 | .localhistory/
342 |
343 | # BeatPulse healthcheck temp database
344 | healthchecksdb
345 |
346 | # Backup folder for Package Reference Convert tool in Visual Studio 2017
347 | MigrationBackup/
348 |
349 | # Ionide (cross platform F# VS Code tools) working folder
350 | .ionide/
351 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [yyyy] [name of copyright owner]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [TesserNet on NuGet](https://www.nuget.org/packages/TesserNet/)
2 |
3 |
4 | # TesserNet
5 | TesserNet provides high level bindings for Tesseract in .NET.
6 | The library comes with all required native libraries and a trained English model, meaning you don't need any additional setup to get the library up and running!
7 | Additionally, the library provides a simple Tesseract instance pooling system (through the `TesseractPool` class), so you can make asynchronous OCR invocations without having to manage the individual instances yourself.
8 |
9 | ## Limitations
10 | Windows is currently the only platform that doesn't require installing extra dependencies.
11 | For Linux distributions it is necessary to install `tesseract-ocr`.
12 | For distributions that use `apt` as the package manager (e.g. Ubuntu, Debian, Raspbian) this can be done using `sudo apt-get install tesseract-ocr`.
13 | Linux support is new and experimental. Problems might arise due to `tesseract-ocr` not being available or because the found version is too old.
14 | iOS is not yet supported.
15 |
16 | ## Downloads
17 | [TesserNet](https://www.nuget.org/packages/TesserNet/)
18 | [TesserNet for System.Drawing](https://www.nuget.org/packages/TesserNet.System.Drawing/)
19 | [TesserNet for ImageSharp](https://www.nuget.org/packages/TesserNet.ImageSharp/)
20 | [TesserNet for SkiaSharp](https://www.nuget.org/packages/TesserNet.SkiaSharp/)
21 |
22 | ## License
23 | This product includes [Leptonica](http://leptonica.com/), which is available under a "BSD 2-clause" license.
24 | This product includes [Tesseract](https://github.com/tesseract-ocr/tesseract), which is available under an "Apache Version 2.0" license.
25 |
26 | ## Usage
27 | **When using on Linux, make sure `tesseract-ocr` has been installed on your system.**
28 |
29 | There are a few example projects available for you to try out in the `src` directory.
30 | Note that the `TesserNet.Example.System.Drawing` example uses .NET Framework,
31 | meaning it will only run on Windows.
32 |
33 | To start off, one first needs to add the following import:
34 | ```cs
35 | using TesserNet;
36 | ```
37 |
38 | One can then create a `Tesseract` instance:
39 | ```cs
40 | Tesseract tesseract = new Tesseract();
41 | ```
42 |
43 | With that instance one can now perform OCR.
44 | ```cs
45 | string result = tesseract.Read(...);
46 | ```
47 |
48 | By default, the following `Read` methods are provided:
49 | ```cs
50 | string Read(byte[] data, int width, int height, int bytesPerPixel);
51 | string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
52 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel);
53 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
54 | ```
55 |
56 | Additionally, if one prefers to use System.Drawing, ImageSharp or SkiaSharp, it is possible to also add a dependency to
57 | [TesserNet.System.Drawing](https://www.nuget.org/packages/TesserNet.System.Drawing/),
58 | [TesserNet.ImageSharp](https://www.nuget.org/packages/TesserNet.ImageSharp/) or
59 | [TesserNet.SkiaSharp](https://www.nuget.org/packages/TesserNet.SkiaSharp/) respectively.
60 | Adding any of these dependencies adds the following `Read` methods:
61 | ```cs
62 | string Read(Image image);
63 | string Read(Image image, Rectangle rectangle);
64 | Task<string> ReadAsync(Image image);
65 | Task<string> ReadAsync(Image image, Rectangle rectangle);
66 | ```
67 |
68 | Furthermore, when trying to use concurrency, it might be useful to have a look at the `TesseractPool` class:
69 | ```cs
70 | TesseractPool pool = new TesseractPool();
71 | ```
72 |
73 | The `TesseractPool` class provides a pooling mechanism for running the OCR on multiple `Tesseract` instances, without having to manually deal with all the different instances.
74 | The class has the following methods:
75 | ```cs
76 | string Read(byte[] data, int width, int height, int bytesPerPixel);
77 | string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
78 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel);
79 | Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
80 | ```
81 |
82 | And when any of the aforementioned image processing bridging libraries is present:
83 | ```cs
84 | string Read(Image image);
85 | string Read(Image image, Rectangle rectangle);
86 | Task<string> ReadAsync(Image image);
87 | Task<string> ReadAsync(Image image, Rectangle rectangle);
88 | ```
89 |
--------------------------------------------------------------------------------
/dependency_licenses/tesseract_LICENSES:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
--------------------------------------------------------------------------------
/src/Ruleset.ruleset:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.ImageSharp/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Reflection;
4 | using SixLabors.ImageSharp;
5 |
6 | namespace TesserNet.Example.ImageSharp
7 | {
8 |     /// <summary>
9 |     /// Example program that runs OCR on an embedded image loaded through ImageSharp.
10 |     /// </summary>
11 |     public static class Program
12 |     {
13 |         /// <summary>
14 |         /// Loads the embedded sample image, recognises its text and prints the trimmed result.
15 |         /// </summary>
16 |         public static void Main()
17 |         {
18 |             // using declarations release the engine, image and stream (in that order)
19 |             // even when loading the image or running OCR throws.
20 |             using Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.ImageSharp.img.png");
21 |             using Image image = Image.Load(stream);
22 |             using Tesseract tesseract = new Tesseract();
23 |
24 |             Console.WriteLine(tesseract.Read(image).Trim());
25 |         }
26 |     }
27 | }
28 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.ImageSharp/TesserNet.Example.ImageSharp.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | Exe
4 | netcoreapp3.1
5 | AnyCPU;x64;x86
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.ImageSharp/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.ImageSharp/img.png
--------------------------------------------------------------------------------
/src/TesserNet.Example.SkiaSharp/Program.cs:
--------------------------------------------------------------------------------
1 | using SkiaSharp;
2 | using System;
3 | using System.IO;
4 | using System.Reflection;
5 |
6 | namespace TesserNet.Example.SkiaSharp
7 | {
8 |     /// <summary>
9 |     /// Example program that runs OCR on an embedded image decoded through SkiaSharp.
10 |     /// </summary>
11 |     public static class Program
12 |     {
13 |         /// <summary>
14 |         /// Decodes the embedded sample image, recognises its text and prints the trimmed result.
15 |         /// </summary>
16 |         public static void Main()
17 |         {
18 |             // using declarations release the engine, bitmap and stream (in that order)
19 |             // even when decoding the image or running OCR throws.
20 |             using Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.SkiaSharp.img.png");
21 |             using SKBitmap image = SKBitmap.Decode(stream);
22 |             using Tesseract tesseract = new Tesseract();
23 |
24 |             Console.WriteLine(tesseract.Read(image).Trim());
25 |         }
26 |     }
27 | }
28 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.SkiaSharp/TesserNet.Example.SkiaSharp.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | Exe
4 | netcoreapp3.1
5 | AnyCPU;x64;x86
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.SkiaSharp/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.SkiaSharp/img.png
--------------------------------------------------------------------------------
/src/TesserNet.Example.System.Drawing/Program.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Drawing;
3 | using System.IO;
4 | using System.Reflection;
5 |
6 | namespace TesserNet.Example.System.Drawing
7 | {
8 |     /// <summary>
9 |     /// Example program that runs OCR on an embedded image loaded through System.Drawing.
10 |     /// </summary>
11 |     public static class Program
12 |     {
13 |         /// <summary>
14 |         /// Loads the embedded sample image, recognises its text and prints the trimmed result.
15 |         /// </summary>
16 |         public static void Main()
17 |         {
18 |             // Stacked using statements dispose in reverse order even on failure, so the
19 |             // stream outlives the image that was created from it.
20 |             using (Stream stream = Assembly.GetExecutingAssembly().GetManifestResourceStream("TesserNet.Example.System.Drawing.img.png"))
21 |             using (Image image = Image.FromStream(stream))
22 |             using (Tesseract tesseract = new Tesseract())
23 |             {
24 |                 Console.WriteLine(tesseract.Read(image).Trim());
25 |             }
26 |         }
27 |     }
28 | }
29 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.System.Drawing/TesserNet.Example.System.Drawing.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | Exe
4 | net48
5 | AnyCPU;x64;x86
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
--------------------------------------------------------------------------------
/src/TesserNet.Example.System.Drawing/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Example.System.Drawing/img.png
--------------------------------------------------------------------------------
/src/TesserNet.ImageSharp/ImageSharpTesseractExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading.Tasks;
3 | using SixLabors.ImageSharp;
4 | using SixLabors.ImageSharp.PixelFormats;
5 |
6 | namespace TesserNet
7 | {
8 |     /// <summary>
9 |     /// Provides extension methods for <see cref="ITesseract"/> that accept ImageSharp images.
10 |     /// </summary>
11 |     public static class ImageSharpTesseractExtensions
12 |     {
13 |         /// <summary>
14 |         /// Performs OCR on the given image.
15 |         /// </summary>
16 |         /// <param name="tesseract">The tesseract instance.</param>
17 |         /// <param name="image">The image.</param>
18 |         /// <returns>The found text as a UTF8 string.</returns>
19 |         public static string Read(this ITesseract tesseract, Image image)
20 |             => tesseract.Read(image, new Rectangle(-1, -1, -1, -1));
21 |
22 |         /// <summary>
23 |         /// Performs OCR on a rectangle inside the given image.
24 |         /// </summary>
25 |         /// <param name="tesseract">The tesseract instance.</param>
26 |         /// <param name="image">The image.</param>
27 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
28 |         /// <returns>The found text as a UTF8 string.</returns>
29 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
30 |         public static unsafe string Read(this ITesseract tesseract, Image image, Rectangle rectangle)
31 |         {
32 |             if (tesseract is null)
33 |             {
34 |                 throw new ArgumentNullException(nameof(tesseract));
35 |             }
36 |
37 |             if (image is null)
38 |             {
39 |                 throw new ArgumentNullException(nameof(image));
40 |             }
41 |
42 |             // The native call is told there are 4 bytes per pixel, so any other pixel
43 |             // format is first converted into a temporary RGBA copy.
44 |             if (image is not Image<Rgba32> bmp)
45 |             {
46 |                 bmp = image.CloneAs<Rgba32>();
47 |             }
48 |
49 |             try
50 |             {
51 |                 // Keep the pixel buffer pinned for the whole call: a pointer taken inside a
52 |                 // short-lived fixed block is no longer protected once that block exits.
53 |                 using var pin = GetPixelMemory(bmp).Pin();
54 |                 return tesseract.Read(new IntPtr(pin.Pointer), image.Width, image.Height, 4, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
55 |             }
56 |             finally
57 |             {
58 |                 // Only the copy created above is ours to dispose, never the caller's image.
59 |                 if (!ReferenceEquals(bmp, image))
60 |                 {
61 |                     bmp.Dispose();
62 |                 }
63 |             }
64 |         }
65 |
66 |         /// <summary>
67 |         /// Performs OCR on the given image.
68 |         /// </summary>
69 |         /// <param name="tesseract">The tesseract instance.</param>
70 |         /// <param name="image">The image.</param>
71 |         /// <returns>The found text as a UTF8 string.</returns>
72 |         public static Task<string> ReadAsync(this ITesseract tesseract, Image image)
73 |             => tesseract.ReadAsync(image, new Rectangle(-1, -1, -1, -1));
74 |
75 |         /// <summary>
76 |         /// Performs OCR on a rectangle inside the given image.
77 |         /// </summary>
78 |         /// <param name="tesseract">The tesseract instance.</param>
79 |         /// <param name="image">The image.</param>
80 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
81 |         /// <returns>The found text as a UTF8 string.</returns>
82 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
83 |         public static unsafe Task<string> ReadAsync(this ITesseract tesseract, Image image, Rectangle rectangle)
84 |         {
85 |             if (tesseract is null)
86 |             {
87 |                 throw new ArgumentNullException(nameof(tesseract));
88 |             }
89 |
90 |             if (image is null)
91 |             {
92 |                 throw new ArgumentNullException(nameof(image));
93 |             }
94 |
95 |             if (image is not Image<Rgba32> bmp)
96 |             {
97 |                 bmp = image.CloneAs<Rgba32>();
98 |             }
99 |
100 |             bool ownsCopy = !ReferenceEquals(bmp, image);
101 |
102 |             try
103 |             {
104 |                 // The buffer stays pinned until the asynchronous read has finished; the
105 |                 // continuation below releases the pin and the temporary copy.
106 |                 var pin = GetPixelMemory(bmp).Pin();
107 |                 try
108 |                 {
109 |                     Task<string> pending = tesseract.ReadAsync(new IntPtr(pin.Pointer), image.Width, image.Height, 4, rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
110 |
111 |                     // r.Result is kept from the original implementation so that failures
112 |                     // surface to callers exactly as they did before.
113 |                     return pending.ContinueWith(r =>
114 |                     {
115 |                         pin.Dispose();
116 |                         if (ownsCopy)
117 |                         {
118 |                             bmp.Dispose();
119 |                         }
120 |
121 |                         return r.Result;
122 |                     });
123 |                 }
124 |                 catch
125 |                 {
126 |                     // The read never started, so no continuation will release the pin.
127 |                     pin.Dispose();
128 |                     throw;
129 |                 }
130 |             }
131 |             catch
132 |             {
133 |                 if (ownsCopy)
134 |                 {
135 |                     bmp.Dispose();
136 |                 }
137 |
138 |                 throw;
139 |             }
140 |         }
141 |
142 |         // Returns the image's pixel buffer as one contiguous block of memory, or throws
143 |         // a TesseractException when ImageSharp cannot expose it that way.
144 |         private static Memory<Rgba32> GetPixelMemory(Image<Rgba32> image)
145 |         {
146 |             if (!image.DangerousTryGetSinglePixelMemory(out Memory<Rgba32> memory))
147 |             {
148 |                 throw new TesseractException("Could not get image pixels.");
149 |             }
150 |
151 |             return memory;
152 |         }
153 |     }
154 | }
155 |
--------------------------------------------------------------------------------
/src/TesserNet.ImageSharp/TesserNet.ImageSharp.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.0
4 | ../Ruleset.ruleset
5 | bin/$(AssemblyName).xml
6 | 9
7 | enable
8 | true
9 | true
10 |
11 | Wesley Baartman
12 | https://github.com/CptWesley/TesserNet
13 | https://github.com/CptWesley/TesserNet
14 | Apache-2.0
15 | Provides extension methods for TesserNet when using ImageSharp.
16 | tesseract ocr bindings optical character recognition tesseract-ocr imagesharp
17 | $(Version)
18 | $(Version)
19 | 0.8.0
20 |
21 | README.md
22 | AnyCPU;x64;x86
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | all
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/src/TesserNet.SkiaSharp/SkiaSharpTesseractExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading.Tasks;
3 | using SkiaSharp;
4 |
5 | namespace TesserNet
6 | {
7 |     /// <summary>
8 |     /// Provides extension methods for <see cref="ITesseract"/> that accept SkiaSharp bitmaps and images.
9 |     /// </summary>
10 |     public static class SkiaSharpTesseractExtensions
11 |     {
12 |         /// <summary>
13 |         /// Performs OCR on the given image.
14 |         /// </summary>
15 |         /// <param name="tesseract">The tesseract instance.</param>
16 |         /// <param name="image">The image.</param>
17 |         /// <returns>The found text as a UTF8 string.</returns>
18 |         public static string Read(this ITesseract tesseract, SKBitmap image)
19 |             => tesseract.Read(image, new SKRect(-1, -1, -1, -1));
20 |
21 |         /// <summary>
22 |         /// Performs OCR on a rectangle inside the given image.
23 |         /// </summary>
24 |         /// <param name="tesseract">The tesseract instance.</param>
25 |         /// <param name="image">The image.</param>
26 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
27 |         /// <returns>The found text as a UTF8 string.</returns>
28 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
29 |         public static string Read(this ITesseract tesseract, SKBitmap image, SKRect rectangle)
30 |         {
31 |             if (tesseract is null)
32 |             {
33 |                 throw new ArgumentNullException(nameof(tesseract));
34 |             }
35 |
36 |             if (image is null)
37 |             {
38 |                 throw new ArgumentNullException(nameof(image));
39 |             }
40 |
41 |             // NOTE(review): SKRect is built from (left, top, right, bottom), so the default
42 |             // rectangle used by the overloads without one has Width/Height 0 rather than -1
43 |             // as in the other image backends; confirm the core Read treats that as "whole image".
44 |             return tesseract.Read(image.GetPixels(), image.Width, image.Height, image.BytesPerPixel, (int)rectangle.Left, (int)rectangle.Top, (int)rectangle.Width, (int)rectangle.Height);
45 |         }
46 |
47 |         /// <summary>
48 |         /// Performs OCR on the given image.
49 |         /// </summary>
50 |         /// <param name="tesseract">The tesseract instance.</param>
51 |         /// <param name="image">The image.</param>
52 |         /// <returns>The found text as a UTF8 string.</returns>
53 |         public static Task<string> ReadAsync(this ITesseract tesseract, SKBitmap image)
54 |             => tesseract.ReadAsync(image, new SKRect(-1, -1, -1, -1));
55 |
56 |         /// <summary>
57 |         /// Performs OCR on a rectangle inside the given image.
58 |         /// </summary>
59 |         /// <param name="tesseract">The tesseract instance.</param>
60 |         /// <param name="image">The image.</param>
61 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
62 |         /// <returns>The found text as a UTF8 string.</returns>
63 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
64 |         public static Task<string> ReadAsync(this ITesseract tesseract, SKBitmap image, SKRect rectangle)
65 |         {
66 |             if (tesseract is null)
67 |             {
68 |                 throw new ArgumentNullException(nameof(tesseract));
69 |             }
70 |
71 |             if (image is null)
72 |             {
73 |                 throw new ArgumentNullException(nameof(image));
74 |             }
75 |
76 |             // The pixel address is read from the bitmap itself, so the caller has to keep
77 |             // the bitmap alive and unmodified until the returned task completes.
78 |             return tesseract.ReadAsync(image.GetPixels(), image.Width, image.Height, image.BytesPerPixel, (int)rectangle.Left, (int)rectangle.Top, (int)rectangle.Width, (int)rectangle.Height);
79 |         }
80 |
81 |         /// <summary>
82 |         /// Performs OCR on the given image.
83 |         /// </summary>
84 |         /// <param name="tesseract">The tesseract instance.</param>
85 |         /// <param name="image">The image.</param>
86 |         /// <returns>The found text as a UTF8 string.</returns>
87 |         public static string Read(this ITesseract tesseract, SKImage image)
88 |             => tesseract.Read(image, new SKRect(-1, -1, -1, -1));
89 |
90 |         /// <summary>
91 |         /// Performs OCR on a rectangle inside the given image.
92 |         /// </summary>
93 |         /// <param name="tesseract">The tesseract instance.</param>
94 |         /// <param name="image">The image.</param>
95 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
96 |         /// <returns>The found text as a UTF8 string.</returns>
97 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
98 |         public static string Read(this ITesseract tesseract, SKImage image, SKRect rectangle)
99 |         {
100 |             if (tesseract is null)
101 |             {
102 |                 throw new ArgumentNullException(nameof(tesseract));
103 |             }
104 |
105 |             if (image is null)
106 |             {
107 |                 throw new ArgumentNullException(nameof(image));
108 |             }
109 |
110 |             // The bitmap created from the image is a temporary owned by this call.
111 |             using SKBitmap bitmap = SKBitmap.FromImage(image);
112 |             return tesseract.Read(bitmap, rectangle);
113 |         }
114 |
115 |         /// <summary>
116 |         /// Performs OCR on the given image.
117 |         /// </summary>
118 |         /// <param name="tesseract">The tesseract instance.</param>
119 |         /// <param name="image">The image.</param>
120 |         /// <returns>The found text as a UTF8 string.</returns>
121 |         public static Task<string> ReadAsync(this ITesseract tesseract, SKImage image)
122 |             => tesseract.ReadAsync(image, new SKRect(-1, -1, -1, -1));
123 |
124 |         /// <summary>
125 |         /// Performs OCR on a rectangle inside the given image.
126 |         /// </summary>
127 |         /// <param name="tesseract">The tesseract instance.</param>
128 |         /// <param name="image">The image.</param>
129 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
130 |         /// <returns>The found text as a UTF8 string.</returns>
131 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
132 |         [System.Diagnostics.CodeAnalysis.SuppressMessage("Reliability", "CA2000", Justification = "The temporary bitmap is disposed once the read has completed.")]
133 |         public static Task<string> ReadAsync(this ITesseract tesseract, SKImage image, SKRect rectangle)
134 |         {
135 |             if (tesseract is null)
136 |             {
137 |                 throw new ArgumentNullException(nameof(tesseract));
138 |             }
139 |
140 |             if (image is null)
141 |             {
142 |                 throw new ArgumentNullException(nameof(image));
143 |             }
144 |
145 |             SKBitmap bitmap = SKBitmap.FromImage(image);
146 |             Task<string> pending;
147 |             try
148 |             {
149 |                 pending = tesseract.ReadAsync(bitmap, rectangle);
150 |             }
151 |             catch
152 |             {
153 |                 // The read never started, so nothing else will release the temporary bitmap.
154 |                 bitmap.Dispose();
155 |                 throw;
156 |             }
157 |
158 |             return DisposeWhenCompleteAsync(pending, bitmap);
159 |         }
160 |
161 |         // Awaits the pending read and then disposes the temporary bitmap, whether the
162 |         // read succeeded, failed or was cancelled.
163 |         private static async Task<string> DisposeWhenCompleteAsync(Task<string> pending, SKBitmap bitmap)
164 |         {
165 |             try
166 |             {
167 |                 return await pending.ConfigureAwait(false);
168 |             }
169 |             finally
170 |             {
171 |                 bitmap.Dispose();
172 |             }
173 |         }
174 |     }
175 | }
176 |
--------------------------------------------------------------------------------
/src/TesserNet.SkiaSharp/TesserNet.SkiaSharp.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.0
4 | ../Ruleset.ruleset
5 | bin/$(AssemblyName).xml
6 | 9
7 | enable
8 | true
9 | true
10 |
11 | Wesley Baartman
12 | https://github.com/CptWesley/TesserNet
13 | https://github.com/CptWesley/TesserNet
14 | Apache-2.0
15 | Provides extension methods for TesserNet when using SkiaSharp.
16 | tesseract ocr bindings optical character recognition tesseract-ocr skia sharp skiasharp
17 | $(Version)
18 | $(Version)
19 | 0.8.0
20 |
21 | README.md
22 | AnyCPU;x64;x86
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | all
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
--------------------------------------------------------------------------------
/src/TesserNet.System.Drawing/SystemDrawingTesseractExtensions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Diagnostics.CodeAnalysis;
3 | using System.Drawing;
4 | using System.Drawing.Imaging;
5 | using System.Threading.Tasks;
6 |
7 | namespace TesserNet
8 | {
9 |     /// <summary>
10 |     /// Provides extension methods for <see cref="ITesseract"/> that accept System.Drawing images.
11 |     /// </summary>
12 |     public static class SystemDrawingTesseractExtensions
13 |     {
14 |         /// <summary>
15 |         /// Performs OCR on the given image.
16 |         /// </summary>
17 |         /// <param name="tesseract">The tesseract instance.</param>
18 |         /// <param name="image">The image.</param>
19 |         /// <returns>The found text as a UTF8 string.</returns>
20 |         public static string Read(this ITesseract tesseract, Image image)
21 |             => tesseract.Read(image, new Rectangle(-1, -1, -1, -1));
22 |
23 |         /// <summary>
24 |         /// Performs OCR on a rectangle inside the given image.
25 |         /// </summary>
26 |         /// <param name="tesseract">The tesseract instance.</param>
27 |         /// <param name="image">The image.</param>
28 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
29 |         /// <returns>The found text as a UTF8 string.</returns>
30 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
31 |         [SuppressMessage("Reliability", "CA2000", Justification = "Bitmap is disposed if new one was created.")]
32 |         public static string Read(this ITesseract tesseract, Image image, Rectangle rectangle)
33 |         {
34 |             if (tesseract is null)
35 |             {
36 |                 throw new ArgumentNullException(nameof(tesseract));
37 |             }
38 |
39 |             if (image is null)
40 |             {
41 |                 throw new ArgumentNullException(nameof(image));
42 |             }
43 |
44 |             // Pixel access needs a Bitmap; any other Image is copied into a temporary one.
45 |             if (image is not Bitmap bmp)
46 |             {
47 |                 bmp = new Bitmap(image);
48 |             }
49 |
50 |             try
51 |             {
52 |                 BitmapData bits = LockPixels(bmp);
53 |                 try
54 |                 {
55 |                     // Scan0 is only used while the bits are still locked.
56 |                     return tesseract.Read(bits.Scan0, image.Width, image.Height, BytesPerPixel(bmp), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
57 |                 }
58 |                 finally
59 |                 {
60 |                     bmp.UnlockBits(bits);
61 |                 }
62 |             }
63 |             finally
64 |             {
65 |                 // Only the copy created above is ours to dispose, never the caller's image.
66 |                 if (!ReferenceEquals(bmp, image))
67 |                 {
68 |                     bmp.Dispose();
69 |                 }
70 |             }
71 |         }
72 |
73 |         /// <summary>
74 |         /// Performs OCR on the given image.
75 |         /// </summary>
76 |         /// <param name="tesseract">The tesseract instance.</param>
77 |         /// <param name="image">The image.</param>
78 |         /// <returns>The found text as a UTF8 string.</returns>
79 |         public static Task<string> ReadAsync(this ITesseract tesseract, Image image)
80 |             => tesseract.ReadAsync(image, new Rectangle(-1, -1, -1, -1));
81 |
82 |         /// <summary>
83 |         /// Performs OCR on a rectangle inside the given image.
84 |         /// </summary>
85 |         /// <param name="tesseract">The tesseract instance.</param>
86 |         /// <param name="image">The image.</param>
87 |         /// <param name="rectangle">The rectangle to perform OCR in.</param>
88 |         /// <returns>The found text as a UTF8 string.</returns>
89 |         /// <exception cref="ArgumentNullException">Thrown when <paramref name="tesseract"/> or <paramref name="image"/> is null.</exception>
90 |         [SuppressMessage("Reliability", "CA2000", Justification = "Bitmap is disposed if new one was created.")]
91 |         public static Task<string> ReadAsync(this ITesseract tesseract, Image image, Rectangle rectangle)
92 |         {
93 |             if (tesseract is null)
94 |             {
95 |                 throw new ArgumentNullException(nameof(tesseract));
96 |             }
97 |
98 |             if (image is null)
99 |             {
100 |                 throw new ArgumentNullException(nameof(image));
101 |             }
102 |
103 |             if (image is not Bitmap bmp)
104 |             {
105 |                 bmp = new Bitmap(image);
106 |             }
107 |
108 |             bool ownsCopy = !ReferenceEquals(bmp, image);
109 |
110 |             try
111 |             {
112 |                 BitmapData bits = LockPixels(bmp);
113 |                 try
114 |                 {
115 |                     // The pixel size comes from the bitmap that is actually read, matching
116 |                     // the synchronous overload, instead of a hard-coded 4.
117 |                     Task<string> pending = tesseract.ReadAsync(bits.Scan0, image.Width, image.Height, BytesPerPixel(bmp), rectangle.X, rectangle.Y, rectangle.Width, rectangle.Height);
118 |
119 |                     // The bits stay locked until the read has finished. r.Result is kept from
120 |                     // the original implementation so failures surface exactly as before.
121 |                     return pending.ContinueWith(r =>
122 |                     {
123 |                         bmp.UnlockBits(bits);
124 |                         if (ownsCopy)
125 |                         {
126 |                             bmp.Dispose();
127 |                         }
128 |
129 |                         return r.Result;
130 |                     });
131 |                 }
132 |                 catch
133 |                 {
134 |                     // The read never started, so no continuation will unlock the bits.
135 |                     bmp.UnlockBits(bits);
136 |                     throw;
137 |                 }
138 |             }
139 |             catch
140 |             {
141 |                 if (ownsCopy)
142 |                 {
143 |                     bmp.Dispose();
144 |                 }
145 |
146 |                 throw;
147 |             }
148 |         }
149 |
150 |         // Locks the whole bitmap for reading in its own pixel format; the caller must
151 |         // pass the returned data to UnlockBits when it is done with Scan0.
152 |         private static BitmapData LockPixels(Bitmap bitmap)
153 |             => bitmap.LockBits(new Rectangle(0, 0, bitmap.Width, bitmap.Height), ImageLockMode.ReadOnly, bitmap.PixelFormat);
154 |
155 |         // Number of whole bytes used per pixel by the bitmap's pixel format.
156 |         private static int BytesPerPixel(Bitmap bitmap)
157 |             => Image.GetPixelFormatSize(bitmap.PixelFormat) / 8;
158 |     }
159 | }
160 |
--------------------------------------------------------------------------------
/src/TesserNet.System.Drawing/TesserNet.System.Drawing.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.0
4 | ../Ruleset.ruleset
5 | bin/$(AssemblyName).xml
6 | 9
7 | enable
8 | true
9 |
10 | Wesley Baartman
11 | https://github.com/CptWesley/TesserNet
12 | https://github.com/CptWesley/TesserNet
13 | Apache-2.0
14 | Provides extension methods for TesserNet when using System.Drawing.
15 | tesseract ocr bindings optical character recognition tesseract-ocr system drawing
16 | $(Version)
17 | $(Version)
18 | 0.8.0
19 |
20 | README.md
21 | AnyCPU;x64;x86
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 | all
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
--------------------------------------------------------------------------------
/src/TesserNet.Tests/ImageLoader.cs:
--------------------------------------------------------------------------------
1 | using System.IO;
2 | using System.Reflection;
3 |
4 | namespace TesserNet.Tests
5 | {
6 |     /// <summary>
7 |     /// Used to load images.
8 |     /// </summary>
9 |     internal static class ImageLoader
10 |     {
11 |         /// <summary>
12 |         /// Loads an image as a stream.
13 |         /// </summary>
14 |         /// <param name="fileName">The filename.</param>
15 |         /// <returns>The stream returned by the manifest resource lookup of the executing assembly.</returns>
16 |         public static Stream LoadStream(string fileName)
17 |         {
18 |             Assembly asm = Assembly.GetExecutingAssembly();
19 |             return asm.GetManifestResourceStream($"TesserNet.Tests.Resources.{fileName}");
20 |         }
21 |
22 |         /// <summary>
23 |         /// Loads an image as a byte array.
24 |         /// </summary>
25 |         /// <param name="fileName">The filename.</param>
26 |         /// <returns>The image bytes.</returns>
27 |         /// <exception cref="FileNotFoundException">Thrown when no embedded resource exists for <paramref name="fileName"/>.</exception>
28 |         public static byte[] LoadByteArray(string fileName)
29 |         {
30 |             using Stream s = LoadStream(fileName);
31 |
32 |             // The resource lookup yields null for an unknown name; report that clearly
33 |             // instead of failing on the copy below.
34 |             if (s is null)
35 |             {
36 |                 throw new FileNotFoundException($"Embedded resource '{fileName}' was not found.", fileName);
37 |             }
38 |
39 |             using MemoryStream ms = new MemoryStream();
40 |             s.CopyTo(ms);
41 |             return ms.ToArray();
42 |         }
43 |     }
44 | }
45 |
--------------------------------------------------------------------------------
/src/TesserNet.Tests/Resources/img.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet.Tests/Resources/img.png
--------------------------------------------------------------------------------
/src/TesserNet.Tests/TesserNet.Tests.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netcoreapp3.1
4 | false
5 | ../Ruleset.ruleset
6 | bin/$(AssemblyName).xml
7 | true
8 | AnyCPU;x64;x86
9 |
10 |
11 |
12 | true
13 | opencover
14 | ./bin/
15 | [TesserNet]*
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 | all
28 | runtime; build; native; contentfiles; analyzers; buildtransitive
29 |
30 |
31 | all
32 | runtime; build; native; contentfiles; analyzers; buildtransitive
33 |
34 |
35 | all
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/src/TesserNet.Tests/Tests.cs:
--------------------------------------------------------------------------------
1 | using System.IO;
2 | using SkiaSharp;
3 | using Xunit;
4 | using static AssertNet.Assertions;
5 |
6 | namespace TesserNet.Tests
7 | {
8 |     /// <summary>
9 |     /// Contains simple tests.
10 |     /// </summary>
11 |     public static class Tests
12 |     {
13 |         private const string FileName1 = "img.png";
14 |         private const string FileContent1 = "Hello world!";
15 |         private static readonly ITesseract Tess = new Tesseract();
16 |
17 |         /// <summary>
18 |         /// Checks that the ImageSharp implementation works for a simple image.
19 |         /// </summary>
20 |         [Fact]
21 |         public static void ImageSharp()
22 |         {
23 |             using Stream s = ImageLoader.LoadStream(FileName1);
24 |             using var img = SixLabors.ImageSharp.Image.Load(s);
25 |             AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1);
26 |         }
27 |
28 |         /// <summary>
29 |         /// Checks that the SkiaSharp implementation works for a simple image.
30 |         /// </summary>
31 |         [Fact]
32 |         public static void SkiaSharp()
33 |         {
34 |             using Stream s = ImageLoader.LoadStream(FileName1);
35 |             using var img = SKBitmap.Decode(s);
36 |             AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1);
37 |         }
38 |
39 |         /// <summary>
40 |         /// Checks that the System.Drawing implementation works for a simple image.
41 |         /// </summary>
42 |         [Fact]
43 |         public static void SystemDrawing()
44 |         {
45 |             using Stream s = ImageLoader.LoadStream(FileName1);
46 |             using var img = System.Drawing.Image.FromStream(s);
47 |             AssertThat(Tess.Read(img).Trim()).IsEqualTo(FileContent1);
48 |         }
49 |     }
50 | }
51 |
--------------------------------------------------------------------------------
/src/TesserNet.sln:
--------------------------------------------------------------------------------
1 |
2 | Microsoft Visual Studio Solution File, Format Version 12.00
3 | # Visual Studio Version 17
4 | VisualStudioVersion = 17.3.32811.315
5 | MinimumVisualStudioVersion = 10.0.40219.1
6 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet", "TesserNet\TesserNet.csproj", "{98D03E85-6685-40E1-B2FC-173636F955CD}"
7 | EndProject
8 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Tests", "TesserNet.Tests\TesserNet.Tests.csproj", "{2E467823-D21D-4856-B643-6A8712AC3F0E}"
9 | EndProject
10 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.System.Drawing", "TesserNet.System.Drawing\TesserNet.System.Drawing.csproj", "{B70F5950-175A-42BD-A164-EE7FE2977D3D}"
11 | EndProject
12 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.System.Drawing", "TesserNet.Example.System.Drawing\TesserNet.Example.System.Drawing.csproj", "{DA52B301-D81D-4AD1-9BD7-EB567D1FB176}"
13 | EndProject
14 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.ImageSharp", "TesserNet.ImageSharp\TesserNet.ImageSharp.csproj", "{AE1BA00D-D8E1-4726-A969-C54E02D8985C}"
15 | EndProject
16 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.ImageSharp", "TesserNet.Example.ImageSharp\TesserNet.Example.ImageSharp.csproj", "{271E16DD-D0EF-43A4-846D-7FC0C84867B6}"
17 | EndProject
18 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.SkiaSharp", "TesserNet.SkiaSharp\TesserNet.SkiaSharp.csproj", "{0FC0748B-73DA-4897-B55A-907F5E296BC2}"
19 | EndProject
20 | Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "TesserNet.Example.SkiaSharp", "TesserNet.Example.SkiaSharp\TesserNet.Example.SkiaSharp.csproj", "{DD580635-ACCF-4462-B7C2-197EE5354CE2}"
21 | EndProject
22 | Global
23 | GlobalSection(SolutionConfigurationPlatforms) = preSolution
24 | Debug|Any CPU = Debug|Any CPU
25 | Debug|x64 = Debug|x64
26 | Debug|x86 = Debug|x86
27 | Release|Any CPU = Release|Any CPU
28 | Release|x64 = Release|x64
29 | Release|x86 = Release|x86
30 | EndGlobalSection
31 | GlobalSection(ProjectConfigurationPlatforms) = postSolution
32 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
33 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|Any CPU.Build.0 = Debug|Any CPU
34 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x64.ActiveCfg = Debug|x64
35 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x64.Build.0 = Debug|x64
36 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x86.ActiveCfg = Debug|x86
37 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Debug|x86.Build.0 = Debug|x86
38 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|Any CPU.ActiveCfg = Release|Any CPU
39 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|Any CPU.Build.0 = Release|Any CPU
40 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x64.ActiveCfg = Release|x64
41 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x64.Build.0 = Release|x64
42 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x86.ActiveCfg = Release|x86
43 | {98D03E85-6685-40E1-B2FC-173636F955CD}.Release|x86.Build.0 = Release|x86
44 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
45 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|Any CPU.Build.0 = Debug|Any CPU
46 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x64.ActiveCfg = Debug|x64
47 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x64.Build.0 = Debug|x64
48 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x86.ActiveCfg = Debug|x86
49 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Debug|x86.Build.0 = Debug|x86
50 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|Any CPU.ActiveCfg = Release|Any CPU
51 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|Any CPU.Build.0 = Release|Any CPU
52 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x64.ActiveCfg = Release|x64
53 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x64.Build.0 = Release|x64
54 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x86.ActiveCfg = Release|x86
55 | {2E467823-D21D-4856-B643-6A8712AC3F0E}.Release|x86.Build.0 = Release|x86
56 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
57 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|Any CPU.Build.0 = Debug|Any CPU
58 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x64.ActiveCfg = Debug|x64
59 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x64.Build.0 = Debug|x64
60 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x86.ActiveCfg = Debug|x86
61 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Debug|x86.Build.0 = Debug|x86
62 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|Any CPU.ActiveCfg = Release|Any CPU
63 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|Any CPU.Build.0 = Release|Any CPU
64 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x64.ActiveCfg = Release|x64
65 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x64.Build.0 = Release|x64
66 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x86.ActiveCfg = Release|x86
67 | {B70F5950-175A-42BD-A164-EE7FE2977D3D}.Release|x86.Build.0 = Release|x86
68 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
69 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|Any CPU.Build.0 = Debug|Any CPU
70 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x64.ActiveCfg = Debug|x64
71 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x64.Build.0 = Debug|x64
72 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x86.ActiveCfg = Debug|x86
73 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Debug|x86.Build.0 = Debug|x86
74 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|Any CPU.ActiveCfg = Release|Any CPU
75 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|Any CPU.Build.0 = Release|Any CPU
76 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x64.ActiveCfg = Release|x64
77 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x64.Build.0 = Release|x64
78 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x86.ActiveCfg = Release|x86
79 | {DA52B301-D81D-4AD1-9BD7-EB567D1FB176}.Release|x86.Build.0 = Release|x86
80 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
81 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|Any CPU.Build.0 = Debug|Any CPU
82 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x64.ActiveCfg = Debug|x64
83 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x64.Build.0 = Debug|x64
84 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x86.ActiveCfg = Debug|x86
85 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Debug|x86.Build.0 = Debug|x86
86 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|Any CPU.ActiveCfg = Release|Any CPU
87 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|Any CPU.Build.0 = Release|Any CPU
88 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x64.ActiveCfg = Release|x64
89 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x64.Build.0 = Release|x64
90 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x86.ActiveCfg = Release|x86
91 | {AE1BA00D-D8E1-4726-A969-C54E02D8985C}.Release|x86.Build.0 = Release|x86
92 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
93 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|Any CPU.Build.0 = Debug|Any CPU
94 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x64.ActiveCfg = Debug|x64
95 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x64.Build.0 = Debug|x64
96 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x86.ActiveCfg = Debug|x86
97 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Debug|x86.Build.0 = Debug|x86
98 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|Any CPU.ActiveCfg = Release|Any CPU
99 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|Any CPU.Build.0 = Release|Any CPU
100 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x64.ActiveCfg = Release|x64
101 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x64.Build.0 = Release|x64
102 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x86.ActiveCfg = Release|x86
103 | {271E16DD-D0EF-43A4-846D-7FC0C84867B6}.Release|x86.Build.0 = Release|x86
104 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
105 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|Any CPU.Build.0 = Debug|Any CPU
106 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x64.ActiveCfg = Debug|x64
107 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x64.Build.0 = Debug|x64
108 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x86.ActiveCfg = Debug|x86
109 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Debug|x86.Build.0 = Debug|x86
110 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|Any CPU.ActiveCfg = Release|Any CPU
111 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|Any CPU.Build.0 = Release|Any CPU
112 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x64.ActiveCfg = Release|x64
113 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x64.Build.0 = Release|x64
114 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x86.ActiveCfg = Release|x86
115 | {0FC0748B-73DA-4897-B55A-907F5E296BC2}.Release|x86.Build.0 = Release|x86
116 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
117 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|Any CPU.Build.0 = Debug|Any CPU
118 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x64.ActiveCfg = Debug|x64
119 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x64.Build.0 = Debug|x64
120 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x86.ActiveCfg = Debug|x86
121 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Debug|x86.Build.0 = Debug|x86
122 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|Any CPU.ActiveCfg = Release|Any CPU
123 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|Any CPU.Build.0 = Release|Any CPU
124 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x64.ActiveCfg = Release|x64
125 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x64.Build.0 = Release|x64
126 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x86.ActiveCfg = Release|x86
127 | {DD580635-ACCF-4462-B7C2-197EE5354CE2}.Release|x86.Build.0 = Release|x86
128 | EndGlobalSection
129 | GlobalSection(SolutionProperties) = preSolution
130 | HideSolutionNode = FALSE
131 | EndGlobalSection
132 | GlobalSection(ExtensibilityGlobals) = postSolution
133 | SolutionGuid = {543B2CD5-02AB-4379-9D7F-045CA7223CC4}
134 | EndGlobalSection
135 | EndGlobal
136 |
--------------------------------------------------------------------------------
/src/TesserNet/ITesseract.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading.Tasks;
3 |
4 | namespace TesserNet
5 | {
6 |     /// <summary>
7 |     /// Interface for Tesseract instances.
8 |     /// </summary>
9 |     public unsafe interface ITesseract : IDisposable
10 |     {
11 |         /// <summary>
12 |         /// Gets or sets the options.
13 |         /// </summary>
14 |         TesseractOptions Options { get; set; }
15 |
16 |         /// <summary>
17 |         /// Performs OCR on the given image.
18 |         /// </summary>
19 |         /// <param name="data">The bytes of the image.</param>
20 |         /// <param name="width">The width of the image.</param>
21 |         /// <param name="height">The height of the image.</param>
22 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
23 |         /// <returns>The found text as a UTF8 string.</returns>
24 |         string Read(byte[] data, int width, int height, int bytesPerPixel);
25 |
26 |         /// <summary>
27 |         /// Performs OCR on the given image.
28 |         /// </summary>
29 |         /// <param name="data">The bytes of the image.</param>
30 |         /// <param name="width">The width of the image.</param>
31 |         /// <param name="height">The height of the image.</param>
32 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
33 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
34 |         Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel);
35 |
36 |         /// <summary>
37 |         /// Performs OCR on a rectangle inside the given image.
38 |         /// </summary>
39 |         /// <param name="data">The bytes of the image.</param>
40 |         /// <param name="width">The width of the image.</param>
41 |         /// <param name="height">The height of the image.</param>
42 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
43 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
44 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
45 |         /// <param name="rectWidth">The width of the rectangle.</param>
46 |         /// <param name="rectHeight">The height of the rectangle.</param>
47 |         /// <returns>The found text as a UTF8 string.</returns>
48 |         string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
49 |
50 |         /// <summary>
51 |         /// Performs OCR on a rectangle inside the given image.
52 |         /// </summary>
53 |         /// <param name="data">The bytes of the image.</param>
54 |         /// <param name="width">The width of the image.</param>
55 |         /// <param name="height">The height of the image.</param>
56 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
57 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
58 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
59 |         /// <param name="rectWidth">The width of the rectangle.</param>
60 |         /// <param name="rectHeight">The height of the rectangle.</param>
61 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
62 |         Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
63 |
64 |         /// <summary>
65 |         /// Performs OCR on the given image.
66 |         /// </summary>
67 |         /// <param name="data">The bytes of the image.</param>
68 |         /// <param name="width">The width of the image.</param>
69 |         /// <param name="height">The height of the image.</param>
70 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
71 |         /// <returns>The found text as a UTF8 string.</returns>
72 |         string Read(Memory<byte> data, int width, int height, int bytesPerPixel);
73 |
74 |         /// <summary>
75 |         /// Performs OCR on the given image.
76 |         /// </summary>
77 |         /// <param name="data">The bytes of the image.</param>
78 |         /// <param name="width">The width of the image.</param>
79 |         /// <param name="height">The height of the image.</param>
80 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
81 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
82 |         Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel);
83 |
84 |         /// <summary>
85 |         /// Performs OCR on a rectangle inside the given image.
86 |         /// </summary>
87 |         /// <param name="data">The bytes of the image.</param>
88 |         /// <param name="width">The width of the image.</param>
89 |         /// <param name="height">The height of the image.</param>
90 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
91 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
92 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
93 |         /// <param name="rectWidth">The width of the rectangle.</param>
94 |         /// <param name="rectHeight">The height of the rectangle.</param>
95 |         /// <returns>The found text as a UTF8 string.</returns>
96 |         string Read(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
97 |
98 |         /// <summary>
99 |         /// Performs OCR on a rectangle inside the given image.
100 |         /// </summary>
101 |         /// <param name="data">The bytes of the image.</param>
102 |         /// <param name="width">The width of the image.</param>
103 |         /// <param name="height">The height of the image.</param>
104 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
105 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
106 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
107 |         /// <param name="rectWidth">The width of the rectangle.</param>
108 |         /// <param name="rectHeight">The height of the rectangle.</param>
109 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
110 |         Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
111 |
112 |         /// <summary>
113 |         /// Performs OCR on the given image.
114 |         /// </summary>
115 |         /// <param name="data">The bytes of the image.</param>
116 |         /// <param name="width">The width of the image.</param>
117 |         /// <param name="height">The height of the image.</param>
118 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
119 |         /// <returns>The found text as a UTF8 string.</returns>
120 |         string Read(Span<byte> data, int width, int height, int bytesPerPixel);
121 |
122 |         /// <summary>
123 |         /// Performs OCR on the given image.
124 |         /// </summary>
125 |         /// <param name="data">The bytes of the image.</param>
126 |         /// <param name="width">The width of the image.</param>
127 |         /// <param name="height">The height of the image.</param>
128 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
129 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
130 |         Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel);
131 |
132 |         /// <summary>
133 |         /// Performs OCR on a rectangle inside the given image.
134 |         /// </summary>
135 |         /// <param name="data">The bytes of the image.</param>
136 |         /// <param name="width">The width of the image.</param>
137 |         /// <param name="height">The height of the image.</param>
138 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
139 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
140 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
141 |         /// <param name="rectWidth">The width of the rectangle.</param>
142 |         /// <param name="rectHeight">The height of the rectangle.</param>
143 |         /// <returns>The found text as a UTF8 string.</returns>
144 |         string Read(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
145 |
146 |         /// <summary>
147 |         /// Performs OCR on a rectangle inside the given image.
148 |         /// </summary>
149 |         /// <param name="data">The bytes of the image.</param>
150 |         /// <param name="width">The width of the image.</param>
151 |         /// <param name="height">The height of the image.</param>
152 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
153 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
154 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
155 |         /// <param name="rectWidth">The width of the rectangle.</param>
156 |         /// <param name="rectHeight">The height of the rectangle.</param>
157 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
158 |         Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
159 |
160 |         /// <summary>
161 |         /// Performs OCR on the given image.
162 |         /// </summary>
163 |         /// <param name="data">The bytes of the image.</param>
164 |         /// <param name="width">The width of the image.</param>
165 |         /// <param name="height">The height of the image.</param>
166 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
167 |         /// <returns>The found text as a UTF8 string.</returns>
168 |         string Read(byte* data, int width, int height, int bytesPerPixel);
169 |
170 |         /// <summary>
171 |         /// Performs OCR on the given image.
172 |         /// </summary>
173 |         /// <param name="data">The bytes of the image.</param>
174 |         /// <param name="width">The width of the image.</param>
175 |         /// <param name="height">The height of the image.</param>
176 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
177 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
178 |         Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel);
179 |
180 |         /// <summary>
181 |         /// Performs OCR on a rectangle inside the given image.
182 |         /// </summary>
183 |         /// <param name="data">The bytes of the image.</param>
184 |         /// <param name="width">The width of the image.</param>
185 |         /// <param name="height">The height of the image.</param>
186 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
187 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
188 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
189 |         /// <param name="rectWidth">The width of the rectangle.</param>
190 |         /// <param name="rectHeight">The height of the rectangle.</param>
191 |         /// <returns>The found text as a UTF8 string.</returns>
192 |         string Read(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
193 |
194 |         /// <summary>
195 |         /// Performs OCR on a rectangle inside the given image.
196 |         /// </summary>
197 |         /// <param name="data">The bytes of the image.</param>
198 |         /// <param name="width">The width of the image.</param>
199 |         /// <param name="height">The height of the image.</param>
200 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
201 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
202 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
203 |         /// <param name="rectWidth">The width of the rectangle.</param>
204 |         /// <param name="rectHeight">The height of the rectangle.</param>
205 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
206 |         Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
207 |
208 |         /// <summary>
209 |         /// Performs OCR on the given image.
210 |         /// </summary>
211 |         /// <param name="data">The bytes of the image.</param>
212 |         /// <param name="width">The width of the image.</param>
213 |         /// <param name="height">The height of the image.</param>
214 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
215 |         /// <returns>The found text as a UTF8 string.</returns>
216 |         string Read(IntPtr data, int width, int height, int bytesPerPixel);
217 |
218 |         /// <summary>
219 |         /// Performs OCR on the given image.
220 |         /// </summary>
221 |         /// <param name="data">The bytes of the image.</param>
222 |         /// <param name="width">The width of the image.</param>
223 |         /// <param name="height">The height of the image.</param>
224 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
225 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
226 |         Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel);
227 |
228 |         /// <summary>
229 |         /// Performs OCR on a rectangle inside the given image.
230 |         /// </summary>
231 |         /// <param name="data">The bytes of the image.</param>
232 |         /// <param name="width">The width of the image.</param>
233 |         /// <param name="height">The height of the image.</param>
234 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
235 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
236 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
237 |         /// <param name="rectWidth">The width of the rectangle.</param>
238 |         /// <param name="rectHeight">The height of the rectangle.</param>
239 |         /// <returns>The found text as a UTF8 string.</returns>
240 |         string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
241 |
242 |         /// <summary>
243 |         /// Performs OCR on a rectangle inside the given image.
244 |         /// </summary>
245 |         /// <param name="data">The bytes of the image.</param>
246 |         /// <param name="width">The width of the image.</param>
247 |         /// <param name="height">The height of the image.</param>
248 |         /// <param name="bytesPerPixel">The number of bytes per pixel.</param>
249 |         /// <param name="rectX">The X coordinate of the rectangle.</param>
250 |         /// <param name="rectY">The Y coordinate of the rectangle.</param>
251 |         /// <param name="rectWidth">The width of the rectangle.</param>
252 |         /// <param name="rectHeight">The height of the rectangle.</param>
253 |         /// <returns>A task whose result is the found text as a UTF8 string.</returns>
254 |         Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);
255 |     }
256 | }
257 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/LazyQueue.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Threading;
4 | using System.Threading.Tasks;
5 |
6 | namespace TesserNet.Internal
7 | {
8 |     /// <summary>
9 |     /// Provides implementation for a simple asynchronous queue.
10 |     /// </summary>
11 |     /// <typeparam name="T">Type of elements stored in the queue.</typeparam>
12 |     internal class LazyQueue<T> : IDisposable
13 |     {
14 |         // Backing storage; enqueue and dequeue only touch it while holding the mutation semaphore.
15 |         private readonly Queue<T> queue = new Queue<T>();
16 |
17 |         // Binary semaphore guarding the backing queue; usable from both sync and async callers.
18 |         private readonly SemaphoreSlim mutation = new SemaphoreSlim(1, 1);
19 |
20 |         // Released once per enqueued element; dequeue operations wait on it until an element exists.
21 |         private readonly SemaphoreSlim availability = new SemaphoreSlim(0);
22 |         private bool isDisposed;
23 |
24 |         /// <summary>
25 |         /// Gets the number of elements currently stored. The value is read without taking the
26 |         /// mutation semaphore, so it is only a snapshot.
27 |         /// </summary>
28 |         public int Count => queue.Count;
29 |
30 |         /// <summary>
31 |         /// Enqueues a value asynchronously.
32 |         /// </summary>
33 |         /// <param name="value">The value to enqueue.</param>
34 |         /// <returns>A task that performs the enqueuing.</returns>
35 |         public async Task EnqueueAsync(T value)
36 |         {
37 |             await mutation.WaitAsync().ConfigureAwait(false);
38 |             EnqueueInternal(value);
39 |         }
40 |
41 |         /// <summary>
42 |         /// Enqueues a value synchronously.
43 |         /// </summary>
44 |         /// <param name="value">The value to enqueue.</param>
45 |         public void Enqueue(T value)
46 |         {
47 |             mutation.Wait();
48 |             EnqueueInternal(value);
49 |         }
50 |
51 |         /// <summary>
52 |         /// Dequeues a value asynchronously, waiting until one is available.
53 |         /// </summary>
54 |         /// <returns>A task which awaits a value to dequeue.</returns>
55 |         public async Task<T> DequeueAsync()
56 |         {
57 |             await availability.WaitAsync().ConfigureAwait(false);
58 |             await mutation.WaitAsync().ConfigureAwait(false);
59 |             return DequeueInternal();
60 |         }
61 |
62 |         /// <summary>
63 |         /// Dequeues a value synchronously, blocking until one is available.
64 |         /// </summary>
65 |         /// <returns>The value to dequeue.</returns>
66 |         public T Dequeue()
67 |         {
68 |             availability.Wait();
69 |             mutation.Wait();
70 |             return DequeueInternal();
71 |         }
72 |
73 |         /// <inheritdoc/>
74 |         public void Dispose()
75 |         {
76 |             Dispose(true);
77 |             GC.SuppressFinalize(this);
78 |         }
79 |
80 |         /// <summary>
81 |         /// Releases unmanaged and - optionally - managed resources.
82 |         /// </summary>
83 |         /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
84 |         protected virtual void Dispose(bool disposing)
85 |         {
86 |             if (isDisposed)
87 |             {
88 |                 return;
89 |             }
90 |
91 |             isDisposed = true;
92 |
93 |             if (disposing)
94 |             {
95 |                 availability.Dispose();
96 |                 mutation.Dispose();
97 |             }
98 |         }
99 |
100 |         // Adds the value; the caller must already hold the mutation semaphore, which is released here.
101 |         private void EnqueueInternal(T value)
102 |         {
103 |             try
104 |             {
105 |                 queue.Enqueue(value);
106 |             }
107 |             finally
108 |             {
109 |                 // Always hand the semaphore back, otherwise a failed enqueue would block every later caller.
110 |                 mutation.Release();
111 |             }
112 |
113 |             // Signal one waiting consumer only after the element has really been stored.
114 |             availability.Release();
115 |         }
116 |
117 |         // Removes the oldest value; the caller must already hold the mutation semaphore, which is released here.
118 |         private T DequeueInternal()
119 |         {
120 |             try
121 |             {
122 |                 return queue.Dequeue();
123 |             }
124 |             finally
125 |             {
126 |                 mutation.Release();
127 |             }
128 |         }
129 |     }
130 | }
109 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/Loader.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.IO;
4 | using System.IO.Compression;
5 | using System.Linq;
6 | using System.Reflection;
7 | using System.Runtime.InteropServices;
8 | using System.Runtime.Versioning;
9 |
10 | namespace TesserNet.Internal
11 | {
12 |     /// <summary>
13 |     /// Provides functionality for loading the correct libraries into the runtime.
14 |     /// </summary>
15 |     internal static class Loader
16 |     {
17 |         /// <summary>
18 |         /// Gets the temporary directory to which the files were unpacked.
19 |         /// </summary>
20 |         /// <returns>The temporary unpack directory, built from the temp path, "tessernet", the assembly version and the platform string.</returns>
21 |         internal static string GetUnpackDirectory()
22 |         {
23 |             string temp = Path.GetTempPath();
24 |             string version = Assembly.GetExecutingAssembly().GetName().Version!.ToString();
25 |             string platform = GetPlatformString();
26 |             return Path.Combine(temp, "tessernet", version, platform);
27 |         }
28 |
29 |         /// <summary>
30 |         /// Loads the correct libraries into the runtime.
31 |         /// </summary>
32 |         internal static void Load()
33 |         {
34 |             Assembly assembly = Assembly.GetExecutingAssembly();
35 |
36 |             // Using declarations release the stream and the archive even when extraction or loading throws.
37 |             using Stream stream = assembly.GetManifestResourceStream("TesserNet.Resources.zip")!;
38 |             using ZipArchive resources = new ZipArchive(stream);
39 |
40 |             // ForPlatform is a lazy query; EnsureCopied enumerates it while the archive is still open.
41 |             string platform = GetPlatformString();
42 |             IEnumerable<ZipArchiveEntry> files = resources.ForPlatform(platform);
43 |             EnsureCopied(files);
44 |         }
45 |
46 |         // Maps the current OS and process bitness to the name of the matching folder inside the resource archive.
47 |         private static string GetPlatformString()
48 |         {
49 |             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
50 |             {
51 |                 if (Environment.Is64BitProcess)
52 |                 {
53 |                     return "w64";
54 |                 }
55 |                 else
56 |                 {
57 |                     return "w86";
58 |                 }
59 |             }
60 |             else if (RuntimeInformation.IsOSPlatform(OSPlatform.Linux))
61 |             {
62 |                 return "linux";
63 |             }
64 |             else
65 |             {
66 |                 // Every platform that is neither Windows nor Linux is treated as macOS.
67 |                 return "mac";
68 |             }
69 |         }
70 |
71 |         // Makes sure the unpack directory exists and processes every given archive entry.
72 |         private static void EnsureCopied(IEnumerable<ZipArchiveEntry> entries)
73 |         {
74 |             string tempRoot = GetUnpackDirectory();
75 |             Directory.CreateDirectory(tempRoot);
76 |
77 |             foreach (ZipArchiveEntry entry in entries)
78 |             {
79 |                 CopyResource(tempRoot, entry);
80 |             }
81 |         }
82 |
83 |         // Extracts one entry into the given directory (unless the file already exists) and loads it when it is a native library.
84 |         private static void CopyResource(string path, ZipArchiveEntry entry)
85 |         {
86 |             // Only the file name of the entry is used, so every file lands directly in the target directory.
87 |             string fileName = Path.GetFileName(entry.FullName);
88 |             string filePath = Path.Combine(path, fileName);
89 |
90 |             if (!File.Exists(filePath))
91 |             {
92 |                 entry.ExtractToFile(filePath, false);
93 |             }
94 |
95 |             // The handles returned by the load calls are not inspected, so a failed load goes unnoticed here.
96 |             string extension = Path.GetExtension(filePath);
97 |             if (extension == ".dll" && RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
98 |             {
99 |                 NativeMethods.WindowsLoadLib(filePath);
100 |             }
101 |             else if (extension == ".so" || extension == ".dylib")
102 |             {
103 |                 NativeMethods.UnixLoadLib(filePath);
104 |             }
105 |         }
106 |
107 |         // Selects the non-empty entries below "<platform>/" and below "any/"; ordinal matching because these are archive paths, not linguistic text.
108 |         private static IEnumerable<ZipArchiveEntry> ForPlatform(this ZipArchive resources, string platform)
109 |             => resources.Entries.Where(x =>
110 |                 (x.FullName.StartsWith($"{platform}/", StringComparison.Ordinal) && x.FullName.Length > platform.Length + 1)
111 |                 || (x.FullName.StartsWith("any/", StringComparison.Ordinal) && x.FullName.Length > 4));
112 |
113 |         private static class NativeMethods
114 |         {
115 |             // Bound to the wide-character export so that paths with characters outside the ANSI code page still load.
116 |             [SupportedOSPlatform(PlatformNames.Windows)]
117 |             [DllImport("kernel32", CharSet = CharSet.Unicode, ExactSpelling = true, SetLastError = true, EntryPoint = "LoadLibraryW")]
118 |             public static extern IntPtr WindowsLoadLib([MarshalAs(UnmanagedType.LPWStr)] string lpFileName);
119 |
120 |             // NOTE(review): the default flags value 2 presumably corresponds to RTLD_NOW - confirm against the platform headers.
121 |             [SupportedOSPlatform(PlatformNames.Linux)]
122 |             [SupportedOSPlatform(PlatformNames.MacOS)]
123 |             [DllImport("libdl", CharSet = CharSet.Ansi, ExactSpelling = false, SetLastError = true, EntryPoint = "dlopen")]
124 |             public static extern IntPtr UnixLoadLib([MarshalAs(UnmanagedType.LPStr)] string filename, int flags = 2);
125 |         }
126 |     }
127 | }
118 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/PlatformNames.cs:
--------------------------------------------------------------------------------
1 | namespace TesserNet.Internal;
2 |
3 | /// <summary>
4 | /// Contains platform names, used as arguments for <c>SupportedOSPlatform</c> attributes.
5 | /// </summary>
6 | internal static class PlatformNames
7 | {
8 | /// <summary>
9 | /// The platform name for the Windows operating system.
10 | /// </summary>
11 | public const string Windows = "windows";
12 |
13 | /// <summary>
14 | /// The platform name for the Linux operating system.
15 | /// </summary>
16 | public const string Linux = "linux";
17 |
18 | /// <summary>
19 | /// The platform name for the MacOS operating system.
20 | /// </summary>
21 | public const string MacOS = "macos";
22 | }
23 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/TesseractApi.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 |
4 | namespace TesserNet.Internal
5 | {
6 |     /// <summary>
7 |     /// Provides an interface for the Tesseract API.
8 |     /// </summary>
9 |     internal abstract class TesseractApi
10 |     {
11 |         // Serializes the one-time unpacking and loading of the native libraries.
12 |         private static readonly object unpackLock = new object();
13 |         private static bool unpacked;
14 |
15 |         /// <summary>
16 |         /// Creates an instance of the Tesseract API for the current operating system.
17 |         /// </summary>
18 |         /// <returns>A Tesseract API.</returns>
19 |         public static TesseractApi Create()
20 |         {
21 |             // Without the lock two concurrent callers could both run the loader and collide while extracting the same files.
22 |             lock (unpackLock)
23 |             {
24 |                 if (!unpacked)
25 |                 {
26 |                     Loader.Load();
27 |                     unpacked = true;
28 |                 }
29 |             }
30 |
31 |             if (RuntimeInformation.IsOSPlatform(OSPlatform.Windows))
32 |             {
33 |                 return new WindowsTesseractApi();
34 |             }
35 |
36 |             return new UnixTesseractApi();
37 |         }
38 |
39 |         /// <summary>
40 |         /// Creates an instance of API base.
41 |         /// </summary>
42 |         /// <returns>A handle for the base.</returns>
43 |         public abstract IntPtr TessBaseAPICreate();
44 |
45 |         /// <summary>
46 |         /// Deletes an API base.
47 |         /// </summary>
48 |         /// <param name="handle">The API base handle.</param>
49 |         public abstract void TessBaseAPIDelete(IntPtr handle);
50 |
51 |         /// <summary>
52 |         /// Sets the settings for the given API base. Can be executed multiple times to change settings in between runs.
53 |         /// </summary>
54 |         /// <param name="handle">The API base handle.</param>
55 |         /// <param name="dataPath">The data path.</param>
56 |         /// <param name="language">The language following ISO 639-2 specification.</param>
57 |         /// <param name="oem">The OCR engine mode.</param>
58 |         /// <param name="configs">The configs.</param>
59 |         /// <param name="configSize">Size of the configuration.</param>
60 |         /// <returns>A success code: zero if successful, non-zero if a problem has occurred.</returns>
61 |         public abstract int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize);
62 |
63 |         /// <summary>
64 |         /// Sets the image to be processed next.
65 |         /// </summary>
66 |         /// <param name="handle">The API base handle.</param>
67 |         /// <param name="data">The data.</param>
68 |         /// <param name="width">The width.</param>
69 |         /// <param name="height">The height.</param>
70 |         /// <param name="bytesPerPixel">The bytes per pixel.</param>
71 |         /// <param name="bytesPerLine">The bytes per line.</param>
72 |         public abstract void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);
73 |
74 |         /// <summary>
75 |         /// Performs the OCR.
76 |         /// </summary>
77 |         /// <param name="handle">The API base handle.</param>
78 |         /// <returns>The found text on the image as a UTF8 string.</returns>
79 |         public abstract string TessBaseAPIGetUTF8Text(IntPtr handle);
80 |
81 |         /// <summary>
82 |         /// Sets the source resolution.
83 |         /// </summary>
84 |         /// <param name="handle">The API base handle.</param>
85 |         /// <param name="ppi">The pixels per inch.</param>
86 |         public abstract void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);
87 |
88 |         /// <summary>
89 |         /// Takes a rectangle of the image for performing OCR.
90 |         /// </summary>
91 |         /// <param name="handle">The API base handle.</param>
92 |         /// <param name="x">The x coordinate.</param>
93 |         /// <param name="y">The y coordinate.</param>
94 |         /// <param name="width">The width.</param>
95 |         /// <param name="height">The height.</param>
96 |         public abstract void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);
97 |
98 |         /// <summary>
99 |         /// Frees all image data and result data.
100 |         /// </summary>
101 |         /// <param name="handle">The API base handle.</param>
102 |         public abstract void TessBaseAPIClear(IntPtr handle);
103 |
104 |         /// <summary>
105 |         /// Sets the segmentation mode.
106 |         /// </summary>
107 |         /// <param name="handle">The API base handle.</param>
108 |         /// <param name="mode">The mode.</param>
109 |         public abstract void TessBaseAPISetPageSegMode(IntPtr handle, int mode);
110 |
111 |         /// <summary>
112 |         /// Sets the value of a named variable.
113 |         /// </summary>
114 |         /// <param name="handle">The API base handle.</param>
115 |         /// <param name="key">The name of the variable.</param>
116 |         /// <param name="value">The value.</param>
117 |         /// <returns>Whether the operation was successful or not.</returns>
118 |         public abstract bool TessBaseAPISetVariable(IntPtr handle, string key, string value);
119 |
120 |         /// <summary>
121 |         /// Sets the used config file.
122 |         /// </summary>
123 |         /// <param name="handle">The API base handle.</param>
124 |         /// <param name="file">The name or path to the file of the config file.</param>
125 |         public abstract void TessBaseAPIReadConfigFile(IntPtr handle, string file);
126 |     }
127 | }
122 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/UnixTesseractApi.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 | using System.Runtime.Versioning;
4 |
5 | namespace TesserNet.Internal
6 | {
7 |     /// <summary>
8 |     /// Tesseract API implementation for Unix platforms; every member forwards to the matching native export.
9 |     /// </summary>
10 |     /// <seealso cref="TesseractApi" />
11 |     internal class UnixTesseractApi : TesseractApi
12 |     {
13 |         /// <inheritdoc/>
14 |         [SupportedOSPlatform(PlatformNames.Linux)]
15 |         public override IntPtr TessBaseAPICreate()
16 |         {
17 |             return NativeMethods.TessBaseAPICreate();
18 |         }
19 |
20 |         /// <inheritdoc/>
21 |         [SupportedOSPlatform(PlatformNames.Linux)]
22 |         public override void TessBaseAPIDelete(IntPtr handle)
23 |         {
24 |             NativeMethods.TessBaseAPIDelete(handle);
25 |         }
26 |
27 |         /// <inheritdoc/>
28 |         [SupportedOSPlatform(PlatformNames.Linux)]
29 |         public override string TessBaseAPIGetUTF8Text(IntPtr handle)
30 |         {
31 |             // The native call hands back a raw pointer; the helper decodes it into a managed string.
32 |             IntPtr nativeText = NativeMethods.TessBaseAPIGetUTF8Text(handle);
33 |             return nativeText.ToUtf8String();
34 |         }
35 |
36 |         /// <inheritdoc/>
37 |         [SupportedOSPlatform(PlatformNames.Linux)]
38 |         public override int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize)
39 |         {
40 |             return NativeMethods.TessBaseAPIInit1(handle, dataPath, language, oem, configs, configSize);
41 |         }
42 |
43 |         /// <inheritdoc/>
44 |         [SupportedOSPlatform(PlatformNames.Linux)]
45 |         public override void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine)
46 |         {
47 |             NativeMethods.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, bytesPerLine);
48 |         }
49 |
50 |         /// <inheritdoc/>
51 |         [SupportedOSPlatform(PlatformNames.Linux)]
52 |         public override void TessBaseAPISetSourceResolution(IntPtr handle, int ppi)
53 |         {
54 |             NativeMethods.TessBaseAPISetSourceResolution(handle, ppi);
55 |         }
56 |
57 |         /// <inheritdoc/>
58 |         [SupportedOSPlatform(PlatformNames.Linux)]
59 |         public override void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height)
60 |         {
61 |             NativeMethods.TessBaseAPISetRectangle(handle, x, y, width, height);
62 |         }
63 |
64 |         /// <inheritdoc/>
65 |         [SupportedOSPlatform(PlatformNames.Linux)]
66 |         public override void TessBaseAPIClear(IntPtr handle)
67 |         {
68 |             NativeMethods.TessBaseAPIClear(handle);
69 |         }
70 |
71 |         /// <inheritdoc/>
72 |         [SupportedOSPlatform(PlatformNames.Linux)]
73 |         public override void TessBaseAPISetPageSegMode(IntPtr handle, int mode)
74 |         {
75 |             NativeMethods.TessBaseAPISetPageSegMode(handle, mode);
76 |         }
77 |
78 |         /// <inheritdoc/>
79 |         [SupportedOSPlatform(PlatformNames.Linux)]
80 |         public override bool TessBaseAPISetVariable(IntPtr handle, string key, string value)
81 |         {
82 |             return NativeMethods.TessBaseAPISetVariable(handle, key, value);
83 |         }
84 |
85 |         /// <inheritdoc/>
86 |         [SupportedOSPlatform(PlatformNames.Linux)]
87 |         public override void TessBaseAPIReadConfigFile(IntPtr handle, string file)
88 |         {
89 |             NativeMethods.TessBaseAPIReadConfigFile(handle, file);
90 |         }
91 |
92 |         // P/Invoke declarations, listed in the same order as the overrides above.
93 |         private static class NativeMethods
94 |         {
95 |             // Name of the shared library that all imports below bind to.
96 |             private const string LibraryName = "libtesseract.so.4";
97 |
98 |             [SupportedOSPlatform(PlatformNames.Linux)]
99 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
100 |             public static extern IntPtr TessBaseAPICreate();
101 |
102 |             [SupportedOSPlatform(PlatformNames.Linux)]
103 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
104 |             public static extern void TessBaseAPIDelete(IntPtr handle);
105 |
106 |             [SupportedOSPlatform(PlatformNames.Linux)]
107 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
108 |             public static extern IntPtr TessBaseAPIGetUTF8Text(IntPtr handle);
109 |
110 |             [SupportedOSPlatform(PlatformNames.Linux)]
111 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
112 |             public static extern int TessBaseAPIInit1(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string dataPath, [MarshalAs(UnmanagedType.LPStr)] string language, int oem, IntPtr configs, int configSize);
113 |
114 |             [SupportedOSPlatform(PlatformNames.Linux)]
115 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
116 |             public static extern void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);
117 |
118 |             [SupportedOSPlatform(PlatformNames.Linux)]
119 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
120 |             public static extern void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);
121 |
122 |             [SupportedOSPlatform(PlatformNames.Linux)]
123 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
124 |             public static extern void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);
125 |
126 |             [SupportedOSPlatform(PlatformNames.Linux)]
127 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
128 |             public static extern void TessBaseAPIClear(IntPtr handle);
129 |
130 |             [SupportedOSPlatform(PlatformNames.Linux)]
131 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
132 |             public static extern void TessBaseAPISetPageSegMode(IntPtr handle, int mode);
133 |
134 |             [SupportedOSPlatform(PlatformNames.Linux)]
135 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
136 |             public static extern bool TessBaseAPISetVariable(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);
137 |
138 |             [SupportedOSPlatform(PlatformNames.Linux)]
139 |             [DllImport(LibraryName, CharSet = CharSet.Auto, SetLastError = true, CallingConvention = CallingConvention.Cdecl)]
140 |             public static extern void TessBaseAPIReadConfigFile(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string file);
141 |         }
142 |     }
143 | }
118 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/Utf8Helper.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 | using System.Text;
4 |
5 | namespace TesserNet.Internal
6 | {
7 |     /// <summary>
8 |     /// Provides methods to help with dealing with UTF8 strings.
9 |     /// </summary>
10 |     internal static class Utf8Helper
11 |     {
12 |         /// <summary>
13 |         /// Reads a null-terminated UTF8 string from a pointer and frees the pointer afterwards.
14 |         /// </summary>
15 |         /// <param name="ptr">The pointer to read from. It must not be used after this call.</param>
16 |         /// <returns>The string at the pointer.</returns>
17 |         /// <exception cref="ArgumentNullException"><paramref name="ptr"/> is <see cref="IntPtr.Zero"/>.</exception>
18 |         public static string ToUtf8String(this IntPtr ptr)
19 |         {
20 |             // Reading through a null pointer would otherwise fail inside Marshal.ReadByte with a far less helpful error.
21 |             if (ptr == IntPtr.Zero)
22 |             {
23 |                 throw new ArgumentNullException(nameof(ptr));
24 |             }
25 |
26 |             try
27 |             {
28 |                 int length = ptr.GetStringLength();
29 |                 byte[] bytes = new byte[length];
30 |
31 |                 // Copy the whole payload at once instead of marshalling it byte by byte.
32 |                 Marshal.Copy(ptr, bytes, 0, length);
33 |                 return Encoding.UTF8.GetString(bytes);
34 |             }
35 |             finally
36 |             {
37 |                 // NOTE(review): callers pass pointers returned by TessBaseAPIGetUTF8Text; the Tesseract C API
38 |                 // presumably expects such text to be released with TessDeleteText rather than FreeHGlobal - confirm.
39 |                 Marshal.FreeHGlobal(ptr);
40 |             }
41 |         }
42 |
43 |         // Counts the bytes in front of the terminating zero byte.
44 |         private static int GetStringLength(this IntPtr ptr)
45 |         {
46 |             int length = 0;
47 |             while (Marshal.ReadByte(ptr, length) != 0)
48 |             {
49 |                 length++;
50 |             }
51 |
52 |             return length;
53 |         }
54 |     }
55 | }
47 |
--------------------------------------------------------------------------------
/src/TesserNet/Internal/WindowsTesseractApi.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.InteropServices;
3 | using System.Runtime.Versioning;
4 |
5 | namespace TesserNet.Internal
6 | {
/// <summary>
/// Tesseract API implementation that forwards every call to the Windows native library.
/// </summary>
/// <seealso cref="TesseractApi" />
internal class WindowsTesseractApi : TesseractApi
{
    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override IntPtr TessBaseAPICreate()
    {
        return NativeMethods.TessBaseAPICreate();
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIDelete(IntPtr handle)
    {
        NativeMethods.TessBaseAPIDelete(handle);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override string TessBaseAPIGetUTF8Text(IntPtr handle)
    {
        // The native call hands back a raw pointer; ToUtf8String decodes it and frees it.
        IntPtr text = NativeMethods.TessBaseAPIGetUTF8Text(handle);
        return text.ToUtf8String();
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override int TessBaseAPIInit1(IntPtr handle, string dataPath, string language, int oem, IntPtr configs, int configSize)
    {
        return NativeMethods.TessBaseAPIInit1(handle, dataPath, language, oem, configs, configSize);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine)
    {
        NativeMethods.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, bytesPerLine);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetSourceResolution(IntPtr handle, int ppi)
    {
        NativeMethods.TessBaseAPISetSourceResolution(handle, ppi);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height)
    {
        NativeMethods.TessBaseAPISetRectangle(handle, x, y, width, height);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIClear(IntPtr handle)
    {
        NativeMethods.TessBaseAPIClear(handle);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPISetPageSegMode(IntPtr handle, int mode)
    {
        NativeMethods.TessBaseAPISetPageSegMode(handle, mode);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override bool TessBaseAPISetVariable(IntPtr handle, string key, string value)
    {
        return NativeMethods.TessBaseAPISetVariable(handle, key, value);
    }

    /// <inheritdoc/>
    [SupportedOSPlatform(PlatformNames.Windows)]
    public override void TessBaseAPIReadConfigFile(IntPtr handle, string file)
    {
        NativeMethods.TessBaseAPIReadConfigFile(handle, file);
    }

    /// <summary>
    /// Raw P/Invoke declarations bound to the native library named by <see cref="DllPath"/>.
    /// </summary>
    private static class NativeMethods
    {
        private const string DllPath = "libtesseract500";

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern IntPtr TessBaseAPICreate();

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPIDelete(IntPtr handle);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPIClear(IntPtr handle);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern int TessBaseAPIInit1(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string dataPath, [MarshalAs(UnmanagedType.LPStr)] string language, int oem, IntPtr configs, int configSize);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPISetImage(IntPtr handle, IntPtr data, int width, int height, int bytesPerPixel, int bytesPerLine);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern IntPtr TessBaseAPIGetUTF8Text(IntPtr handle);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPISetSourceResolution(IntPtr handle, int ppi);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPISetRectangle(IntPtr handle, int x, int y, int width, int height);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPISetPageSegMode(IntPtr handle, int mode);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern bool TessBaseAPISetVariable(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string key, [MarshalAs(UnmanagedType.LPStr)] string value);

        [DllImport(DllPath, CallingConvention = CallingConvention.Cdecl, CharSet = CharSet.Auto, SetLastError = true)]
        [SupportedOSPlatform(PlatformNames.Windows)]
        public static extern void TessBaseAPIReadConfigFile(IntPtr handle, [MarshalAs(UnmanagedType.LPStr)] string file);
    }
}
117 | }
118 |
--------------------------------------------------------------------------------
/src/TesserNet/OcrEngineMode.cs:
--------------------------------------------------------------------------------
1 | namespace TesserNet
2 | {
/// <summary>
/// Selects which OCR engine is used; the numeric value is what gets passed to the native initializer.
/// </summary>
public enum OcrEngineMode
{
    /// <summary>
    /// Use the legacy Tesseract engine exclusively.
    /// </summary>
    TesseractOnly = 0,

    /// <summary>
    /// Use the newer LSTM based engine exclusively.
    /// </summary>
    LstmOnly = 1,

    /// <summary>
    /// Run the LSTM engine together with the legacy Tesseract engine.
    /// </summary>
    Combined = 2,

    /// <summary>
    /// Default behaviour: use whichever engine is available.
    /// </summary>
    Default = 3,
}
28 | }
29 |
--------------------------------------------------------------------------------
/src/TesserNet/PageSegmentation.cs:
--------------------------------------------------------------------------------
1 | namespace TesserNet
2 | {
/// <summary>
/// Describes how the page layout should be analysed; the numeric value is what gets passed to the native API.
/// </summary>
public enum PageSegmentation
{
    /// <summary>
    /// Only perform orientation and script detection (OSD).
    /// </summary>
    Osd = 0,

    /// <summary>
    /// Segment the page automatically and perform OSD.
    /// </summary>
    SegmentationOsd = 1,

    /// <summary>
    /// Segment the page automatically without OSD and without OCR.
    /// </summary>
    Segmentation = 2,

    /// <summary>
    /// Fully automatic page segmentation without OSD. (Default).
    /// </summary>
    SegmentationOcr = 3,

    /// <summary>
    /// The image is one column of text whose sizes may vary.
    /// </summary>
    Column = 4,

    /// <summary>
    /// The image is one uniform block of vertically aligned text.
    /// </summary>
    VerticalBlock = 5,

    /// <summary>
    /// The image is one uniform block of text.
    /// </summary>
    Block = 6,

    /// <summary>
    /// The image is a single line of text.
    /// </summary>
    Line = 7,

    /// <summary>
    /// The image is a single word.
    /// </summary>
    Word = 8,

    /// <summary>
    /// The image is a single word laid out in a circle.
    /// </summary>
    WordCircle = 9,

    /// <summary>
    /// The image is a single character.
    /// </summary>
    Character = 10,

    /// <summary>
    /// Sparse text: find as much text as possible, in no particular order.
    /// </summary>
    Sparse = 11,

    /// <summary>
    /// Sparse text combined with OSD.
    /// </summary>
    SparseOsd = 12,

    /// <summary>
    /// Raw line: a single text line, bypassing hacks that are Tesseract-specific.
    /// </summary>
    Raw = 13,
}
78 | }
79 |
--------------------------------------------------------------------------------
/src/TesserNet/Resources.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/CptWesley/TesserNet/3f240ca7d85e90d3a5cd9ae60ecd51bfb744287b/src/TesserNet/Resources.zip
--------------------------------------------------------------------------------
/src/TesserNet/TesserNet.csproj:
--------------------------------------------------------------------------------
1 |
2 |
3 | netstandard2.0
4 | ../Ruleset.ruleset
5 | bin/$(AssemblyName).xml
6 | 10
7 | enable
8 | true
9 | true
10 |
11 | Wesley Baartman
12 | https://github.com/CptWesley/TesserNet
13 | https://github.com/CptWesley/TesserNet
14 | Apache-2.0
15 | High level bindings for the OCR library Tesseract for .NET.
16 | tesseract ocr bindings optical character recognition tesseract-ocr
17 | $(Version)
18 | $(Version)
19 | 0.8.0
20 |
21 | README.md
22 | AnyCPU;x64;x86
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 | all
34 |
35 |
36 |
37 |
38 |
39 | all
40 | compile
41 |
42 |
43 |
--------------------------------------------------------------------------------
/src/TesserNet/Tesseract.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.IO;
3 | using System.Threading.Tasks;
4 | using TesserNet.Internal;
5 |
6 | namespace TesserNet
7 | {
/// <summary>
/// Provides high level bindings for the Tesseract API.
/// </summary>
public class Tesseract : TesseractBase
{
    private readonly TesseractApi api;
    private readonly IntPtr handle;
    private readonly object lck = new object();
    private bool isDisposed;

    // Snapshot of the options the native engine was last successfully initialized with.
    private TesseractOptions? lastOptions;

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class with default options.
    /// </summary>
    public Tesseract()
        : this(new TesseractOptions())
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class.
    /// </summary>
    /// <param name="options">Callback that adjusts the default options; may be null.</param>
    public Tesseract(Action<TesseractOptions> options)
        : this()
    {
        options?.Invoke(Options);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="Tesseract"/> class.
    /// </summary>
    /// <param name="options">The options.</param>
    public Tesseract(TesseractOptions options)
        : base(options)
    {
        api = TesseractApi.Create();
        handle = api.TessBaseAPICreate();
    }

    /// <summary>
    /// Finalizes an instance of the <see cref="Tesseract"/> class.
    /// </summary>
    ~Tesseract()
        => Dispose(false);

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the instance has been disposed.</exception>
    /// <exception cref="TesseractException">Thrown when initialization or any native call fails.</exception>
    public override unsafe string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        lock (lck)
        {
            if (isDisposed)
            {
                throw new ObjectDisposedException(nameof(Tesseract));
            }

            if (!Options.Equals(lastOptions))
            {
                // Forget the previous snapshot first and only store the new one once Init succeeded.
                // Otherwise a failed initialization would be skipped on the next call and the engine
                // would be used in an uninitialized or half-configured state.
                TesseractOptions snapshot = Options.Copy();
                lastOptions = null;
                Init(snapshot);
                lastOptions = snapshot;
            }

            Invoke(
                () => api.TessBaseAPISetImage(handle, data, width, height, bytesPerPixel, width * bytesPerPixel),
                "Error while setting subject image.");

            Invoke(
                () => api.TessBaseAPISetSourceResolution(handle, Options.PixelsPerInch),
                "Error while setting resolution.");

            // A rectangle is only applied when all four values describe a valid region.
            if (rectX >= 0 && rectY >= 0 && rectWidth > 0 && rectHeight > 0)
            {
                Invoke(
                    () => api.TessBaseAPISetRectangle(handle, rectX, rectY, rectWidth, rectHeight),
                    "Error while setting a rectangle.");
            }

            string result = string.Empty;
            Invoke(
                () => result = api.TessBaseAPIGetUTF8Text(handle),
                "Error while performing OCR.");

            Invoke(
                () => api.TessBaseAPIClear(handle),
                "Error while clearing result data.");

            return result;
        }
    }

    /// <inheritdoc/>
    public override Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Task.Run(() => Read(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight));

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        lock (lck)
        {
            // Check and set the flag inside the lock so that concurrent callers cannot both delete the handle.
            if (isDisposed)
            {
                return;
            }

            isDisposed = true;

            // The finalizer also runs when the constructor threw before these fields were assigned.
            if (api != null && handle != IntPtr.Zero)
            {
                api.TessBaseAPIDelete(handle);
            }
        }
    }

    /// <summary>
    /// Runs a native call and wraps any failure in a <see cref="TesseractException"/> that keeps the cause.
    /// </summary>
    /// <param name="nativeCall">The call to execute.</param>
    /// <param name="failureMessage">The message of the exception thrown on failure.</param>
    private static void Invoke(Action nativeCall, string failureMessage)
    {
        try
        {
            nativeCall();
        }
        catch (TesseractException)
        {
            throw;
        }
        catch (Exception e)
        {
            throw new TesseractException(failureMessage, e);
        }
    }

    /// <summary>
    /// Initializes the native engine and applies every setting from the given options.
    /// </summary>
    /// <param name="options">The options to apply.</param>
    private void Init(TesseractOptions options)
    {
        int result = api.TessBaseAPIInit1(handle, options.DataPath, options.Language, (int)options.EngineMode, IntPtr.Zero, 0);
        if (result != 0)
        {
            throw new TesseractException($"Error while initializing Tesseract with data file '{Path.Combine(options.DataPath, $"{options.Language}.traineddata")}'. It's possible the training data was not found or the data does not support the current OCR engine mode.");
        }

        Invoke(
            () => api.TessBaseAPISetPageSegMode(handle, (int)options.PageSegmentation),
            "Error while setting page segmentation mode.");

        SetVariable("tessedit_char_whitelist", string.IsNullOrWhiteSpace(options.Whitelist) ? string.Empty : options.Whitelist, "whitelist");
        SetVariable("tessedit_char_blacklist", string.IsNullOrWhiteSpace(options.Blacklist) ? string.Empty : options.Blacklist, "blacklist");
        SetVariable("classify_bln_numeric_mode", options.Numeric ? "1" : "0", "numeric mode");

        if (!string.IsNullOrWhiteSpace(options.Config))
        {
            Invoke(
                () => api.TessBaseAPIReadConfigFile(handle, options.Config),
                $"Error while loading config: '{options.Config}'.");
        }
    }

    /// <summary>
    /// Sets a single Tesseract variable and reports a rejected value separately from a failed call.
    /// </summary>
    /// <param name="key">The native variable name.</param>
    /// <param name="value">The value to assign.</param>
    /// <param name="subject">Human readable name of the setting, used in error messages.</param>
    private void SetVariable(string key, string value, string subject)
    {
        bool accepted = false;
        Invoke(
            () => accepted = api.TessBaseAPISetVariable(handle, key, value),
            $"Error while setting {subject}.");

        // Thrown outside the wrapper so this specific message is not replaced by the generic one.
        if (!accepted)
        {
            throw new TesseractException($"Setting {subject} unsuccessful.");
        }
    }
}
220 | }
221 |
--------------------------------------------------------------------------------
/src/TesserNet/TesseractBase.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Threading.Tasks;
3 |
4 | namespace TesserNet
5 | {
/// <summary>
/// Abstract base class for Tesseract instances. It maps every convenience overload onto the two
/// abstract pointer-based methods that derived classes implement.
/// </summary>
public unsafe abstract class TesseractBase : ITesseract
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractBase"/> class.
    /// </summary>
    /// <param name="options">The options.</param>
    public TesseractBase(TesseractOptions options)
        => Options = options;

    /// <inheritdoc/>
    public TesseractOptions Options { get; set; }

    /// <inheritdoc/>
    public abstract string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

    /// <inheritdoc/>
    public abstract Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight);

    /// <inheritdoc/>
    public string Read(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        // The call is synchronous, so pinning for the duration of the fixed block is sufficient.
        fixed (byte* ptr = data)
        {
            return Read(ptr, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        }
    }

    /// <inheritdoc/>
    public Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        // A span cannot outlive this call, while the returned task may still be running afterwards.
        // Copy the pixels so the asynchronous read works on memory that stays valid and pinned.
        byte[] copy = data.ToArray();
        return ReadAsync(new Memory<byte>(copy), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
    }

    /// <inheritdoc/>
    public string Read(byte[] data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read((Span<byte>)data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(Memory<byte> data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read(data.Span, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(Span<byte> data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte* data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public string Read(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => Read(new IntPtr(data), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public string Read(IntPtr data, int width, int height, int bytesPerPixel)
        => Read(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte[] data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => ReadAsync((Memory<byte>)data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(Memory<byte> data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        // Keep the buffer pinned until the asynchronous read has finished. A fixed block would
        // unpin it as soon as this method returns, while the task is still using the pointer.
        System.Buffers.MemoryHandle pin = data.Pin();
        Task<string> ocr;
        try
        {
            ocr = ReadAsync(new IntPtr(pin.Pointer), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        }
        catch
        {
            pin.Dispose();
            throw;
        }

        return ocr.ContinueWith(
            finished =>
            {
                pin.Dispose();

                // Rethrows the original exception (not an AggregateException) when the read failed.
                return finished.GetAwaiter().GetResult();
            },
            System.Threading.CancellationToken.None,
            TaskContinuationOptions.ExecuteSynchronously,
            TaskScheduler.Default);
    }

    /// <inheritdoc/>
    public Task<string> ReadAsync(Span<byte> data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public Task<string> ReadAsync(byte* data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
        => ReadAsync(new IntPtr(data), width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);

    /// <inheritdoc/>
    public Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel)
        => ReadAsync(data, width, height, bytesPerPixel, -1, -1, -1, -1);

    /// <inheritdoc/>
    public void Dispose()
    {
        Dispose(true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected abstract void Dispose(bool disposing);
}
122 | }
123 |
--------------------------------------------------------------------------------
/src/TesserNet/TesseractException.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Runtime.Serialization;
3 |
4 | namespace TesserNet
5 | {
/// <summary>
/// Exception thrown when something goes wrong with Tesseract execution.
/// </summary>
/// <seealso cref="Exception" />
[Serializable]
public class TesseractException : Exception
{
    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    public TesseractException()
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    /// <param name="message">The message that describes the error.</param>
    public TesseractException(string message)
        : base(message)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class.
    /// </summary>
    /// <param name="message">The error message that explains the reason for the exception.</param>
    /// <param name="innerException">The exception that is the cause of the current exception, or a null reference (Nothing in Visual Basic) if no inner exception is specified.</param>
    public TesseractException(string message, Exception innerException)
        : base(message, innerException)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractException"/> class from serialized data.
    /// The type carries <see cref="SerializableAttribute"/> so that this constructor is actually usable.
    /// </summary>
    /// <param name="info">The <see cref="SerializationInfo"/> that holds the serialized object data about the exception being thrown.</param>
    /// <param name="context">The <see cref="StreamingContext"/> that contains contextual information about the source or destination.</param>
    protected TesseractException(SerializationInfo info, StreamingContext context)
        : base(info, context)
    {
    }
}
48 | }
49 |
--------------------------------------------------------------------------------
/src/TesserNet/TesseractOptions.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using TesserNet.Internal;
3 |
4 | namespace TesserNet
5 | {
/// <summary>
/// Represents the options used for invoking Tesseract. Two instances are equal when all of
/// their properties are equal.
/// </summary>
public class TesseractOptions : IEquatable<TesseractOptions>
{
    /// <summary>
    /// Gets or sets the language.
    /// </summary>
    public string Language { get; set; } = "eng";

    /// <summary>
    /// Gets or sets the data path. Defaults to the TESSDATA_PREFIX environment variable and
    /// falls back to the directory reported by the loader when that variable is not set.
    /// </summary>
    public string DataPath { get; set; } = Environment.GetEnvironmentVariable("TESSDATA_PREFIX") ?? Loader.GetUnpackDirectory();

    /// <summary>
    /// Gets or sets the engine mode.
    /// </summary>
    public OcrEngineMode EngineMode { get; set; } = OcrEngineMode.Default;

    /// <summary>
    /// Gets or sets the pixels per inch.
    /// </summary>
    public int PixelsPerInch { get; set; } = 70;

    /// <summary>
    /// Gets or sets the page segmentation option.
    /// </summary>
    public PageSegmentation PageSegmentation { get; set; } = PageSegmentation.Block;

    /// <summary>
    /// Gets or sets the whitelist.
    /// </summary>
    public string Whitelist { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets the blacklist.
    /// </summary>
    public string Blacklist { get; set; } = string.Empty;

    /// <summary>
    /// Gets or sets a value indicating whether the thing we try to parse is numeric.
    /// </summary>
    public bool Numeric { get; set; }

    /// <summary>
    /// Gets or sets the configuration name or path.
    /// </summary>
    public string Config { get; set; } = string.Empty;

    /// <summary>
    /// Creates a copy of the options.
    /// </summary>
    /// <returns>A copy of the options.</returns>
    public TesseractOptions Copy()
        => new TesseractOptions
        {
            Language = this.Language,
            DataPath = this.DataPath,
            EngineMode = this.EngineMode,
            PixelsPerInch = this.PixelsPerInch,
            PageSegmentation = this.PageSegmentation,
            Whitelist = this.Whitelist,
            Blacklist = this.Blacklist,
            Numeric = this.Numeric,
            Config = this.Config,
        };

    /// <inheritdoc/>
    public override bool Equals(object? obj)
        => obj is TesseractOptions other && Equals(other);

    /// <inheritdoc/>
    public bool Equals(TesseractOptions? other)
    {
        if (other is null)
        {
            return false;
        }

        if (ReferenceEquals(this, other))
        {
            return true;
        }

        return Language == other.Language
            && DataPath == other.DataPath
            && EngineMode == other.EngineMode
            && PixelsPerInch == other.PixelsPerInch
            && PageSegmentation == other.PageSegmentation
            && Whitelist == other.Whitelist
            && Blacklist == other.Blacklist
            && Numeric == other.Numeric
            && Config == other.Config;
    }

    /// <inheritdoc/>
    public override int GetHashCode()
    {
        // Overflow is expected and harmless when mixing hash codes.
        unchecked
        {
            int hash = 17;
            hash = (hash * 31) + HashOf(Language);
            hash = (hash * 31) + HashOf(DataPath);
            hash = (hash * 31) + (int)EngineMode;
            hash = (hash * 31) + PixelsPerInch;
            hash = (hash * 31) + (int)PageSegmentation;
            hash = (hash * 31) + HashOf(Whitelist);
            hash = (hash * 31) + HashOf(Blacklist);
            hash = (hash * 31) + (Numeric ? 1 : 0);
            hash = (hash * 31) + HashOf(Config);
            return hash;
        }
    }

    /// <summary>
    /// Hashes a string and treats null as zero. The setters do not reject null at runtime and
    /// <see cref="Equals(TesseractOptions)"/> tolerates it, so hashing must not throw either.
    /// </summary>
    /// <param name="value">The string to hash; may be null.</param>
    /// <returns>The hash code of the string, or zero for null.</returns>
    private static int HashOf(string? value)
        => value is null ? 0 : value.GetHashCode();
}
116 | }
117 |
--------------------------------------------------------------------------------
/src/TesserNet/TesseractPool.cs:
--------------------------------------------------------------------------------
1 | using System;
2 | using System.Collections.Generic;
3 | using System.Threading;
4 | using System.Threading.Tasks;
5 | using TesserNet.Internal;
6 |
7 | namespace TesserNet
8 | {
/// <summary>
/// Scheduler for easier management of multiple tesseract instances. Instances are created on
/// demand up to <see cref="MaxPoolSize"/> and reused once they become idle.
/// </summary>
public class TesseractPool : TesseractBase
{
    private const int DefaultMaxPoolSize = 6;

    // Idle instances that are ready to be handed out again.
    private readonly LazyQueue<Tesseract> waiting = new LazyQueue<Tesseract>();

    // Every instance owned by the pool, idle or busy.
    private readonly HashSet<Tesseract> tesseracts = new HashSet<Tesseract>();

    // Serializes hand-out, resizing and disposal.
    private readonly SemaphoreSlim semaphore = new SemaphoreSlim(1);

    // Number of instances currently handed out to a read operation.
    private int busyCount;
    private int maxPoolSize;
    private bool isDisposed;

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    public TesseractPool()
        : this(DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">Callback that adjusts the default options; may be null.</param>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(Action<TesseractOptions> options, int maxPoolSize)
        : this(maxPoolSize)
    {
        options?.Invoke(Options);
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">Callback that adjusts the default options; may be null.</param>
    public TesseractPool(Action<TesseractOptions> options)
        : this(options, DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(int maxPoolSize)
        : this(new TesseractOptions(), maxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">The Tesseract options used for all spawned instances.</param>
    public TesseractPool(TesseractOptions options)
        : this(options, DefaultMaxPoolSize)
    {
    }

    /// <summary>
    /// Initializes a new instance of the <see cref="TesseractPool"/> class.
    /// </summary>
    /// <param name="options">The Tesseract options used for all spawned instances.</param>
    /// <param name="maxPoolSize">Maximum size of the pool.</param>
    public TesseractPool(TesseractOptions options, int maxPoolSize)
        : base(options)
        => this.maxPoolSize = maxPoolSize;

    /// <summary>
    /// Gets or sets the maximum size of the pool. Lowering it disposes surplus instances in the background.
    /// </summary>
    public int MaxPoolSize
    {
        get => maxPoolSize;
        set => Resize(value);
    }

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the pool has been disposed.</exception>
    public override string Read(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(TesseractPool));
        }

        // Copy before taking an instance so that a failing copy cannot lose one.
        TesseractOptions snapshot = Options.Copy();
        Tesseract tesseract;

        semaphore.Wait();
        try
        {
            if (waiting.Count == 0 && tesseracts.Count < MaxPoolSize)
            {
                tesseract = new Tesseract();
                tesseracts.Add(tesseract);
            }
            else
            {
                // Either an idle instance exists, or the pool is full and we wait for one.
                tesseract = waiting.Dequeue();
            }

            Interlocked.Increment(ref busyCount);
            tesseract.Options = snapshot;
        }
        finally
        {
            semaphore.Release();
        }

        try
        {
            return tesseract.Read(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        }
        finally
        {
            // Return the instance even when OCR failed; otherwise the pool shrinks with every
            // error until callers block forever.
            Interlocked.Decrement(ref busyCount);
            waiting.Enqueue(tesseract);
        }
    }

    /// <inheritdoc/>
    /// <exception cref="ObjectDisposedException">Thrown when the pool has been disposed.</exception>
    public override async Task<string> ReadAsync(IntPtr data, int width, int height, int bytesPerPixel, int rectX, int rectY, int rectWidth, int rectHeight)
    {
        if (isDisposed)
        {
            throw new ObjectDisposedException(nameof(TesseractPool));
        }

        // Copy before taking an instance so that a failing copy cannot lose one.
        TesseractOptions snapshot = Options.Copy();
        Tesseract tesseract;

        await semaphore.WaitAsync().ConfigureAwait(false);
        try
        {
            if (waiting.Count == 0 && tesseracts.Count < MaxPoolSize)
            {
                tesseract = new Tesseract();
                tesseracts.Add(tesseract);
            }
            else
            {
                // Either an idle instance exists, or the pool is full and we wait for one.
                tesseract = await waiting.DequeueAsync().ConfigureAwait(false);
            }

            Interlocked.Increment(ref busyCount);
            tesseract.Options = snapshot;
        }
        finally
        {
            semaphore.Release();
        }

        Task<string> ocr = tesseract.ReadAsync(data, width, height, bytesPerPixel, rectX, rectY, rectWidth, rectHeight);
        _ = GoToWaiting(tesseract, ocr);
        return await ocr.ConfigureAwait(false);
    }

    /// <summary>
    /// Releases unmanaged and - optionally - managed resources.
    /// </summary>
    /// <param name="disposing"><c>true</c> to release both managed and unmanaged resources; <c>false</c> to release only unmanaged resources.</param>
    protected override void Dispose(bool disposing)
    {
        if (isDisposed)
        {
            return;
        }

        isDisposed = true;

        if (disposing)
        {
            // Taken and never released: the semaphore itself is disposed below.
            semaphore.Wait();

            foreach (Tesseract tesseract in tesseracts)
            {
                tesseract.Dispose();
            }

            waiting.Dispose();
            semaphore.Dispose();
        }
    }

    /// <summary>
    /// Puts an instance back into the idle queue once its OCR task has completed, whether it
    /// succeeded or failed.
    /// </summary>
    /// <param name="t">The instance to return to the pool.</param>
    /// <param name="task">The OCR task that is using the instance.</param>
    private async Task GoToWaiting(Tesseract t, Task<string> task)
    {
        try
        {
            await task.ConfigureAwait(false);
        }
        catch (Exception)
        {
            // The caller of ReadAsync awaits the same task and observes the failure there.
            // Here the instance only has to be returned to the pool.
        }

        Interlocked.Decrement(ref busyCount);
        await waiting.EnqueueAsync(t).ConfigureAwait(false);
    }

    /// <summary>
    /// Stores the new maximum and starts disposing surplus instances in the background.
    /// </summary>
    /// <param name="size">The new maximum pool size.</param>
    private void Resize(int size)
    {
        maxPoolSize = size;

        if (!isDisposed)
        {
            _ = KillExcess();
        }
    }

    /// <summary>
    /// Disposes idle instances until the number of busy and idle instances fits the maximum.
    /// </summary>
    private async Task KillExcess()
    {
        await semaphore.WaitAsync().ConfigureAwait(false);
        try
        {
            while (busyCount + waiting.Count > maxPoolSize)
            {
                Tesseract tesseract = await waiting.DequeueAsync().ConfigureAwait(false);
                tesseracts.Remove(tesseract);
                tesseract.Dispose();
            }
        }
        finally
        {
            // Release even on failure so later reads are not blocked forever.
            semaphore.Release();
        }
    }
}
227 | }
228 |
--------------------------------------------------------------------------------
/src/stylecop.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://raw.githubusercontent.com/DotNetAnalyzers/StyleCopAnalyzers/master/StyleCop.Analyzers/StyleCop.Analyzers/Settings/stylecop.schema.json",
3 | "settings": {
4 | "indentation": {
5 | "useTabs": false,
6 | "indentationSize": 4
7 | },
8 | "maintainabilityRules": {
9 | "topLevelTypes": [ "class", "interface", "struct" ]
10 | },
11 | "orderingRules": {
12 | "usingDirectivesPlacement": "outsideNamespace",
13 | "elementOrder": [ "kind", "constant", "accessibility", "static", "readonly" ]
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------