├── .gitignore ├── img.png ├── img.tiff ├── README.md ├── Makefile ├── img.txt ├── .github └── workflows │ └── rust.yml ├── Cargo.toml ├── LICENSE ├── page-seg-modes.txt ├── src ├── page_seg_mode.rs └── lib.rs └── img.html /.gitignore: -------------------------------------------------------------------------------- 1 | target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /img.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antimatter15/tesseract-rs/HEAD/img.png -------------------------------------------------------------------------------- /img.tiff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/antimatter15/tesseract-rs/HEAD/img.tiff -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # tesseract-rs 2 | Rust bindings for [Tesseract](https://github.com/tesseract-ocr/tesseract) 3 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | src/page_seg_mode.rs: page-seg-modes.txt build_page_seg_modes.py 2 | python build_page_seg_modes.py < page-seg-modes.txt | rustfmt > $@ 3 | -------------------------------------------------------------------------------- /img.txt: -------------------------------------------------------------------------------- 1 | Hundreds of companies around the world are using Rust in production today for fast, low- 2 | resource, cross-platform solutions. Software you know and love, like Firefox, Dropbox, and 3 | Cloudflare, uses Rust. From startups to large corporations, from embedded devices to 4 | scalable web services, Rust is a great fit. 
5 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | build: 14 | 15 | runs-on: ubuntu-latest 16 | 17 | steps: 18 | - name: Install tesseract 19 | run: sudo apt-get install libleptonica-dev libtesseract-dev clang tesseract-ocr-eng 20 | - uses: actions/checkout@v2 21 | - name: Build 22 | run: cargo build --verbose --no-default-features 23 | - name: Run tests 24 | run: cargo test --verbose --no-default-features 25 | - name: Check formatting 26 | run: cargo fmt -- --check 27 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "tesseract" 3 | version = "0.15.2" 4 | authors = ["Kevin Kwok ", "Chris Couzens "] 5 | documentation = "https://docs.rs/tesseract" 6 | repository = "https://github.com/antimatter15/tesseract-rs" 7 | description = "Higher-level bindings for Tesseract OCR" 8 | license = "MIT" 9 | keywords = ["tesseract", "OCR", "bindings"] 10 | categories = ["api-bindings", "multimedia::images"] 11 | 12 | [dependencies] 13 | tesseract-sys = "~0.6" 14 | tesseract-plumbing = { version="~0.11.1", default-features = false } 15 | thiserror = "1.0" 16 | 17 | [features] 18 | default = ["tesseract_5_2"] 19 | tesseract_5_2 = ["tesseract-plumbing/tesseract_5_2"] 20 | 21 | [package.metadata.docs.rs] 22 | # docs.rs is not compatible with tesseract_5_2 23 | no-default-features = true 24 | 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Kevin Kwok 4 
| 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /page-seg-modes.txt: -------------------------------------------------------------------------------- 1 | # Copied from https://tesseract-ocr.github.io/tessapi/5.x/a01818.html#a338d4c8b5d497b5ec3e6e4269d8ac66aab76fe3ca390d99e070ea60b892ee18ef 2 | PSM_OSD_ONLY 3 | Orientation and script detection only. 4 | 5 | PSM_AUTO_OSD 6 | Automatic page segmentation with orientation and script detection. (OSD) 7 | 8 | PSM_AUTO_ONLY 9 | Automatic page segmentation, but no OSD, or OCR. 10 | 11 | PSM_AUTO 12 | Fully automatic page segmentation, but no OSD. 13 | 14 | PSM_SINGLE_COLUMN 15 | Assume a single column of text of variable sizes. 16 | 17 | PSM_SINGLE_BLOCK_VERT_TEXT 18 | Assume a single uniform block of vertically aligned text. 19 | 20 | PSM_SINGLE_BLOCK 21 | Assume a single uniform block of text. 
(Default.) 22 | 23 | PSM_SINGLE_LINE 24 | Treat the image as a single text line. 25 | 26 | PSM_SINGLE_WORD 27 | Treat the image as a single word. 28 | 29 | PSM_CIRCLE_WORD 30 | Treat the image as a single word in a circle. 31 | 32 | PSM_SINGLE_CHAR 33 | Treat the image as a single character. 34 | 35 | PSM_SPARSE_TEXT 36 | Find as much text as possible in no particular order. 37 | 38 | PSM_SPARSE_TEXT_OSD 39 | Sparse text with orientation and script det. 40 | 41 | PSM_RAW_LINE 42 | Treat the image as a single text line, bypassing hacks that are Tesseract-specific. 43 | -------------------------------------------------------------------------------- /src/page_seg_mode.rs: -------------------------------------------------------------------------------- 1 | // ⚠️ This file is generated 2 | // ⚠️ Regenerate with `make src/page_seg_mode.rs` 3 | 4 | use tesseract_sys::TessPageSegMode; 5 | 6 | /// Enum representing different PageSegMode options accepted by Tesseract 7 | #[derive(Debug, Clone, Copy, PartialEq, Eq)] 8 | pub enum PageSegMode { 9 | /// Orientation and script detection only. 10 | PsmOsdOnly, 11 | /// Automatic page segmentation with orientation and script detection. (OSD) 12 | PsmAutoOsd, 13 | /// Automatic page segmentation, but no OSD, or OCR. 14 | PsmAutoOnly, 15 | /// Fully automatic page segmentation, but no OSD. 16 | PsmAuto, 17 | /// Assume a single column of text of variable sizes. 18 | PsmSingleColumn, 19 | /// Assume a single uniform block of vertically aligned text. 20 | PsmSingleBlockVertText, 21 | /// Assume a single uniform block of text. (Default.) 22 | PsmSingleBlock, 23 | /// Treat the image as a single text line. 24 | PsmSingleLine, 25 | /// Treat the image as a single word. 26 | PsmSingleWord, 27 | /// Treat the image as a single word in a circle. 28 | PsmCircleWord, 29 | /// Treat the image as a single character. 30 | PsmSingleChar, 31 | /// Find as much text as possible in no particular order. 
32 | PsmSparseText, 33 | /// Sparse text with orientation and script det. 34 | PsmSparseTextOsd, 35 | /// Treat the image as a single text line, bypassing hacks that are Tesseract-specific. 36 | PsmRawLine, 37 | } 38 | 39 | impl PageSegMode { 40 | /// Get the page-seg-mode's value as used by Tesseract 41 | pub fn as_tess_page_seg_mode(&self) -> TessPageSegMode { 42 | match self { 43 | PageSegMode::PsmOsdOnly => tesseract_sys::TessPageSegMode_PSM_OSD_ONLY, 44 | PageSegMode::PsmAutoOsd => tesseract_sys::TessPageSegMode_PSM_AUTO_OSD, 45 | PageSegMode::PsmAutoOnly => tesseract_sys::TessPageSegMode_PSM_AUTO_ONLY, 46 | PageSegMode::PsmAuto => tesseract_sys::TessPageSegMode_PSM_AUTO, 47 | PageSegMode::PsmSingleColumn => tesseract_sys::TessPageSegMode_PSM_SINGLE_COLUMN, 48 | PageSegMode::PsmSingleBlockVertText => { 49 | tesseract_sys::TessPageSegMode_PSM_SINGLE_BLOCK_VERT_TEXT 50 | } 51 | PageSegMode::PsmSingleBlock => tesseract_sys::TessPageSegMode_PSM_SINGLE_BLOCK, 52 | PageSegMode::PsmSingleLine => tesseract_sys::TessPageSegMode_PSM_SINGLE_LINE, 53 | PageSegMode::PsmSingleWord => tesseract_sys::TessPageSegMode_PSM_SINGLE_WORD, 54 | PageSegMode::PsmCircleWord => tesseract_sys::TessPageSegMode_PSM_CIRCLE_WORD, 55 | PageSegMode::PsmSingleChar => tesseract_sys::TessPageSegMode_PSM_SINGLE_CHAR, 56 | PageSegMode::PsmSparseText => tesseract_sys::TessPageSegMode_PSM_SPARSE_TEXT, 57 | PageSegMode::PsmSparseTextOsd => tesseract_sys::TessPageSegMode_PSM_SPARSE_TEXT_OSD, 58 | PageSegMode::PsmRawLine => tesseract_sys::TessPageSegMode_PSM_RAW_LINE, 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /img.html: -------------------------------------------------------------------------------- 1 |
2 |
3 |

4 | 5 | Hundreds 6 | of 7 | companies 8 | around 9 | the 10 | world 11 | are 12 | using 13 | Rust 14 | in 15 | production 16 | today 17 | for 18 | fast, 19 | low- 20 | 21 | 22 | resource, 23 | cross-platform 24 | solutions. 25 | Software 26 | you 27 | know 28 | and 29 | love, 30 | like 31 | Firefox, 32 | Dropbox, 33 | and 34 | 35 | 36 | Cloudflare, 37 | uses 38 | Rust. 39 | From 40 | startups 41 | to 42 | large 43 | corporations, 44 | from 45 | embedded 46 | devices 47 | to 48 | 49 | 50 | scalable 51 | web 52 | services, 53 | Rust 54 | is 55 | a 56 | great 57 | fit. 58 | 59 |

60 |
61 |
62 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | pub extern crate tesseract_plumbing as plumbing; 2 | extern crate tesseract_sys; 3 | extern crate thiserror; 4 | 5 | use self::thiserror::Error; 6 | use std::ffi::CString; 7 | use std::ffi::NulError; 8 | use std::os::raw::c_int; 9 | use std::str; 10 | mod page_seg_mode; 11 | 12 | pub use page_seg_mode::PageSegMode; 13 | 14 | use self::tesseract_sys::{ 15 | TessOcrEngineMode, TessOcrEngineMode_OEM_DEFAULT, TessOcrEngineMode_OEM_LSTM_ONLY, 16 | TessOcrEngineMode_OEM_TESSERACT_LSTM_COMBINED, TessOcrEngineMode_OEM_TESSERACT_ONLY, 17 | }; 18 | 19 | #[derive(Debug, Error)] 20 | pub enum InitializeError { 21 | #[error("Conversion to CString failed")] 22 | CStringError(#[from] NulError), 23 | #[error("TessBaseApi failed to initialize")] 24 | TessBaseAPIInitError(#[from] plumbing::TessBaseApiInitError), 25 | } 26 | 27 | #[derive(Debug, Error)] 28 | pub enum SetImageError { 29 | #[error("Conversion to CString failed")] 30 | CStringError(#[from] NulError), 31 | #[error("Failed to read image")] 32 | PixReadError(#[from] plumbing::leptonica_plumbing::PixReadError), 33 | } 34 | 35 | #[derive(Debug, Error)] 36 | pub enum SetVariableError { 37 | #[error("Conversion to CString failed")] 38 | CStringError(#[from] NulError), 39 | #[error("TessBaseApi failed to set variable")] 40 | TessBaseAPISetVariableError(#[from] plumbing::TessBaseApiSetVariableError), 41 | } 42 | 43 | #[derive(Debug, Error)] 44 | pub enum TesseractError { 45 | #[error("Failed to set language")] 46 | InitializeError(#[from] InitializeError), 47 | #[error("Failed to set image")] 48 | SetImageError(#[from] SetImageError), 49 | #[error("Errored whilst recognizing")] 50 | RecognizeError(#[from] plumbing::TessBaseApiRecogniseError), 51 | #[error("Errored whilst getting text")] 52 | GetTextError(#[from] 
plumbing::TessBaseApiGetUtf8TextError), 53 | #[error("Errored whilst getting HOCR text")] 54 | GetHOCRTextError(#[from] plumbing::TessBaseApiGetHocrTextError), 55 | #[error("Errored whilst getting TSV text")] 56 | GetTsvTextError(#[from] plumbing::TessBaseApiGetTsvTextError), 57 | #[error("Errored whilst setting frame")] 58 | SetFrameError(#[from] plumbing::TessBaseApiSetImageSafetyError), 59 | #[error("Errored whilst setting image from mem")] 60 | SetImgFromMemError(#[from] plumbing::leptonica_plumbing::PixReadMemError), 61 | #[error("Errored whilst setting variable")] 62 | SetVariableError(#[from] SetVariableError), 63 | } 64 | 65 | /// https://tesseract-ocr.github.io/tessapi/5.x/a01818.html#a04550a0ed1279562027bf2fc92c421aead84e1ef94e50df1622b4fcd189c6c00b 66 | pub enum OcrEngineMode { 67 | /// Specify this mode 68 | /// to indicate that any of the other modes 69 | /// should be automatically inferred from the 70 | /// variables in the language-specific config, 71 | /// command-line configs, or if not specified 72 | /// in any of the above should be set to the 73 | /// default OEM_TESSERACT_ONLY. 74 | Default, 75 | /// Run just the LSTM line recognizer. 76 | LstmOnly, 77 | /// Run the LSTM recognizer, but allow fallback 78 | /// to Tesseract when things get difficult. 79 | /// deprecated 80 | TesseractLstmCombined, 81 | /// Run Tesseract only - fastest; deprecated 
82 | TesseractOnly, 83 | } 84 | 85 | impl OcrEngineMode { 86 | fn to_value(&self) -> TessOcrEngineMode { 87 | match *self { 88 | OcrEngineMode::Default => TessOcrEngineMode_OEM_DEFAULT, 89 | OcrEngineMode::LstmOnly => TessOcrEngineMode_OEM_LSTM_ONLY, 90 | OcrEngineMode::TesseractLstmCombined => TessOcrEngineMode_OEM_TESSERACT_LSTM_COMBINED, 91 | OcrEngineMode::TesseractOnly => TessOcrEngineMode_OEM_TESSERACT_ONLY, 92 | } 93 | } 94 | } 95 | 96 | pub struct Tesseract(plumbing::TessBaseApi); 97 | 98 | impl Tesseract { 99 | pub fn new(datapath: Option<&str>, language: Option<&str>) -> Result { 100 | let mut tess = Tesseract(plumbing::TessBaseApi::create()); 101 | let datapath = match datapath { 102 | Some(i) => Some(CString::new(i)?), 103 | None => None, 104 | }; 105 | let language = match language { 106 | Some(i) => Some(CString::new(i)?), 107 | None => None, 108 | }; 109 | 110 | tess.0.init_2(datapath.as_deref(), language.as_deref())?; 111 | Ok(tess) 112 | } 113 | 114 | pub fn new_with_oem( 115 | datapath: Option<&str>, 116 | language: Option<&str>, 117 | oem: OcrEngineMode, 118 | ) -> Result { 119 | let mut tess = Tesseract(plumbing::TessBaseApi::create()); 120 | let datapath = match datapath { 121 | Some(i) => Some(CString::new(i)?), 122 | None => None, 123 | }; 124 | let language = match language { 125 | Some(i) => Some(CString::new(i)?), 126 | None => None, 127 | }; 128 | 129 | tess.0 130 | .init_4(datapath.as_deref(), language.as_deref(), oem.to_value())?; 131 | Ok(tess) 132 | } 133 | 134 | #[cfg(feature = "tesseract_5_2")] 135 | pub fn new_with_data( 136 | data: &[u8], 137 | language: Option<&str>, 138 | oem: OcrEngineMode, 139 | ) -> Result { 140 | let mut tess = Tesseract(plumbing::TessBaseApi::create()); 141 | let language = match language { 142 | Some(i) => Some(CString::new(i)?), 143 | None => None, 144 | }; 145 | 146 | tess.0.init_1(data, language.as_deref(), oem.to_value())?; 147 | Ok(tess) 148 | } 149 | 150 | pub fn set_image(mut self, filename: &str) 
-> Result { 151 | let pix = plumbing::leptonica_plumbing::Pix::read(&CString::new(filename)?)?; 152 | self.0.set_image_2(&pix); 153 | Ok(self) 154 | } 155 | pub fn set_frame( 156 | mut self, 157 | frame_data: &[u8], 158 | width: i32, 159 | height: i32, 160 | bytes_per_pixel: i32, 161 | bytes_per_line: i32, 162 | ) -> Result { 163 | self.0 164 | .set_image(frame_data, width, height, bytes_per_pixel, bytes_per_line)?; 165 | Ok(self) 166 | } 167 | pub fn set_image_from_mem( 168 | mut self, 169 | img: &[u8], 170 | ) -> Result { 171 | let pix = plumbing::leptonica_plumbing::Pix::read_mem(img)?; 172 | self.0.set_image_2(&pix); 173 | Ok(self) 174 | } 175 | 176 | pub fn set_rectangle(mut self, left: i32, top: i32, width: i32, height: i32) -> Self { 177 | self.0.set_rectangle(left, top, width, height); 178 | self 179 | } 180 | 181 | pub fn set_source_resolution(mut self, ppi: i32) -> Self { 182 | self.0.set_source_resolution(ppi); 183 | self 184 | } 185 | 186 | pub fn set_variable(mut self, name: &str, value: &str) -> Result { 187 | self.0 188 | .set_variable(&CString::new(name)?, &CString::new(value)?)?; 189 | Ok(self) 190 | } 191 | pub fn recognize(mut self) -> Result { 192 | self.0.recognize()?; 193 | Ok(self) 194 | } 195 | pub fn get_text(&mut self) -> Result { 196 | Ok(self 197 | .0 198 | .get_utf8_text()? 199 | .as_ref() 200 | .to_string_lossy() 201 | .into_owned()) 202 | } 203 | pub fn mean_text_conf(&mut self) -> i32 { 204 | self.0.mean_text_conf() 205 | } 206 | 207 | /// Get the text encoded as HTML with bounding box tags 208 | /// 209 | /// See [img.html](../img.html) for an example. 210 | pub fn get_hocr_text( 211 | &mut self, 212 | page: c_int, 213 | ) -> Result { 214 | Ok(self 215 | .0 216 | .get_hocr_text(page)? 
217 | .as_ref() 218 | .to_string_lossy() 219 | .into_owned()) 220 | } 221 | 222 | /// Get the text encoded as TSV, including bounding boxes, confidence 223 | /// 224 | /// See [char* TessBaseAPI::GetTSVText](https://github.com/tesseract-ocr/tesseract/blob/cdebe13d81e2ad2a83be533886750f5491b25262/src/api/baseapi.cpp#L1398) 225 | pub fn get_tsv_text( 226 | &mut self, 227 | page: c_int, 228 | ) -> Result { 229 | Ok(self 230 | .0 231 | .get_tsv_text(page)? 232 | .as_ref() 233 | .to_string_lossy() 234 | .into_owned()) 235 | } 236 | 237 | pub fn set_page_seg_mode(&mut self, mode: PageSegMode) { 238 | self.0.set_page_seg_mode(mode.as_tess_page_seg_mode()); 239 | } 240 | } 241 | 242 | pub fn ocr(filename: &str, language: &str) -> Result { 243 | Ok(Tesseract::new(None, Some(language))? 244 | .set_image(filename)? 245 | .recognize()? 246 | .get_text()?) 247 | } 248 | 249 | pub fn ocr_from_frame( 250 | frame_data: &[u8], 251 | width: i32, 252 | height: i32, 253 | bytes_per_pixel: i32, 254 | bytes_per_line: i32, 255 | language: &str, 256 | ) -> Result { 257 | Ok(Tesseract::new(None, Some(language))? 258 | .set_frame(frame_data, width, height, bytes_per_pixel, bytes_per_line)? 259 | .recognize()? 260 | .get_text()?) 261 | } 262 | 263 | #[test] 264 | fn ocr_test() -> Result<(), TesseractError> { 265 | assert_eq!( 266 | ocr("img.png", "eng")?, 267 | include_str!("../img.txt").to_string() 268 | ); 269 | Ok(()) 270 | } 271 | 272 | #[test] 273 | fn ocr_from_frame_test() -> Result<(), TesseractError> { 274 | assert_eq!( 275 | ocr_from_frame(include_bytes!("../img.tiff"), 2256, 324, 3, 2256 * 3, "eng")?, 276 | include_str!("../img.txt").to_string() 277 | ); 278 | Ok(()) 279 | } 280 | 281 | #[test] 282 | fn ocr_from_mem_with_ppi() -> Result<(), TesseractError> { 283 | let mut cube = Tesseract::new(None, Some("eng"))? 284 | .set_image_from_mem(include_bytes!("../img.tiff"))? 
285 | .set_source_resolution(70); 286 | assert_eq!(&cube.get_text()?, include_str!("../img.txt")); 287 | Ok(()) 288 | } 289 | 290 | #[test] 291 | fn expanded_test() -> Result<(), TesseractError> { 292 | let mut cube = Tesseract::new(None, Some("eng"))? 293 | .set_image("img.png")? 294 | .set_variable("tessedit_char_blacklist", "z")? 295 | .recognize()?; 296 | assert_eq!(&cube.get_text()?, include_str!("../img.txt")); 297 | Ok(()) 298 | } 299 | 300 | #[test] 301 | fn hocr_test() -> Result<(), TesseractError> { 302 | let mut cube = Tesseract::new(None, Some("eng"))?.set_image("img.png")?; 303 | assert!(&cube.get_hocr_text(0)?.contains("
Result<(), TesseractError> { 310 | let only_tesseract_str = 311 | Tesseract::new_with_oem(None, Some("eng"), OcrEngineMode::TesseractOnly)? 312 | .set_image("img.png")? 313 | .recognize()? 314 | .get_text()?; 315 | 316 | let only_lstm_str = Tesseract::new_with_oem(None, Some("eng"), OcrEngineMode::LstmOnly)? 317 | .set_image("img.png")? 318 | .recognize()? 319 | .get_text()?; 320 | 321 | assert_ne!(only_tesseract_str, only_lstm_str); 322 | Ok(()) 323 | } 324 | 325 | #[test] 326 | fn oem_ltsm_only_test() -> Result<(), TesseractError> { 327 | let only_lstm_str = Tesseract::new_with_oem(None, Some("eng"), OcrEngineMode::LstmOnly)? 328 | .set_image("img.png")? 329 | .recognize()? 330 | .get_text()?; 331 | 332 | assert_eq!(only_lstm_str, include_str!("../img.txt")); 333 | Ok(()) 334 | } 335 | 336 | #[test] 337 | fn initialize_with_none() { 338 | assert!(Tesseract::new(None, None).is_ok()); 339 | } 340 | --------------------------------------------------------------------------------