├── .gitignore ├── .vscode └── settings.json ├── Cargo.toml ├── README.md └── src └── lib.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /Cargo.lock 3 | 4 | /PaddleOCR-json 5 | /pojnew 6 | /v1.4.0 -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "rust-analyzer.cargo.features": "all" 3 | } -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "paddleocr" 3 | version = "0.4.1" 4 | edition = "2021" 5 | license = "MIT" 6 | description = "A simple Rust wrapper for PaddleOCR-JSON." 7 | homepage = "https://github.com/OverflowCat/paddleocr" 8 | repository = "https://github.com/OverflowCat/paddleocr" 9 | readme = "README.md" 10 | categories = ["computer-vision", "external-ffi-bindings"] 11 | 12 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 13 | 14 | [dependencies] 15 | base64 = { version = "0.21.4", optional = true } 16 | serde = { version = "1.0", features = ["derive"] } 17 | serde_json = { version = "1.0" } 18 | 19 | 20 | [lib] 21 | doctest = false 22 | 23 | [features] 24 | bytes = ["dep:base64"] 25 | 26 | [dev-dependencies] 27 | paddleocr = { path = ".", features = ["bytes"] } 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Crate `paddleocr` 2 | 3 | [![](https://img.shields.io/crates/v/paddleocr.svg)](https://crates.io/crates/paddleocr/) 4 | 5 | A simple wrapper for [`hiroi-sora/PaddleOCR-json`](https://github.com/hiroi-sora/PaddleOCR-json). 
6 | 7 | ## Usage 8 | 9 | ```rust 10 | let mut p = paddleocr::Ppocr::new( 11 | PathBuf::from(".../PaddleOCR-json.exe"), // path to binary 12 | Default::default(), // language config_path, default `zh_CN` 13 | ) 14 | .unwrap(); // initialize 15 | 16 | let now = std::time::Instant::now(); // benchmark 17 | { 18 | // OCR files 19 | println!("{}", p.ocr(Path::new(".../test1.png").into()).unwrap()); 20 | println!("{}", p.ocr(Path::new(".../test2.png").into()).unwrap()); 21 | println!("{}", p.ocr(Path::new(".../test3.png").into()).unwrap()); 22 | 23 | // OCR clipboard 24 | println!("{}", p.ocr_clipboard().unwrap()); 25 | } 26 | println!("Elapsed: {:.2?}", now.elapsed()); 27 | ``` 28 | 29 | Use `ocr_and_parse` to get structured results. 30 | 31 | By enabling the `bytes` feature, you can pass image data as a byte array (`AsRef<[u8]>`). 32 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::io::Result as IoResult; 2 | use std::io::{BufRead, BufReader, Write}; 3 | use std::path::Path; 4 | use std::process; 5 | use std::{error::Error, fmt, path::PathBuf}; 6 | 7 | use serde::{ 8 | Deserialize, // for `ocr_and_parse` 9 | Serialize, // for `WriteDict` 10 | }; 11 | 12 | #[derive(Debug, Clone)] 13 | pub struct OsNotSupportedError; 14 | impl fmt::Display for OsNotSupportedError { 15 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 16 | write!(f, "OS not supported") 17 | } 18 | } 19 | impl Error for OsNotSupportedError {} 20 | 21 | type Point = [usize; 2]; 22 | 23 | #[derive(Deserialize, Debug, Clone)] 24 | #[serde(untagged)] 25 | pub enum OcrRec { 26 | Content { code: u32, data: Vec }, 27 | Message { code: u32, data: String }, 28 | } 29 | 30 | #[derive(Deserialize, Debug, Clone)] 31 | pub struct ContentData { 32 | #[serde(rename(deserialize = "box"))] 33 | pub rect: Rectangle, 34 | pub score: f64, 35 | pub text: String, 36 | } 37 | 38 | 
pub type Rectangle = [Point; 4]; 39 | 40 | /** 41 | * The image to be recognized. 42 | */ 43 | #[derive(Debug, Serialize)] 44 | #[serde(untagged)] 45 | pub enum ImageData { 46 | ImagePathDict { image_path: String }, 47 | ImageBase64Dict { image_base64: String }, 48 | } 49 | 50 | impl ImageData { 51 | /** 52 | * Create an `ImageData` from a file path. 53 | */ 54 | pub fn from_path(path: S) -> ImageData 55 | where 56 | S: AsRef + std::fmt::Display, 57 | { 58 | ImageData::ImagePathDict { 59 | image_path: path.to_string(), 60 | } 61 | } 62 | /** 63 | * Create an `ImageData` from a base64 string. 64 | */ 65 | pub fn from_base64(base64: String) -> ImageData { 66 | ImageData::ImageBase64Dict { 67 | image_base64: base64, 68 | } 69 | } 70 | /** 71 | * Create an `ImageData` from a byte slice. 72 | * Requires the `bytes` feature. 73 | */ 74 | #[cfg(feature = "bytes")] 75 | pub fn from_bytes(bytes: T) -> ImageData 76 | where 77 | T: AsRef<[u8]>, 78 | { 79 | use base64::Engine; 80 | let engine = base64::engine::general_purpose::STANDARD; 81 | ImageData::ImageBase64Dict { 82 | image_base64: engine.encode(bytes), 83 | } 84 | } 85 | } 86 | 87 | impl From<&Path> for ImageData { 88 | fn from(path: &Path) -> Self { 89 | ImageData::from_path(path.to_string_lossy()) 90 | } 91 | } 92 | impl From for ImageData { 93 | fn from(path: PathBuf) -> Self { 94 | ImageData::from_path(path.to_string_lossy()) 95 | } 96 | } 97 | 98 | /** 99 | * A paddleocr-json instance. 100 | */ 101 | pub struct Ppocr { 102 | #[allow(dead_code)] 103 | exe_path: PathBuf, 104 | process: process::Child, 105 | } 106 | 107 | impl Ppocr { 108 | /** 109 | Initialize a new instance. 
110 | 111 | # Examples 112 | 113 | ```no_run 114 | let mut p = paddleocr::Ppocr::new( 115 | PathBuf::from(".../PaddleOCR-json.exe"), // path to binary 116 | Default::default(), // language config_path, default `zh_CN` 117 | ) 118 | .unwrap(); // initialize 119 | ``` 120 | */ 121 | pub fn new(exe_path: PathBuf, config_path: Option) -> Result> { 122 | std::env::set_var("RUST_BACKTRACE", "full"); 123 | if !cfg!(target_os = "windows") { 124 | return Err(Box::new(OsNotSupportedError {})); 125 | } 126 | if !exe_path.exists() { 127 | return Err(Box::new(std::io::Error::new( 128 | std::io::ErrorKind::NotFound, 129 | "Executable not found", 130 | ))); 131 | } 132 | 133 | let wd = exe_path 134 | .canonicalize()? 135 | .parent() 136 | .ok_or_else(|| "No parent directory found")? 137 | .to_path_buf(); 138 | 139 | let mut command = process::Command::new(&exe_path); 140 | command.current_dir(wd); 141 | if let Some(config_path) = config_path { 142 | command.args(&["--config_path", &config_path.to_string_lossy()]); 143 | } 144 | let process = command 145 | .stdout(process::Stdio::piped()) 146 | .stderr(process::Stdio::piped()) 147 | .stdin(process::Stdio::piped()) 148 | .spawn()?; 149 | 150 | let mut p = Ppocr { exe_path, process }; 151 | 152 | for _i in 1..10 { 153 | match p.read_line() { 154 | Ok(line) => { 155 | if line.contains("OCR init completed.") 156 | || line.contains("Image path dose not exist") 157 | { 158 | break; // successfully initialized 159 | } /* else if line.contains("PaddleOCR-json v1.2.1") { 160 | // in v1.2.1 the last line cannot be read by read_line 161 | p.write_fmt(format_args!("\n")).err(); 162 | } */ 163 | } 164 | Err(e) => { 165 | return Err(Box::new(e)); 166 | } 167 | } 168 | } 169 | 170 | Ok(p) 171 | } 172 | 173 | fn read_line(&mut self) -> IoResult { 174 | let mut buff = String::new(); 175 | let mut stdout = BufReader::new(self.process.stdout.as_mut().unwrap()); 176 | match stdout.read_line(&mut buff) { 177 | Ok(_siz) => Ok(buff), 178 | Err(e) => 
Err(e), 179 | } 180 | } 181 | 182 | #[inline] 183 | fn write_fmt(&mut self, fmt: fmt::Arguments<'_>) -> IoResult<()> { 184 | let inner = self.process.stdin.as_mut().ok_or(std::io::Error::new( 185 | std::io::ErrorKind::Other, 186 | "stdin not piped", 187 | ))?; 188 | inner.write_fmt(fmt) 189 | } 190 | 191 | /** 192 | OCRs the image at the given path. Note that the returned JSON is not parsed or checked, and a valid JSON does not necessarily mean it is successful. 193 | 194 | # Examples 195 | 196 | ```no_run 197 | let mut p = paddleocr::Ppocr::new( 198 | PathBuf::from(".../PaddleOCR-json.exe"), // path to binary 199 | Default::default(), // language config_path, default `zh_CN` 200 | ) 201 | .unwrap(); // initialize 202 | println!("{}", p.ocr(Path::new(".../test.png").into())); 203 | ``` 204 | # Results 205 | 206 | ## Return values 207 | 208 | 通过API调用一次OCR,无论成功与否,都会返回一个字典。 209 | 210 | 字典中,根含两个元素:状态码`code`和内容`data`。 211 | 212 | 状态码`code`为整数,每种状态码对应一种情况: 213 | 214 | ### `100` 识别到文字 215 | 216 | - data内容为数组。数组每一项为字典,含三个元素: 217 | - `text` :文本内容,字符串。 218 | - `box` :文本包围盒,长度为4的数组,分别为左上角、右上角、右下角、左下角的`[x,y]`。整数。 219 | - `score` :识别置信度,浮点数。 220 | - 例: 221 | ``` 222 | {'code':100,'data':[{'box':[[13,5],[161,5],[161,27],[13,27]],'score':0.9996442794799805,'text':'飞舞的因果交流'}]} 223 | ``` 224 | 225 | ### `101` 未识别到文字 226 | 227 | - data为字符串:`No text found in image. Path:"图片路径"` 228 | - 例:```{'code':101,'data':'No text found in image. Path: "D:\\空白.png"'}``` 229 | - 这是正常现象,识别没有文字的空白图片时会出现这种结果。 230 | 231 | ### `200` 图片路径不存在 232 | 233 | - data:`Image path dose not exist. Path:"图片路径".` 234 | - 例:`{'code':200,'data':'Image path dose not exist. Path: "D:\\不存在.png"'}` 235 | - 注意,在系统未开启utf-8支持(`使用 Unicode UTF-8 提供全球语言支持"`)时,不能读入含emoji等特殊字符的路径(如`😀.png`)。但一般的中文及其他 Unicode 字符路径是没问题的,不受系统区域及默认编码影响。 236 | 237 | ### `201` 图片路径string无法转换到wstring 238 | 239 | - data:`Image path failed to convert to utf-16 wstring. 
Path: "图片路径".` 240 | - 使用API时,理论上不会报这个错。 241 | - 开发API时,若传入字符串的编码不合法,有可能报这个错。 242 | 243 | ### `202` 图片路径存在,但无法打开文件 244 | 245 | - data:`Image open failed. Path: "图片路径".` 246 | - 可能由系统权限等原因引起。 247 | 248 | ### `203` 图片打开成功,但读取到的内容无法被opencv解码 249 | 250 | - data:`Image decode failed. Path: "图片路径".` 251 | - 注意,引擎不以文件后缀来区分各种图片,而是对存在的路径,均读入字节尝试解码。若传入的文件路径不是图片,或图片已损坏,则会报这个错。 252 | - 反之,将正常图片的后缀改为别的(如`.png`改成`.jpg或.exe`),也可以被正常识别。 253 | 254 | ### `210` 剪贴板打开失败 255 | 256 | - data:`Clipboard open failed.` 257 | - 可能由别的程序正在占用剪贴板等原因引起。 258 | 259 | ### `211` 剪贴板为空 260 | 261 | - data:`Clipboard is empty.` 262 | 263 | ### `212` 剪贴板的格式不支持 264 | 265 | - data:`Clipboard format is not valid.` 266 | - 引擎只能识别剪贴板中的位图或文件。若不是这两种格式(如复制了一段文本),则会报这个错。 267 | 268 | ### `213` 剪贴板获取内容句柄失败 269 | 270 | - data:`Getting clipboard data handle failed.` 271 | - 可能由别的程序正在占用剪贴板等原因引起。 272 | 273 | ### `214` 剪贴板查询到的文件的数量不为1 274 | 275 | - data:`Clipboard number of query files is not valid. Number: 文件数量` 276 | - 只允许一次复制一个文件。一次复制多个文件再调用OCR会得到此报错。 277 | 278 | ### `215` 剪贴板检索图形对象信息失败 279 | 280 | - data:`Clipboard get bitmap object failed.` 281 | - 剪贴板中是位图,但获取位图信息失败。可能由别的程序正在占用剪贴板等原因引起。 282 | 283 | ### `216` 剪贴板获取位图数据失败 284 | 285 | - data:`Getting clipboard bitmap bits failed.` 286 | - 剪贴板中是位图,获取位图信息成功,但读入缓冲区失败。可能由别的程序正在占用剪贴板等原因引起。 287 | 288 | ### `217` 剪贴板中位图的通道数不支持 289 | 290 | - data:`Clipboard number of image channels is not valid. 
Number: 通道数` 291 | - 引擎只允许读入通道为1(黑白)、3(RGB)、4(RGBA)的图片。位图通道数不是1、3或4,会报这个错。 292 | 293 | ### `300` base64字符串解析为string失败 294 | 295 | - data:`Base64 decode failed.` 296 | - 传入非法Base64字符串引起。(注意,传入Base64信息不应带有`data:image/jpg;base64,`前缀。) 297 | 298 | ### `301` base64字符串解析成功,但读取到的内容无法被opencv解码 299 | 300 | - data:`Base64 data imdecode failed.` 301 | 302 | ### `400` json对象 转字符串失败 303 | 304 | - data:`Json dump failed.CODE_ERR_JSON_DUMP` 305 | - 输入异常:传入非法json字符串,或者字符串含非utf-8编码字符导致无法解析引起。 306 | 307 | ### `401` json字符串 转对象失败 308 | 309 | - data:`Json dump failed.CODE_ERR_JSON_DUMP` 310 | - 输出异常:输出时OCR结果无法被编码为json字符串。 311 | 312 | ### `402` json对象 解析某个键时失败 313 | 314 | - data:`Json parse key 键名 failed.` 315 | - 比错误码`400`更精准的提示。如果发生异常,程序优先报`402`,无法处理才报`400`。 316 | 317 | ### `403` 未发现有效任务 318 | 319 | - data:`No valid tasks.` 320 | - 本次传入的指令中不含有效任务。 321 | */ 322 | 323 | pub fn ocr(&mut self, image: ImageData) -> IoResult { 324 | let s = serde_json::to_string(&image).unwrap().replace("\n", ""); 325 | self.write_fmt(format_args!("{}\n", s))?; 326 | self.read_line() 327 | } 328 | 329 | /** 330 | OCRs the image in clipboard. Note that the returned JSON is not parsed or checked, and a valid JSON does not necessarily mean it is successful. 331 | 332 | # Examples 333 | 334 | ```no_run 335 | let mut p = paddleocr::Ppocr::new( 336 | PathBuf::from(".../PaddleOCR-json.exe"), // path to binary 337 | Default::default(), // language config_path, default `zh_CN` 338 | ) 339 | .unwrap(); // initialize 340 | println!("{}", p.ocr_clipboard()); 341 | ``` 342 | */ 343 | #[inline] 344 | pub fn ocr_clipboard(&mut self) -> IoResult { 345 | self.ocr(ImageData::from_path("clipboard")) 346 | } 347 | 348 | pub fn ocr_and_parse(&mut self, image: ImageData) -> Result, String> { 349 | let ocr_result = self.ocr(image); 350 | let Ok(ocr_string) = ocr_result.as_ref() else { 351 | return Err("OCR failed".to_string()); 352 | }; 353 | match serde_json::from_str::(&ocr_string) { 354 | Ok(OcrRec::Content { data, .. 
}) => Ok(data), 355 | Ok(OcrRec::Message { code, data }) => Err(format!("Error Message {}: {}", code, data)), 356 | Err(e) => Err(format!("Response JSON parse failed: {}", e)), 357 | } 358 | } 359 | } 360 | 361 | impl Drop for Ppocr { 362 | /** 363 | * Kill the process when the instance is dropped. 364 | */ 365 | fn drop(&mut self) { 366 | self.process.kill().err(); 367 | } 368 | } 369 | 370 | #[cfg(test)] 371 | 372 | mod tests { 373 | use std::path::{Path, PathBuf}; 374 | 375 | use crate::{ImageData, Ppocr}; 376 | #[test] 377 | fn recognize() { 378 | let mut p = Ppocr::new( 379 | PathBuf::from( 380 | "E:/code/paddleocr/v1.4.0/PaddleOCR-json.exe", // path to binary 381 | ), 382 | Default::default(), 383 | ) 384 | .unwrap(); // initialize 385 | 386 | let now = std::time::Instant::now(); // benchmark 387 | { 388 | // OCR files 389 | println!( 390 | "{}", 391 | p.ocr(Path::new("C:/Users/Neko/Pictures/test1.png").into()) 392 | .unwrap() 393 | ); 394 | println!( 395 | "{}", 396 | p.ocr(Path::new("C:/Users/Neko/Pictures/test2.png").into()) 397 | .unwrap() 398 | ); 399 | println!( 400 | "{}", 401 | p.ocr(Path::new("C:/Users/Neko/Pictures/test3.png").into()) 402 | .unwrap() 403 | ); 404 | println!( 405 | "{}", 406 | p.ocr(Path::new("C:/Users/Neko/Pictures/test4.png").into()) 407 | .unwrap() 408 | ); 409 | println!( 410 | "{}", 411 | p.ocr(Path::new("C:/Users/Neko/Pictures/test5.png").into()) 412 | .unwrap() 413 | ); 414 | 415 | // OCR clipboard 416 | println!("{}", p.ocr_clipboard().unwrap()); 417 | } 418 | println!("Elapsed: {:.2?}", now.elapsed()); 419 | } 420 | 421 | #[test] 422 | fn parse() { 423 | let mut p = Ppocr::new( 424 | PathBuf::from("E:/code/paddleocr/v1.4.0/PaddleOCR-json.exe"), // path to binary 425 | Default::default(), // language config_path, default `zh_CN` 426 | ) 427 | .unwrap(); // initialize 428 | 429 | // OCR files 430 | p.ocr_and_parse(Path::new("C:/Users/Neko/Pictures/test2.png").into()) 431 | .unwrap(); 432 | 433 | 
// These tests talk to a real PaddleOCR-json binary and read sample images
// from the paths below, so they only pass on a machine that has them.
#[cfg(test)]
mod tests {
    use std::path::PathBuf;

    use crate::{ImageData, Ppocr};

    /// Location of the engine binary used by the tests.
    const EXE_PATH: &str = "E:/code/paddleocr/v1.4.0/PaddleOCR-json.exe";
    /// Directory holding the sample images `test1.png` … `test5.png`.
    const IMAGE_DIR: &str = "C:/Users/Neko/Pictures";

    /// Starts an engine with the default configuration.
    fn engine() -> Ppocr {
        Ppocr::new(
            PathBuf::from(EXE_PATH), // path to binary
            Default::default(),      // language config_path, default `zh_CN`
        )
        .unwrap() // initialize
    }

    /// Builds a path request for one of the sample images.
    fn sample(name: &str) -> ImageData {
        ImageData::from_path(format!("{}/{}", IMAGE_DIR, name))
    }

    #[test]
    fn recognize() {
        let mut p = engine();

        let now = std::time::Instant::now(); // benchmark
        {
            // OCR files
            for name in ["test1.png", "test2.png", "test3.png", "test4.png", "test5.png"] {
                println!("{}", p.ocr(sample(name)).unwrap());
            }

            // OCR clipboard
            println!("{}", p.ocr_clipboard().unwrap());
        }
        println!("Elapsed: {:.2?}", now.elapsed());
    }

    #[test]
    fn parse() {
        let mut p = engine();

        // OCR files
        p.ocr_and_parse(sample("test2.png")).unwrap();

        // Read the image at run time: `include_bytes!` on a machine-specific
        // absolute path would stop the whole test module from compiling
        // anywhere else.
        let bytes = std::fs::read(format!("{}/test3.png", IMAGE_DIR)).unwrap();
        p.ocr_and_parse(ImageData::from_bytes(bytes)).unwrap();
    }
}