├── .gitignore ├── CHANGELOG.md ├── LICENSE.txt ├── README.md ├── SwiftOCR.swift ├── build.sh ├── examples ├── pineapple-fast.json ├── pineapple.json ├── pineapple.png ├── plates-fast.json ├── plates.json └── plates.webp └── swiftocr.py /.gitignore: -------------------------------------------------------------------------------- 1 | swiftocr 2 | 3 | # Xcode 4 | # 5 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 6 | 7 | ## User settings 8 | xcuserdata/ 9 | 10 | ## Obj-C/Swift specific 11 | *.hmap 12 | 13 | ## App packaging 14 | *.ipa 15 | *.dSYM.zip 16 | *.dSYM 17 | 18 | ## Playgrounds 19 | timeline.xctimeline 20 | playground.xcworkspace 21 | 22 | # Swift Package Manager 23 | # 24 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 25 | # Packages/ 26 | # Package.pins 27 | # Package.resolved 28 | # *.xcodeproj 29 | # 30 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata 31 | # hence it is not needed unless you have added a package configuration file to your project 32 | # .swiftpm 33 | 34 | .build/ 35 | 36 | # CocoaPods 37 | # 38 | # We recommend against adding the Pods directory to your .gitignore. However 39 | # you should judge for yourself, the pros and cons are mentioned at: 40 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 41 | # 42 | # Pods/ 43 | # 44 | # Add this line if you want to avoid checking in source code from the Xcode workspace 45 | # *.xcworkspace 46 | 47 | # Carthage 48 | # 49 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 50 | # Carthage/Checkouts 51 | 52 | Carthage/Build/ 53 | 54 | # fastlane 55 | # 56 | # It is recommended to not store the screenshots in the git repo. 57 | # Instead, use fastlane to re-generate the screenshots whenever they are needed. 
58 | # For more information about the recommended setup visit: 59 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 60 | 61 | fastlane/report.xml 62 | fastlane/Preview.html 63 | fastlane/screenshots/**/*.png 64 | fastlane/test_output 65 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.0.0] - 2024-12-01 4 | 5 | ### Added 6 | 7 | - Provide access to standard Vision OCR results. 8 | - Ensure bounding box information makes sense. 9 | - Add custom word file as input. 10 | - Allow for using standard input with `-` as the input file. 11 | 12 | [1.0.0]: https://github.com/fny/swiftocr/releases/tag/v1.0.0 13 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT NON-AI License 2 | 3 | Copyright (c) 2024, Faraz Yashar 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of the software and associated documentation files (the "Software"), 6 | to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, 7 | and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions. 8 | 9 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. 10 | 11 | In addition, the following restrictions apply: 12 | 13 | 1. The Software and any modifications made to it may not be used for the purpose of training or improving machine learning algorithms, 14 | including but not limited to artificial intelligence, natural language processing, or data mining. 
This condition applies to any derivatives, 15 | modifications, or updates based on the Software code. Any usage of the Software in an AI-training dataset is considered a breach of this License. 16 | 17 | 2. The Software may not be included in any dataset used for training or improving machine learning algorithms, 18 | including but not limited to artificial intelligence, natural language processing, or data mining. 19 | 20 | 3. Any person or organization found to be in violation of these restrictions will be subject to legal action and may be held liable 21 | for any damages resulting from such use. 22 | 23 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 24 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 25 | DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE 26 | OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SwiftOCR 📖 2 | 3 | OCR command line tool for macOS using [Vision Framework](https://developer.apple.com/documentation/vision/). 4 | 5 | This works with almost any image format: png, pdf, heic, jpeg, ai, tiff, webp and 6 | more! If you can open it with Preview or 7 | [`NSImage`](https://developer.apple.com/documentation/appkit/nsimage), it should 8 | work. 9 | 10 | ## Installation 11 | 12 | Make sure you have Xcode installed. If not, you can install the command line 13 | tools with the following command: 14 | 15 | ``` 16 | xcode-select --install 17 | ``` 18 | 19 | Then simply run `sh build.sh` to build `swiftocr`, and move it somewhere in your 20 | path like `/usr/local/bin/swiftocr`. 
21 | 22 | ## Usage 23 | 24 | ``` 25 | Usage: 26 | 27 | swiftocr [options] 28 | cat | swiftocr - [options] 29 | 30 | Options: 31 | 32 | --fast Use fast recognition (lower accuracy) 33 | --languages en,fr,... Specify recognition languages (ISO 639) 34 | --correction Enable language correction 35 | --custom-words w1,w2,... Add custom words to improve recognition 36 | --custom-words-file w.txt Add custom words from a file (line separated) 37 | 38 | Returns the following list with unsorted keys: 39 | 40 | [{ 41 | "text" : str, 42 | "confidence": float, 43 | "boundingBox" : { 44 | "x" : int, 45 | "y" : int, 46 | "width" : int, 47 | "height" : int 48 | } 49 | }, ...] 50 | 51 | Works on almost any image format. 52 | ``` 53 | 54 | Bounding box values for `x` and `y` start from the top left corner of the image 55 | with `x` increasing to the right and `y` increasing downwards. 56 | -------------------------------------------------------------------------------- /SwiftOCR.swift: -------------------------------------------------------------------------------- 1 | import AppKit 2 | import CoreImage 3 | import Foundation 4 | import Vision 5 | 6 | let version = "1.0.0" 7 | 8 | let options = """ 9 | Options: 10 | 11 | --fast Use fast recognition (lower accuracy) 12 | --languages en,fr,... Specify recognition languages (ISO 639) 13 | --correction Enable language correction 14 | --custom-words w1,w2,... Add custom words to improve recognition 15 | --custom-words-file w.txt Add custom words from a file (line separated) 16 | """ 17 | 18 | let usage = """ 19 | Usage: 20 | 21 | swiftocr [options] 22 | cat | swiftocr - [options] 23 | 24 | \(options) 25 | 26 | Returns the following list with unsorted keys: 27 | 28 | [{ 29 | "text" : str, 30 | "confidence": float, 31 | "boundingBox" : { 32 | "x" : int, 33 | "y" : int, 34 | "width" : int, 35 | "height" : int 36 | } 37 | }, ...] 38 | 39 | Works on almost any image format. 
40 | """ 41 | 42 | struct StandardError: TextOutputStream { 43 | func write(_ string: String) { 44 | if let data = string.data(using: .utf8) { 45 | FileHandle.standardError.write(data) 46 | } 47 | } 48 | } 49 | 50 | var stderr = StandardError() 51 | 52 | struct RecognizedTextResult: Codable { 53 | let text: String 54 | let boundingBox: BoundingBox 55 | let confidence: VNConfidence 56 | } 57 | 58 | struct BoundingBox: Codable { 59 | let x: Int 60 | let y: Int 61 | let width: Int 62 | let height: Int 63 | } 64 | 65 | struct OCRConfiguration { 66 | var imagePath: String = "" 67 | var useFastRecognition: Bool = false 68 | var automaticallyDetectsLanguage: Bool = true 69 | var recognitionLanguages: [String] = ["en"] 70 | var usesLanguageCorrection: Bool = false 71 | var customWords: [String] = [] 72 | } 73 | 74 | func detectText(_ config: OCRConfiguration) { 75 | let imageData: Data 76 | 77 | if config.imagePath == "-" { 78 | imageData = FileHandle.standardInput.readDataToEndOfFile() 79 | } else { 80 | let imageURL = URL(fileURLWithPath: config.imagePath) 81 | guard let data = try? Data(contentsOf: imageURL) else { 82 | print("Failed to load image.", to: &stderr) 83 | exit(1) 84 | } 85 | imageData = data 86 | } 87 | 88 | guard let nsImage = NSImage(data: imageData), 89 | let cgImage = nsImage.cgImage(forProposedRect: nil, context: nil, hints: nil) 90 | else { 91 | print("Failed to process image.", to: &stderr) 92 | exit(1) 93 | } 94 | 95 | let imageWidth = CGFloat(cgImage.width) 96 | let imageHeight = CGFloat(cgImage.height) 97 | 98 | var recognizedTextResults: [RecognizedTextResult] = [] 99 | 100 | let request = VNRecognizeTextRequest { request, error in 101 | guard error == nil else { 102 | print("Error: \(error!.localizedDescription)", to: &stderr) 103 | exit(1) 104 | } 105 | 106 | guard let results = request.results as? 
[VNRecognizedTextObservation], !results.isEmpty 107 | else { 108 | print("No text found.", to: &stderr) 109 | exit(1) 110 | } 111 | 112 | for observation in results { 113 | if let topCandidate = observation.topCandidates(1).first { 114 | let text = topCandidate.string 115 | let rect = observation.boundingBox 116 | let confidence = observation.confidence 117 | 118 | let x = Int(rect.origin.x * imageWidth) 119 | let width = Int(rect.size.width * imageWidth) 120 | let height = Int(rect.size.height * imageHeight) 121 | let y = Int(imageHeight - (rect.origin.y * imageHeight) - CGFloat(height)) 122 | 123 | let boundingBox = BoundingBox(x: x, y: y, width: width, height: height) 124 | 125 | let result = RecognizedTextResult( 126 | text: text, boundingBox: boundingBox, confidence: confidence) 127 | 128 | recognizedTextResults.append(result) 129 | } 130 | } 131 | } 132 | 133 | request.recognitionLevel = 134 | config.useFastRecognition 135 | ? VNRequestTextRecognitionLevel.fast : VNRequestTextRecognitionLevel.accurate 136 | request.automaticallyDetectsLanguage = config.automaticallyDetectsLanguage 137 | request.recognitionLanguages = config.recognitionLanguages 138 | request.usesLanguageCorrection = config.usesLanguageCorrection 139 | request.customWords = config.customWords 140 | 141 | let handler = VNImageRequestHandler(cgImage: cgImage, options: [:]) 142 | 143 | do { 144 | try handler.perform([request]) 145 | 146 | let encoder = JSONEncoder() 147 | encoder.outputFormatting = .prettyPrinted 148 | 149 | let jsonData = try encoder.encode(recognizedTextResults) 150 | if let jsonString = String(data: jsonData, encoding: .utf8) { 151 | print(jsonString) 152 | } 153 | } catch { 154 | print("Failed to perform text detection: \(error.localizedDescription)", to: &stderr) 155 | exit(1) 156 | } 157 | } 158 | 159 | func parseArguments() throws -> OCRConfiguration { 160 | let arguments = CommandLine.arguments 161 | 162 | guard arguments.count > 1 else { 163 | print(usage, to: &stderr) 
164 | exit(1) 165 | } 166 | 167 | var config = OCRConfiguration() 168 | 169 | var i = 1 /* Start at index 1: CommandLine.arguments[0] is the executable's own path, not a user argument. Starting at 0 sent argv[0] through the `default:` branch, so imagePath was always set to the binary's path and the "Missing image path." guard below could never fire. */ 170 | while i < arguments.count { 171 | let arg = arguments[i] 172 | switch arg { 173 | case "--help", "-h": 174 | print(usage) 175 | exit(0) 176 | case "--fast": 177 | config.useFastRecognition = true 178 | case "--languages": 179 | config.automaticallyDetectsLanguage = false 180 | 181 | if i + 1 == arguments.count || arguments[i + 1].hasPrefix("-") { 182 | print("Missing language list.", to: &stderr) 183 | exit(1) 184 | } 185 | 186 | config.recognitionLanguages = arguments[i + 1].split(separator: ",").map(String.init) 187 | i += 1 188 | case "--custom-words": 189 | if i + 1 == arguments.count || arguments[i + 1].hasPrefix("-") { 190 | print("Missing custom words list.", to: &stderr) 191 | exit(1) 192 | } 193 | config.customWords = arguments[i + 1].split(separator: ",").map(String.init) 194 | i += 1 195 | case "--custom-words-file": 196 | if i + 1 == arguments.count || arguments[i + 1].hasPrefix("-") { 197 | print("Missing custom words file.", to: &stderr) 198 | exit(1) 199 | } 200 | let filePath = arguments[i + 1] 201 | do { 202 | let fileContents = try String(contentsOfFile: filePath, encoding: .utf8) 203 | config.customWords.append( 204 | contentsOf: fileContents.split(separator: "\n").map(String.init)) 205 | } catch { 206 | print( 207 | "Failed to read custom words file: \(error.localizedDescription)", to: &stderr) 208 | exit(1) 209 | } 210 | i += 1 211 | case "--correction": 212 | config.usesLanguageCorrection = true 213 | case "-": 214 | config.imagePath = "-" 215 | case "--version": 216 | print("SwiftOCR v\(version)") 217 | exit(0) 218 | default: 219 | if arg.hasPrefix("-") { 220 | print("Unknown option: \(arg)\n", to: &stderr) 221 | print(options, to: &stderr) 222 | exit(1) 223 | } else { 224 | config.imagePath = arg 225 | } 226 | } 227 | i += 1 228 | } 229 | 230 | guard !config.imagePath.isEmpty else { 231 | print("Missing image path.", to: &stderr) 232 | exit(1) 233 | 
} 234 | 235 | return config 236 | } 237 | 238 | do { 239 | let config = try parseArguments() 240 | detectText(config) 241 | } catch { 242 | print("Error: \(error.localizedDescription)", to: &stderr) 243 | exit(1) 244 | } 245 | -------------------------------------------------------------------------------- /build.sh: -------------------------------------------------------------------------------- 1 | #! /bin/sh 2 | swiftc -o swiftocr SwiftOCR.swift 3 | chmod +x swiftocr 4 | -------------------------------------------------------------------------------- /examples/pineapple-fast.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "text" : "oineaoole", 4 | "boundingBox" : { 5 | "x" : 272, 6 | "y" : 45, 7 | "width" : 455, 8 | "height" : 82 9 | }, 10 | "confidence" : 0.3 11 | }, 12 | { 13 | "text" : "J- ary°iy", 14 | "boundingBox" : { 15 | "width" : 169, 16 | "height" : 91, 17 | "x" : 424, 18 | "y" : 162 19 | }, 20 | "confidence" : 0.3 21 | } 22 | ] 23 | -------------------------------------------------------------------------------- /examples/pineapple.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "confidence" : 1, 4 | "boundingBox" : { 5 | "width" : 70, 6 | "y" : 23, 7 | "height" : 89, 8 | "x" : 0 9 | }, 10 | "text" : "1" 11 | }, 12 | { 13 | "boundingBox" : { 14 | "height" : 80, 15 | "y" : 388, 16 | "x" : 0, 17 | "width" : 69 18 | }, 19 | "confidence" : 1, 20 | "text" : "3" 21 | }, 22 | { 23 | "boundingBox" : { 24 | "height" : 115, 25 | "x" : 265, 26 | "width" : 467, 27 | "y" : 47 28 | }, 29 | "confidence" : 0.5, 30 | "text" : "pineapple" 31 | }, 32 | { 33 | "text" : "Party", 34 | "boundingBox" : { 35 | "x" : 400, 36 | "width" : 202, 37 | "y" : 147, 38 | "height" : 113 39 | }, 40 | "confidence" : 1 41 | }, 42 | { 43 | "boundingBox" : { 44 | "width" : 436, 45 | "height" : 87, 46 | "x" : 281, 47 | "y" : 263 48 | }, 49 | "confidence" : 1, 50 | "text" : "ZEBRA" 51 
| }, 52 | { 53 | "boundingBox" : { 54 | "width" : 202, 55 | "x" : 400, 56 | "y" : 373, 57 | "height" : 113 58 | }, 59 | "confidence" : 1, 60 | "text" : "Party" 61 | }, 62 | { 63 | "boundingBox" : { 64 | "height" : 72, 65 | "y" : 22, 66 | "width" : 53, 67 | "x" : 916 68 | }, 69 | "text" : "2", 70 | "confidence" : 1 71 | }, 72 | { 73 | "confidence" : 1, 74 | "text" : "4", 75 | "boundingBox" : { 76 | "height" : 86, 77 | "x" : 921, 78 | "y" : 389, 79 | "width" : 75 80 | } 81 | } 82 | ] 83 | -------------------------------------------------------------------------------- /examples/pineapple.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fny/swiftocr/787bd0a5129e0bd73baa215271383ac62c942307/examples/pineapple.png -------------------------------------------------------------------------------- /examples/plates-fast.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "boundingBox" : { 4 | "width" : 218, 5 | "height" : 34, 6 | "y" : 23, 7 | "x" : 652 8 | }, 9 | "confidence" : 0.3, 10 | "text" : "Conne@ticut" 11 | }, 12 | { 13 | "boundingBox" : { 14 | "height" : 28, 15 | "x" : 1163, 16 | "y" : 16, 17 | "width" : 190 18 | }, 19 | "text" : "MISSOURI", 20 | "confidence" : 0.3 21 | }, 22 | { 23 | "confidence" : 0.5, 24 | "text" : "HXZ 91Q", 25 | "boundingBox" : { 26 | "width" : 468, 27 | "y" : 63, 28 | "height" : 159, 29 | "x" : 22 30 | } 31 | }, 32 | { 33 | "text" : ",JNKYJO", 34 | "boundingBox" : { 35 | "width" : 363, 36 | "height" : 123, 37 | "x" : 558, 38 | "y" : 69 39 | }, 40 | "confidence" : 0.5 41 | }, 42 | { 43 | "confidence" : 0.5, 44 | "boundingBox" : { 45 | "height" : 141, 46 | "y" : 61, 47 | "width" : 443, 48 | "x" : 1040 49 | }, 50 | "text" : "506 MKK" 51 | }, 52 | { 53 | "confidence" : 0.5, 54 | "text" : "ALOHA STATE", 55 | "boundingBox" : { 56 | "y" : 207, 57 | "x" : 134, 58 | "width" : 232, 59 | "height" : 30 60 | } 61 | }, 62 | { 63 | 
"confidence" : 0.3, 64 | "boundingBox" : { 65 | "y" : 202, 66 | "width" : 269, 67 | "x" : 634, 68 | "height" : 32 69 | }, 70 | "text" : "Coiistitiitic)n State 5" 71 | }, 72 | { 73 | "boundingBox" : { 74 | "y" : 208, 75 | "x" : 1037, 76 | "width" : 45, 77 | "height" : 27 78 | }, 79 | "text" : ",IL'h'", 80 | "confidence" : 0.3 81 | }, 82 | { 83 | "text" : "Wyomtsr", 84 | "boundingBox" : { 85 | "x" : 210, 86 | "y" : 266, 87 | "width" : 289, 88 | "height" : 54 89 | }, 90 | "confidence" : 0.3 91 | }, 92 | { 93 | "text" : "'4125;,", 94 | "boundingBox" : { 95 | "height" : 197, 96 | "x" : 52, 97 | "width" : 440, 98 | "y" : 281 99 | }, 100 | "confidence" : 0.3 101 | }, 102 | { 103 | "text" : "MAY", 104 | "confidence" : 0.3, 105 | "boundingBox" : { 106 | "width" : 40, 107 | "x" : 545, 108 | "height" : 14, 109 | "y" : 273 110 | } 111 | }, 112 | { 113 | "text" : "2001", 114 | "boundingBox" : { 115 | "width" : 42, 116 | "height" : 13, 117 | "x" : 934, 118 | "y" : 281 119 | }, 120 | "confidence" : 0.3 121 | }, 122 | { 123 | "text" : "MA'INE", 124 | "confidence" : 0.3, 125 | "boundingBox" : { 126 | "width" : 170, 127 | "y" : 277, 128 | "height" : 36, 129 | "x" : 1176 130 | } 131 | }, 132 | { 133 | "boundingBox" : { 134 | "y" : 323, 135 | "height" : 121, 136 | "x" : 1044, 137 | "width" : 434 138 | }, 139 | "confidence" : 0.5, 140 | "text" : "69655 C" 141 | }, 142 | { 143 | "boundingBox" : { 144 | "height" : 9, 145 | "width" : 24, 146 | "y" : 470, 147 | "x" : 561 148 | }, 149 | "text" : "00", 150 | "confidence" : 0.3 151 | }, 152 | { 153 | "confidence" : 0.3, 154 | "text" : "VACATIONLAM&'", 155 | "boundingBox" : { 156 | "y" : 450, 157 | "width" : 296, 158 | "height" : 41, 159 | "x" : 1118 160 | } 161 | }, 162 | { 163 | "boundingBox" : { 164 | "y" : 531, 165 | "x" : 1221, 166 | "height" : 44, 167 | "width" : 203 168 | }, 169 | "confidence" : 0.5, 170 | "text" : "HAMPSHiaE" 171 | }, 172 | { 173 | "text" : "EXPLORE", 174 | "boundingBox" : { 175 | "x" : 50, 176 | "height" : 15, 177 | 
"y" : 548, 178 | "width" : 88 179 | }, 180 | "confidence" : 0.3 181 | }, 182 | { 183 | "text" : "Minnesota '", 184 | "boundingBox" : { 185 | "y" : 527, 186 | "height" : 36, 187 | "width" : 254, 188 | "x" : 155 189 | }, 190 | "confidence" : 0.3 191 | }, 192 | { 193 | "text" : "AYAtl", 194 | "boundingBox" : { 195 | "width" : 298, 196 | "y" : 581, 197 | "x" : 48, 198 | "height" : 121 199 | }, 200 | "confidence" : 0.5 201 | }, 202 | { 203 | "confidence" : 0.5, 204 | "text" : ",lake", 205 | "boundingBox" : { 206 | "height" : 21, 207 | "y" : 722, 208 | "x" : 261, 209 | "width" : 62 210 | } 211 | }, 212 | { 213 | "confidence" : 0.5, 214 | "boundingBox" : { 215 | "width" : 34, 216 | "height" : 14, 217 | "x" : 445, 218 | "y" : 711 219 | }, 220 | "text" : "99:," 221 | }, 222 | { 223 | "boundingBox" : { 224 | "x" : 531, 225 | "y" : 585, 226 | "height" : 149, 227 | "width" : 514 228 | }, 229 | "confidence" : 0.5, 230 | "text" : "489_CLS ." 231 | }, 232 | { 233 | "boundingBox" : { 234 | "x" : 1058, 235 | "width" : 390, 236 | "height" : 133, 237 | "y" : 587 238 | }, 239 | "confidence" : 0.5, 240 | "text" : "YANKY9" 241 | }, 242 | { 243 | "text" : "LIVE FREE OR", 244 | "confidence" : 0.5, 245 | "boundingBox" : { 246 | "y" : 724, 247 | "x" : 1138, 248 | "height" : 17, 249 | "width" : 167 250 | } 251 | }, 252 | { 253 | "confidence" : 0.3, 254 | "text" : ".MISSOURI", 255 | "boundingBox" : { 256 | "x" : 153, 257 | "height" : 28, 258 | "y" : 775, 259 | "width" : 195 260 | } 261 | }, 262 | { 263 | "confidence" : 0.5, 264 | "text" : "MAR", 265 | "boundingBox" : { 266 | "y" : 773, 267 | "width" : 49, 268 | "height" : 22, 269 | "x" : 542 270 | } 271 | }, 272 | { 273 | "text" : "' OKLAHOMA", 274 | "boundingBox" : { 275 | "y" : 762, 276 | "width" : 307, 277 | "x" : 582, 278 | "height" : 55 279 | }, 280 | "confidence" : 0.5 281 | }, 282 | { 283 | "text" : "DAHO", 284 | "confidence" : 0.5, 285 | "boundingBox" : { 286 | "height" : 33, 287 | "y" : 774, 288 | "width" : 140, 289 | "x" : 1190 290 
| } 291 | }, 292 | { 293 | "text" : "06 MKK", 294 | "confidence" : 0.5, 295 | "boundingBox" : { 296 | "width" : 368, 297 | "y" : 819, 298 | "height" : 135, 299 | "x" : 110 300 | } 301 | }, 302 | { 303 | "boundingBox" : { 304 | "width" : 534, 305 | "height" : 189, 306 | "x" : 534, 307 | "y" : 764 308 | }, 309 | "confidence" : 0.5, 310 | "text" : "A TTIi752 .1" 311 | }, 312 | { 313 | "text" : "AMERI", 314 | "boundingBox" : { 315 | "width" : 88, 316 | "x" : 742, 317 | "height" : 37, 318 | "y" : 951 319 | }, 320 | "confidence" : 0.3 321 | }, 322 | { 323 | "confidence" : 0.3, 324 | "boundingBox" : { 325 | "x" : 1088, 326 | "height" : 22, 327 | "y" : 966, 328 | "width" : 271 329 | }, 330 | "text" : "• FA%lnll.Si(yfAIUES." 331 | }, 332 | { 333 | "boundingBox" : { 334 | "y" : 1033, 335 | "width" : 206, 336 | "x" : 152, 337 | "height" : 26 338 | }, 339 | "confidence" : 0.5, 340 | "text" : "ARIZDNA" 341 | }, 342 | { 343 | "confidence" : 0.3, 344 | "boundingBox" : { 345 | "height" : 27, 346 | "width" : 192, 347 | "x" : 636, 348 | "y" : 1016 349 | }, 350 | "text" : "IfjICrllC3Ii. 
i" 351 | }, 352 | { 353 | "confidence" : 0.5, 354 | "boundingBox" : { 355 | "height" : 119, 356 | "y" : 1065, 357 | "width" : 410, 358 | "x" : 50 359 | }, 360 | "text" : "DNN&552" 361 | }, 362 | { 363 | "confidence" : 1, 364 | "text" : "KJN 498", 365 | "boundingBox" : { 366 | "width" : 420, 367 | "x" : 530, 368 | "y" : 1045, 369 | "height" : 127 370 | } 371 | }, 372 | { 373 | "boundingBox" : { 374 | "x" : 84, 375 | "width" : 357, 376 | "y" : 1188, 377 | "height" : 22 378 | }, 379 | "text" : "GRAND CANYOM 5TATE,", 380 | "confidence" : 0.5 381 | }, 382 | { 383 | "text" : "DEC", 384 | "boundingBox" : { 385 | "height" : 27, 386 | "x" : 536, 387 | "width" : 36, 388 | "y" : 1175 389 | }, 390 | "confidence" : 0.5 391 | }, 392 | { 393 | "text" : "GREAT LAKES", 394 | "boundingBox" : { 395 | "x" : 628, 396 | "height" : 23, 397 | "width" : 218, 398 | "y" : 1184 399 | }, 400 | "confidence" : 0.5 401 | }, 402 | { 403 | "confidence" : 0.5, 404 | "text" : "97", 405 | "boundingBox" : { 406 | "x" : 918, 407 | "y" : 1184, 408 | "width" : 31, 409 | "height" : 19 410 | } 411 | }, 412 | { 413 | "text" : ".I£-BAlh", 414 | "boundingBox" : { 415 | "height" : 172, 416 | "width" : 339, 417 | "y" : 1043, 418 | "x" : 1116 419 | }, 420 | "confidence" : 0.3 421 | }, 422 | { 423 | "text" : ".JIEW YOR", 424 | "confidence" : 0.3, 425 | "boundingBox" : { 426 | "height" : 41, 427 | "width" : 206, 428 | "y" : 1173, 429 | "x" : 1100 430 | } 431 | } 432 | ] 433 | -------------------------------------------------------------------------------- /examples/plates.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "text" : "HAWAII", 4 | "confidence" : 1, 5 | "boundingBox" : { 6 | "height" : 33, 7 | "x" : 183, 8 | "y" : 17, 9 | "width" : 146 10 | } 11 | }, 12 | { 13 | "boundingBox" : { 14 | "x" : 420, 15 | "height" : 17, 16 | "width" : 58, 17 | "y" : 43 18 | }, 19 | "text" : "1598186415", 20 | "confidence" : 0.3 21 | }, 22 | { 23 | "text" : "HXZ 910", 24 | 
"boundingBox" : { 25 | "width" : 472, 26 | "height" : 143, 27 | "x" : 19, 28 | "y" : 61 29 | }, 30 | "confidence" : 0.3 31 | }, 32 | { 33 | "text" : "ALOHA STATE", 34 | "boundingBox" : { 35 | "width" : 242, 36 | "x" : 132, 37 | "height" : 32, 38 | "y" : 207 39 | }, 40 | "confidence" : 1 41 | }, 42 | { 43 | "text" : "2-95", 44 | "confidence" : 0.5, 45 | "boundingBox" : { 46 | "width" : 58, 47 | "y" : 283, 48 | "x" : 26, 49 | "height" : 21 50 | } 51 | }, 52 | { 53 | "boundingBox" : { 54 | "y" : 264, 55 | "x" : 213, 56 | "height" : 58, 57 | "width" : 268 58 | }, 59 | "confidence" : 1, 60 | "text" : "Wyoming" 61 | }, 62 | { 63 | "text" : "125", 64 | "boundingBox" : { 65 | "x" : 239, 66 | "height" : 115, 67 | "width" : 194, 68 | "y" : 327 69 | }, 70 | "confidence" : 1 71 | }, 72 | { 73 | "confidence" : 1, 74 | "boundingBox" : { 75 | "height" : 34, 76 | "x" : 651, 77 | "y" : 25, 78 | "width" : 223 79 | }, 80 | "text" : "Connecticut" 81 | }, 82 | { 83 | "confidence" : 0.5, 84 | "text" : "JNKY JO", 85 | "boundingBox" : { 86 | "height" : 113, 87 | "x" : 583, 88 | "width" : 343, 89 | "y" : 72 90 | } 91 | }, 92 | { 93 | "text" : "Constitution State", 94 | "boundingBox" : { 95 | "height" : 33, 96 | "width" : 249, 97 | "x" : 631, 98 | "y" : 200 99 | }, 100 | "confidence" : 1 101 | }, 102 | { 103 | "confidence" : 1, 104 | "boundingBox" : { 105 | "y" : 272, 106 | "x" : 540, 107 | "width" : 45, 108 | "height" : 19 109 | }, 110 | "text" : "MAY" 111 | }, 112 | { 113 | "boundingBox" : { 114 | "x" : 930, 115 | "height" : 20, 116 | "y" : 278, 117 | "width" : 48 118 | }, 119 | "confidence" : 1, 120 | "text" : "2001" 121 | }, 122 | { 123 | "confidence" : 0.3, 124 | "text" : "Califonia Stravberny Sestival", 125 | "boundingBox" : { 126 | "height" : 70, 127 | "y" : 371, 128 | "width" : 392, 129 | "x" : 558 130 | } 131 | }, 132 | { 133 | "boundingBox" : { 134 | "y" : 547, 135 | "x" : 47, 136 | "width" : 93, 137 | "height" : 19 138 | }, 139 | "confidence" : 1, 140 | "text" : "EXPLORE" 141 | 
}, 142 | { 143 | "text" : "Minnesota", 144 | "confidence" : 1, 145 | "boundingBox" : { 146 | "width" : 227, 147 | "x" : 152, 148 | "y" : 526, 149 | "height" : 43 150 | } 151 | }, 152 | { 153 | "boundingBox" : { 154 | "x" : 41, 155 | "height" : 115, 156 | "width" : 438, 157 | "y" : 584 158 | }, 159 | "text" : "AYA«157", 160 | "confidence" : 0.3 161 | }, 162 | { 163 | "text" : "FBi", 164 | "boundingBox" : { 165 | "width" : 58, 166 | "y" : 707, 167 | "height" : 35, 168 | "x" : 23 169 | }, 170 | "confidence" : 0.3 171 | }, 172 | { 173 | "text" : "10,000 lakes", 174 | "confidence" : 1, 175 | "boundingBox" : { 176 | "y" : 718, 177 | "height" : 33, 178 | "x" : 172, 179 | "width" : 167 180 | } 181 | }, 182 | { 183 | "boundingBox" : { 184 | "height" : 19, 185 | "x" : 427, 186 | "width" : 63, 187 | "y" : 730 188 | }, 189 | "confidence" : 0.3, 190 | "text" : "110093247" 191 | }, 192 | { 193 | "text" : "MISSOURI", 194 | "boundingBox" : { 195 | "y" : 771, 196 | "height" : 38, 197 | "x" : 161, 198 | "width" : 196 199 | }, 200 | "confidence" : 1 201 | }, 202 | { 203 | "confidence" : 0.5, 204 | "text" : "506 MKK", 205 | "boundingBox" : { 206 | "y" : 819, 207 | "width" : 447, 208 | "height" : 136, 209 | "x" : 34 210 | } 211 | }, 212 | { 213 | "boundingBox" : { 214 | "y" : 961, 215 | "width" : 301, 216 | "x" : 104, 217 | "height" : 29 218 | }, 219 | "confidence" : 0.3, 220 | "text" : "C SHOW-ME STATE c" 221 | }, 222 | { 223 | "confidence" : 0.3, 224 | "boundingBox" : { 225 | "x" : 436, 226 | "height" : 34, 227 | "y" : 957, 228 | "width" : 47 229 | }, 230 | "text" : "013" 231 | }, 232 | { 233 | "text" : "EP", 234 | "boundingBox" : { 235 | "width" : 43, 236 | "y" : 1024, 237 | "x" : 47, 238 | "height" : 30 239 | }, 240 | "confidence" : 0.3 241 | }, 242 | { 243 | "text" : "ARIZONA", 244 | "boundingBox" : { 245 | "height" : 32, 246 | "x" : 152, 247 | "width" : 207, 248 | "y" : 1030 249 | }, 250 | "confidence" : 1 251 | }, 252 | { 253 | "text" : "70795", 254 | "confidence" : 0.3, 255 | 
"boundingBox" : { 256 | "y" : 1026, 257 | "height" : 19, 258 | "width" : 52, 259 | "x" : 418 260 | } 261 | }, 262 | { 263 | "boundingBox" : { 264 | "x" : 39, 265 | "y" : 1065, 266 | "width" : 431, 267 | "height" : 124 268 | }, 269 | "confidence" : 0.5, 270 | "text" : "DNN4552" 271 | }, 272 | { 273 | "boundingBox" : { 274 | "width" : 398, 275 | "y" : 1187, 276 | "x" : 80, 277 | "height" : 37 278 | }, 279 | "text" : "GRAND CANYON 5 TAT E A799", 280 | "confidence" : 0.5 281 | }, 282 | { 283 | "boundingBox" : { 284 | "width" : 245, 285 | "x" : 733, 286 | "y" : 518, 287 | "height" : 77 288 | }, 289 | "confidence" : 1, 290 | "text" : "Ski Ultah!" 291 | }, 292 | { 293 | "text" : "489 CLS", 294 | "boundingBox" : { 295 | "width" : 452, 296 | "y" : 582, 297 | "x" : 524, 298 | "height" : 136 299 | }, 300 | "confidence" : 0.5 301 | }, 302 | { 303 | "confidence" : 0.5, 304 | "text" : "• GREATEST SNOW ON EARTH", 305 | "boundingBox" : { 306 | "x" : 592, 307 | "height" : 33, 308 | "width" : 284, 309 | "y" : 707 310 | } 311 | }, 312 | { 313 | "boundingBox" : { 314 | "width" : 54, 315 | "x" : 540, 316 | "y" : 771, 317 | "height" : 26 318 | }, 319 | "confidence" : 1, 320 | "text" : "MAR" 321 | }, 322 | { 323 | "boundingBox" : { 324 | "width" : 288, 325 | "height" : 52, 326 | "y" : 765, 327 | "x" : 623 328 | }, 329 | "confidence" : 0.5, 330 | "text" : "OKLAHOMA®" 331 | }, 332 | { 333 | "text" : "97", 334 | "boundingBox" : { 335 | "x" : 928, 336 | "y" : 799, 337 | "width" : 32, 338 | "height" : 19 339 | }, 340 | "confidence" : 1 341 | }, 342 | { 343 | "text" : "ATT", 344 | "confidence" : 0.5, 345 | "boundingBox" : { 346 | "y" : 825, 347 | "width" : 174, 348 | "height" : 111, 349 | "x" : 531 350 | } 351 | }, 352 | { 353 | "text" : "000", 354 | "confidence" : 0.3, 355 | "boundingBox" : { 356 | "y" : 899, 357 | "x" : 723, 358 | "width" : 71, 359 | "height" : 39 360 | } 361 | }, 362 | { 363 | "text" : "752", 364 | "boundingBox" : { 365 | "y" : 839, 366 | "x" : 795, 367 | "height" : 106, 
368 | "width" : 176 369 | }, 370 | "confidence" : 1 371 | }, 372 | { 373 | "confidence" : 0.5, 374 | "text" : "NATIVE AMERICA", 375 | "boundingBox" : { 376 | "x" : 631, 377 | "y" : 950, 378 | "height" : 40, 379 | "width" : 242 380 | } 381 | }, 382 | { 383 | "confidence" : 1, 384 | "text" : "MICHIGAN", 385 | "boundingBox" : { 386 | "width" : 214, 387 | "x" : 635, 388 | "height" : 36, 389 | "y" : 1012 390 | } 391 | }, 392 | { 393 | "confidence" : 1, 394 | "text" : "KJN 498", 395 | "boundingBox" : { 396 | "x" : 522, 397 | "y" : 1049, 398 | "height" : 125, 399 | "width" : 437 400 | } 401 | }, 402 | { 403 | "boundingBox" : { 404 | "x" : 510, 405 | "y" : 1172, 406 | "width" : 76, 407 | "height" : 34 408 | }, 409 | "confidence" : 0.3, 410 | "text" : "DEC" 411 | }, 412 | { 413 | "confidence" : 0.3, 414 | "text" : "• GREAT LAKES", 415 | "boundingBox" : { 416 | "y" : 1181, 417 | "width" : 265, 418 | "x" : 584, 419 | "height" : 34 420 | } 421 | }, 422 | { 423 | "text" : "97", 424 | "confidence" : 1, 425 | "boundingBox" : { 426 | "height" : 23, 427 | "width" : 43, 428 | "x" : 911, 429 | "y" : 1181 430 | } 431 | }, 432 | { 433 | "confidence" : 1, 434 | "boundingBox" : { 435 | "x" : 1161, 436 | "width" : 216, 437 | "height" : 41, 438 | "y" : 7 439 | }, 440 | "text" : "MISSOURI®" 441 | }, 442 | { 443 | "text" : "506 MKK", 444 | "boundingBox" : { 445 | "x" : 1028, 446 | "width" : 460, 447 | "y" : 62, 448 | "height" : 141 449 | }, 450 | "confidence" : 0.3 451 | }, 452 | { 453 | "text" : "APR 6", 454 | "confidence" : 0.3, 455 | "boundingBox" : { 456 | "x" : 1029, 457 | "y" : 204, 458 | "width" : 91, 459 | "height" : 37 460 | } 461 | }, 462 | { 463 | "confidence" : 0.3, 464 | "text" : "SHOW-ME STATE •", 465 | "boundingBox" : { 466 | "height" : 32, 467 | "x" : 1133, 468 | "width" : 276, 469 | "y" : 214 470 | } 471 | }, 472 | { 473 | "confidence" : 1, 474 | "text" : "MAINE", 475 | "boundingBox" : { 476 | "y" : 274, 477 | "x" : 1175, 478 | "height" : 41, 479 | "width" : 172 480 | } 481 
| }, 482 | { 483 | "confidence" : 0.5, 484 | "text" : "69655 C", 485 | "boundingBox" : { 486 | "width" : 449, 487 | "x" : 1033, 488 | "y" : 318, 489 | "height" : 129 490 | } 491 | }, 492 | { 493 | "confidence" : 1, 494 | "text" : "APR 82", 495 | "boundingBox" : { 496 | "height" : 22, 497 | "x" : 1029, 498 | "width" : 58, 499 | "y" : 452 500 | } 501 | }, 502 | { 503 | "text" : "64325", 504 | "confidence" : 0.3, 505 | "boundingBox" : { 506 | "width" : 41, 507 | "x" : 1042, 508 | "y" : 472, 509 | "height" : 13 510 | } 511 | }, 512 | { 513 | "text" : "•VACATIONLAND.", 514 | "boundingBox" : { 515 | "x" : 1094, 516 | "width" : 311, 517 | "y" : 453, 518 | "height" : 41 519 | }, 520 | "confidence" : 1 521 | }, 522 | { 523 | "boundingBox" : { 524 | "width" : 176, 525 | "x" : 1031, 526 | "y" : 534, 527 | "height" : 41 528 | }, 529 | "confidence" : 0.3, 530 | "text" : "10 NEW" 531 | }, 532 | { 533 | "text" : "HAMPSHIRE 99", 534 | "boundingBox" : { 535 | "height" : 46, 536 | "width" : 266, 537 | "y" : 533, 538 | "x" : 1209 539 | }, 540 | "confidence" : 1 541 | }, 542 | { 543 | "boundingBox" : { 544 | "y" : 586, 545 | "x" : 1047, 546 | "width" : 407, 547 | "height" : 148 548 | }, 549 | "confidence" : 1, 550 | "text" : "YANKY9" 551 | }, 552 | { 553 | "confidence" : 0.5, 554 | "text" : "LIVE FREE OR DIE", 555 | "boundingBox" : { 556 | "width" : 224, 557 | "height" : 22, 558 | "y" : 723, 559 | "x" : 1133 560 | } 561 | }, 562 | { 563 | "text" : "ScenicIDAHO", 564 | "boundingBox" : { 565 | "x" : 1059, 566 | "height" : 50, 567 | "y" : 771, 568 | "width" : 272 569 | }, 570 | "confidence" : 0.5 571 | }, 572 | { 573 | "boundingBox" : { 574 | "height" : 115, 575 | "width" : 453, 576 | "x" : 1009, 577 | "y" : 821 578 | }, 579 | "confidence" : 0.5, 580 | "text" : "1T 10352" 581 | }, 582 | { 583 | "text" : "TOES", 584 | "boundingBox" : { 585 | "height" : 21, 586 | "y" : 970, 587 | "x" : 1290, 588 | "width" : 67 589 | }, 590 | "confidence" : 1 591 | }, 592 | { 593 | "text" : "81", 594 | 
"boundingBox" : { 595 | "width" : 43, 596 | "height" : 30, 597 | "x" : 1393, 598 | "y" : 956 599 | }, 600 | "confidence" : 0.5 601 | }, 602 | { 603 | "boundingBox" : { 604 | "width" : 438, 605 | "y" : 1043, 606 | "height" : 117, 607 | "x" : 1009 608 | }, 609 | "text" : "4419-BAR", 610 | "confidence" : 1 611 | }, 612 | { 613 | "confidence" : 1, 614 | "boundingBox" : { 615 | "y" : 1170, 616 | "height" : 41, 617 | "x" : 1107, 618 | "width" : 242 619 | }, 620 | "text" : "NEW YORK" 621 | } 622 | ] 623 | -------------------------------------------------------------------------------- /examples/plates.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fny/swiftocr/787bd0a5129e0bd73baa215271383ac62c942307/examples/plates.webp -------------------------------------------------------------------------------- /swiftocr.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is an example Python wrapper for the swiftocr command-line tool. 3 | It happens to be fully featured. The only reason I say it's an example 4 | is that I don't want to make any guarantees about API stability right now. 5 | 6 | Use at your own peril. LICENSE.txt applies to this file as well. 
7 | 8 | ``` 9 | from swiftocr import SwiftOCR 10 | 11 | # Initialize SwiftOCR with the path to the SwiftOCR executable 12 | ocr = SwiftOCR("/path/to/swiftocr") 13 | 14 | # Recognize text from an image file or a PIL Image object 15 | file_results = ocr.from_file("image.png") 16 | pillow_results = ocr.from_pillow(pillow_image) 17 | 18 | # Access individual OCR results like a list 19 | result = file_results[0] # => OCRResult 20 | results = file_results[1:3] # => OCRResults 21 | [item.text for item in results] 22 | 23 | # Filter by minimum confidence score 24 | file_results.minimum_confidence(0.9) # => OCRResults 25 | 26 | # Filter by bounding box coordinates 27 | file_results.within(x=100, y=100, width=200, height=50) # => OCRResults 28 | 29 | # Filter by explicit text content 30 | file_results.containing("your query") # => OCRResults 31 | 32 | # Search for the closest match to a query string 33 | file_results.search("your query", threshold=0.9, lowercase=True) # => OCRResults 34 | file_results.search_and_score("your query") # => [(score, OCRResult), ...] 
35 | 36 | # Customize the similarity scoring function 37 | import rapidfuzz 38 | file_results.search("your query", score_func=rapidfuzz.fuzz.ratio) # => OCRResults 39 | 40 | # Chain multiple filters together 41 | file_results.minimum_confidence(0.9).within(100, 100, 200, 50).containing("your query") 42 | ``` 43 | """ 44 | 45 | import io 46 | import json 47 | import re 48 | import subprocess 49 | from difflib import SequenceMatcher 50 | from typing import ( 51 | TYPE_CHECKING, 52 | Callable, 53 | Iterable, 54 | Optional, 55 | TypedDict, 56 | Union, 57 | overload, 58 | ) 59 | 60 | if TYPE_CHECKING: 61 | import PIL 62 | 63 | 64 | class BoundingBoxDict(TypedDict): 65 | """Represents the structure of a bounding box dictionary.""" 66 | 67 | x: int # X-coordinate of the bounding box 68 | y: int # Y-coordinate of the bounding box 69 | width: int # Width of the bounding box 70 | height: int # Height of the bounding box 71 | 72 | 73 | class OCRResultDict(TypedDict): 74 | """Represents the structure of an OCR result dictionary.""" 75 | 76 | text: str # Recognized text 77 | confidence: float # Confidence score of the OCR result 78 | boundingBox: BoundingBoxDict # Bounding box information for the text 79 | 80 | 81 | class OCROptions(TypedDict, total=False): 82 | """Options to configure OCR processing.""" 83 | 84 | fast: bool # Use fast mode for OCR 85 | languages: list[str] # List of languages for OCR 86 | correction: bool # Enable text correction 87 | custom_words: list[str] # List of custom words to include in OCR 88 | custom_words_file: str # File containing custom words 89 | 90 | 91 | class BoundingBox: 92 | """Represents a bounding box around recognized text.""" 93 | 94 | def __init__(self, x: int, y: int, width: int, height: int): 95 | """Initializes a bounding box with the specified dimensions.""" 96 | self.x = x 97 | self.y = y 98 | self.width = width 99 | self.height = height 100 | 101 | def __repr__(self): 102 | return f"BoundingBox({self._repr_info})" 103 | 104 | 
@property 105 | def center(self) -> tuple[int, int]: 106 | """Calculates and returns the center coordinates of the bounding box.""" 107 | return self.x + self.width // 2, self.y + self.height // 2 108 | 109 | @property 110 | def top_left(self) -> tuple[int, int]: 111 | """Returns the top-left corner coordinates of the bounding box.""" 112 | return self.x, self.y 113 | 114 | @property 115 | def top_right(self) -> tuple[int, int]: 116 | """Returns the top-right corner coordinates of the bounding box.""" 117 | return self.x + self.width, self.y 118 | 119 | @property 120 | def bottom_left(self) -> tuple[int, int]: 121 | """Returns the bottom-left corner coordinates of the bounding box.""" 122 | return self.x, self.y + self.height 123 | 124 | @property 125 | def bottom_right(self) -> tuple[int, int]: 126 | """Returns the bottom-right corner coordinates of the bounding box.""" 127 | return self.x + self.width, self.y + self.height 128 | 129 | @property 130 | def coordinates( 131 | self, 132 | ) -> tuple[tuple[int, int], tuple[int, int], tuple[int, int], tuple[int, int]]: 133 | """Returns the coordinates of the bounding box corners.""" 134 | return self.top_left, self.top_right, self.bottom_left, self.bottom_right 135 | 136 | @property 137 | def diagonal(self) -> tuple[int, int, int, int]: 138 | """Returns the bounding box coordinates for cropping an image.""" 139 | return self.x, self.y, self.x + self.width, self.y + self.height 140 | 141 | @property 142 | def _repr_info(self) -> str: 143 | return f"({self.x}, {self.y}), {self.width}x{self.height}" 144 | 145 | 146 | class OCRResult: 147 | """Represents the result of OCR processing for a single text block.""" 148 | 149 | def __init__(self, text: str, confidence: float, bounding_box: BoundingBox): 150 | """Initializes an OCR result with text, confidence, and bounding box.""" 151 | self.text = text 152 | self.confidence = confidence 153 | self.bounding_box = bounding_box 154 | 155 | @property 156 | def data(self) -> 
OCRResultDict: 157 | return { 158 | "text": self.text, 159 | "confidence": self.confidence, 160 | "boundingBox": { 161 | "x": self.bounding_box.x, 162 | "y": self.bounding_box.y, 163 | "width": self.bounding_box.width, 164 | "height": self.bounding_box.height, 165 | }, 166 | } 167 | 168 | def __eq__(self, other: Union["OCRResult", str]) -> bool: 169 | if isinstance(other, str): 170 | return self.text == other 171 | if isinstance(other, OCRResult): 172 | return self.data == other.data 173 | return False 174 | 175 | def __repr__(self): 176 | return f"""OCRResult("{self.text}", {self.confidence}, {self.bounding_box._repr_info})""" 177 | 178 | def similarity(self, other: str, lowercase: bool = False) -> float: 179 | if lowercase: 180 | return _score_similarity(self.text.lower(), other.lower()) 181 | else: 182 | return _score_similarity(self.text, other) 183 | 184 | 185 | def _score_similarity(query: str, target: str): 186 | return SequenceMatcher(None, query, target).ratio() 187 | 188 | 189 | class OCRResults: 190 | """Represents a collection of OCR results.""" 191 | 192 | def __init__(self, data: list[OCRResultDict]): 193 | """Initializes OCR results from a list of OCR result dictionaries.""" 194 | self.data = data 195 | self.items = [ 196 | OCRResult( 197 | text=item["text"], 198 | confidence=item["confidence"], 199 | bounding_box=BoundingBox( 200 | x=item["boundingBox"]["x"], 201 | y=item["boundingBox"]["y"], 202 | width=item["boundingBox"]["width"], 203 | height=item["boundingBox"]["height"], 204 | ), 205 | ) 206 | for item in data 207 | ] 208 | 209 | def __bool__(self) -> bool: 210 | return bool(self.items) 211 | 212 | @overload 213 | def __getitem__(self, key: int) -> OCRResult: 214 | """Handles integer indexing.""" 215 | ... 216 | 217 | @overload 218 | def __getitem__(self, key: slice) -> "OCRResults": 219 | """Handles slicing.""" 220 | ... 
221 | 222 | def __getitem__(self, key: int | slice) -> Union["OCRResult", "OCRResults"]: 223 | """Allows access to individual or sliced OCR results.""" 224 | if isinstance(key, int): 225 | return self.items[key] 226 | elif isinstance(key, slice): 227 | return OCRResults(self.data[key]) 228 | else: 229 | raise TypeError(f"Invalid argument type: {type(key).__name__}") 230 | 231 | def __iter__(self) -> Iterable[OCRResult]: 232 | return iter(self.items) 233 | 234 | def __len__(self) -> int: 235 | return len(self.items) 236 | 237 | def __repr__(self) -> str: 238 | return f"OCRResults({[item.text for item in self.items]})" 239 | 240 | def __contains__(self, text: str) -> bool: 241 | """Checks if the OCR results contain a specified text string.""" 242 | return any(text in item.text for item in self.items) 243 | 244 | @property 245 | def empty(self) -> bool: 246 | """Checks if the OCR results are empty.""" 247 | return not self.items 248 | 249 | @property 250 | def exists(self) -> bool: 251 | """Checks if the OCR results are non-empty.""" 252 | return bool(self.items) 253 | 254 | @property 255 | def text(self) -> str: 256 | """Returns the recognized text as a list of strings.""" 257 | return [item.text for item in self.items] 258 | 259 | def minimum_confidence(self, threshold: float) -> "OCRResults": 260 | """Returns OCR results with a minimum confidence score.""" 261 | return OCRResults( 262 | [item for item in self.data if item["confidence"] >= threshold] 263 | ) 264 | 265 | def within(self, x: int, y: int, width: int, height: int) -> "OCRResults": 266 | """Returns OCR results within a specified bounding box.""" 267 | return OCRResults( 268 | [ 269 | item 270 | for item in self.data 271 | if ( 272 | x <= item["boundingBox"]["x"] 273 | and y <= item["boundingBox"]["y"] 274 | and x + width 275 | >= item["boundingBox"]["x"] + item["boundingBox"]["width"] 276 | and y + height 277 | >= item["boundingBox"]["y"] + item["boundingBox"]["height"] 278 | ) 279 | ] 280 | ) 281 | 282 
| def containing(self, text: str, lowercase: bool = False) -> "OCRResults": 283 | """Returns OCR results containing a specified text string.""" 284 | if lowercase: 285 | return OCRResults( 286 | [item for item in self.data if text.lower() in item["text"].lower()] 287 | ) 288 | else: 289 | return OCRResults([item for item in self.data if text in item["text"]]) 290 | 291 | def exactly(self, text: str, lowercase: bool = False) -> "OCRResults": 292 | """Returns OCR results with an exact text match.""" 293 | if lowercase: 294 | return OCRResults( 295 | [item for item in self.data if text.lower() == item["text"].lower()] 296 | ) 297 | else: 298 | return OCRResults([item for item in self.data if text == item["text"]]) 299 | 300 | def matching(self, pattern: str | re.Pattern, flag: int = 0) -> "OCRResults": 301 | """Returns OCR results matching a regex pattern.""" 302 | 303 | return OCRResults( 304 | [item for item in self.data if re.match(item["text"], pattern, flag)] 305 | ) 306 | 307 | def filter(self, func) -> "OCRResults": 308 | """Returns OCR results that satisfy a custom filter function.""" 309 | return OCRResults([item for item in self.data if func(item)]) 310 | 311 | def search( 312 | self, 313 | query: str, 314 | threshold: float = 0.0, 315 | lowercase: bool = False, 316 | score_func: Callable[[str, str], float] = _score_similarity, 317 | ) -> "OCRResults": 318 | """ 319 | Finds the best match for a query string with a given threshold. 320 | 321 | Args: 322 | query: Query string to search for. 323 | threshold: Minimum similarity score. 324 | lowercase: Whether to compare in lowercase. 325 | score_func: Custom similarity scoring function: f(query, target) -> float. 326 | 327 | Returns: 328 | OCRResults: Best match for the query string. 
329 | """ 330 | results = self._search_and_score(query, threshold, lowercase, score_func) 331 | return OCRResults([r[1] for r in results]) 332 | 333 | def search_and_score( 334 | self, 335 | query: str, 336 | threshold: float = 0.0, 337 | lowercase: bool = False, 338 | score_func: Callable[[str, str], float] = _score_similarity, 339 | ) -> list[tuple[float, OCRResult]]: 340 | """ 341 | Finds all matches for a query string while also returning the similarity score. 342 | 343 | Args: 344 | query: Query string to search for. 345 | threshold: Minimum similarity score. 346 | lowercase: Whether to compare in lowercase. 347 | score_func: Custom similarity scoring function: f(query, target) -> float. 348 | 349 | Returns: 350 | list[tuple[float, OCRResult]]: List of matches with their similarity scores. 351 | """ 352 | results = self._search_and_score(query, threshold, lowercase, score_func) 353 | scores = [r[0] for r in results] 354 | ocr = OCRResults([r[1] for r in results]) 355 | return list(zip(scores, ocr)) 356 | 357 | def first(self) -> Optional[OCRResult]: 358 | """Returns the first OCR result or None if empty.""" 359 | return self.items[0] if self.items else None 360 | 361 | def last(self) -> Optional[OCRResult]: 362 | """Returns the last OCR result or None if empty.""" 363 | return self.items[-1] if self.items else None 364 | 365 | def _search_and_score( 366 | self, 367 | query: str, 368 | threshold: float, 369 | lowercase: bool, 370 | score_func: Callable[[str, str], float], 371 | ) -> list[tuple[float, OCRResultDict]]: 372 | """Search and score that returns raw dictionary data.""" 373 | matches: tuple[float, OCRResultDict] = [] 374 | query = query.lower() if lowercase else query 375 | query = query.lower() if lowercase else query 376 | 377 | for d in self.data: 378 | target = d["text"].lower() if lowercase else d["text"] 379 | score = score_func(query, target) 380 | if score >= threshold: 381 | matches.append((score, d)) 382 | 383 | return sorted( 384 | matches, 
385 | key=lambda x: ( 386 | -x[0], 387 | x[1]["boundingBox"]["x"], 388 | x[1]["boundingBox"]["y"], 389 | x[1]["confidence"], 390 | ), 391 | ) 392 | 393 | 394 | def _parse_args(options: OCROptions) -> list[str]: 395 | """Parses OCR options into a list of command-line arguments.""" 396 | args = [] 397 | if options.get("fast"): 398 | args.append("--fast") 399 | 400 | if "languages" in options: 401 | languages = ",".join(options["languages"]) 402 | args.extend(["--languages", languages]) 403 | 404 | if options.get("correction"): 405 | args.append("--correction") 406 | 407 | if "custom_words" in options or "custom-words" in options: 408 | custom_words = ",".join(options["custom_words"]) 409 | args.extend(["--custom-words", custom_words]) 410 | 411 | if "custom_words_file" in options or "custom-words-file" in options: 412 | args.extend(["--custom-words-file", options["custom_words_file"]]) 413 | 414 | return args 415 | 416 | 417 | class SwiftOCR: 418 | """Wrapper for interacting with the SwiftOCR command-line tool.""" 419 | 420 | def __init__(self, swiftocr_path: str): 421 | """Initializes the SwiftOCR class with the path to the SwiftOCR executable.""" 422 | self.swiftocr_path = swiftocr_path 423 | 424 | def from_file(self, image_path: str, options: OCROptions = {}) -> OCRResults: 425 | """ 426 | Recognizes text from an image file using SwiftOCR. 427 | 428 | Args: 429 | image_path: Path to the image file. 430 | options: Configuration options for OCR (matches SwiftOCR command-line arguments). 
431 | 432 | """ 433 | args = [self.swiftocr_path, image_path] + _parse_args(options) 434 | 435 | try: 436 | result = subprocess.run( 437 | args, 438 | stdout=subprocess.PIPE, 439 | stderr=subprocess.PIPE, 440 | check=True, 441 | text=True, 442 | ) 443 | 444 | result_dict: list[OCRResultDict] = json.loads(result.stdout) 445 | 446 | return OCRResults(result_dict) 447 | 448 | except subprocess.CalledProcessError as e: 449 | raise RuntimeError( 450 | f"SwiftOCR failed with error:\n{e.stderr.strip()}" 451 | ) from e 452 | except json.JSONDecodeError: 453 | raise ValueError("Failed to parse SwiftOCR output as JSON") 454 | 455 | def from_pillow( 456 | self, image: "PIL.Image.Image", options: OCROptions = {} 457 | ) -> OCRResults: 458 | """Recognizes text from a PIL Image object using SwiftOCR.""" 459 | buffer = io.BytesIO() 460 | image.save(buffer, format="PNG") 461 | buffer.seek(0) 462 | process = subprocess.Popen( 463 | [self.swiftocr_path, "-"] + _parse_args(options), 464 | stdin=subprocess.PIPE, 465 | stdout=subprocess.PIPE, 466 | stderr=subprocess.PIPE, 467 | ) 468 | stdout, stderr = process.communicate(input=buffer.read()) 469 | 470 | if process.returncode == 0: 471 | try: 472 | result_dict: list[OCRResultDict] = json.loads(stdout) 473 | except json.JSONDecodeError: 474 | raise ValueError("Failed to parse SwiftOCR output as JSON") 475 | return OCRResults(result_dict) 476 | else: 477 | raw_message = stderr.decode("utf-8") 478 | if "No text found" in raw_message: 479 | return OCRResults([]) 480 | raise RuntimeError(f"SwiftOCR failed: " + stderr.decode("utf-8")) 481 | --------------------------------------------------------------------------------