├── macocr_py ├── __init__.py └── wrapper.py ├── Package.swift ├── README.md ├── setup.py ├── LICENSE ├── .gitignore └── Sources └── macocr └── main.swift /macocr_py/__init__.py: -------------------------------------------------------------------------------- 1 | from .wrapper import macocr 2 | __all__ = ["macocr"] 3 | -------------------------------------------------------------------------------- /Package.swift: -------------------------------------------------------------------------------- 1 | // swift-tools-version:5.3 2 | // The swift-tools-version declares the minimum version of Swift required to build this package. 3 | 4 | import PackageDescription 5 | 6 | let package = Package( 7 | name: "macocr", 8 | platforms: [ 9 | .macOS(.v10_15) 10 | ], 11 | dependencies: [ 12 | ], 13 | targets: [ 14 | .target(name: "macocr", dependencies: []), 15 | ] 16 | ) 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # macocr 2 | 3 | OCR command line utility for macOS 10.15+. Utilizes the [VNRecognizeTextRequest](https://developer.apple.com/documentation/vision/vnrecognizetextrequest) API. 4 | 5 | ## Build and Run 6 | 7 | ``` 8 | swift build 9 | swift run 10 | ``` 11 | 12 | If `-c release` is not used, then the executable may be located at: `./.build/debug/macocr` 13 | 14 | ## Python Build and Run 15 | 16 | ``` 17 | pip install git+https://github.com/ughe/macocr.git 18 | # or locally: cd macocr && pip install -e . 19 | ``` 20 | 21 | usage: 22 | 23 | ``` 24 | from macocr_py import macocr 25 | result_json = macocr("./input.png") 26 | ``` 27 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | from setuptools.command.build_py import build_py 3 | import subprocess 4 | import shutil 5 | import os 6 | 7 | class BuildSwift(build_py): 8 | def run(self): 9 | # Build Swift binary from source 10 | subprocess.check_call(['swift', 'build', '-c', 'release']) 11 | 12 | # Copy binary to package 13 | bin_dir = os.path.join(self.build_lib, 'macocr_py', 'bin') 14 | os.makedirs(bin_dir, exist_ok=True) 15 | shutil.copy('.build/release/macocr', bin_dir) 16 | 17 | # Continue normal build 18 | super().run() 19 | 20 | setup( 21 | name='macocr', 22 | version='0.1.0', 23 | packages=['macocr_py'], 24 | cmdclass={'build_py': BuildSwift}, 25 | url='https://github.com/ughe/macocr', 26 | description='macOS OCR using Vision framework', 27 | ) 28 | -------------------------------------------------------------------------------- /macocr_py/wrapper.py: -------------------------------------------------------------------------------- 1 | import json 2 | import pathlib 3 | import subprocess 4 | import sys 5 | import typing as t 6 | 7 | def macocr(image_path: str, accurate: bool = True, fix: bool = False) -> t.List[t.Dict]: 8 | """Run macocr on input image and return JSON results. 9 | Utilizes the VNRecognizeTextRequest API on macOS only. 10 | https://developer.apple.com/documentation/vision/vnrecognizetextrequest""" 11 | binary = pathlib.Path(__file__).parent / "bin" / "macocr" 12 | cmd = [str(binary)] 13 | if not accurate: 14 | cmd.append("--fast") 15 | if fix: 16 | cmd.append("--fix") 17 | 18 | cmd.append("--json") 19 | cmd.append(image_path) 20 | 21 | try: 22 | result = subprocess.run(cmd, capture_output=True, text=True, check=True) 23 | return json.loads(result.stdout.strip()) 24 | except: 25 | print(f"Unexpected failure in macocr binary command: {' '.join(cmd)}", file=sys.stderr) 26 | return None 27 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 William Ughetta 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .venv 3 | *~ 4 | *.swp 5 | *.egg-info 6 | 7 | # Xcode 8 | # 9 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 10 | 11 | ## User settings 12 | xcuserdata/ 13 | 14 | ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) 15 | *.xcscmblueprint 16 | *.xccheckout 17 | 18 | ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) 19 | build/ 20 | DerivedData/ 21 | *.moved-aside 22 | *.pbxuser 23 | !default.pbxuser 24 | *.mode1v3 25 | !default.mode1v3 26 | *.mode2v3 27 | !default.mode2v3 28 | *.perspectivev3 29 | !default.perspectivev3 30 | 31 | ## Obj-C/Swift specific 32 | *.hmap 33 | 34 | ## App packaging 35 | *.ipa 36 | *.dSYM.zip 37 | *.dSYM 38 | 39 | ## Playgrounds 40 | timeline.xctimeline 41 | playground.xcworkspace 42 | 43 | # Swift Package Manager 44 | # 45 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 46 | # Packages/ 47 | # Package.pins 48 | # Package.resolved 49 | # *.xcodeproj 50 | # 51 | # Xcode automatically generates this directory with a .xcworkspacedata file and xcuserdata 52 | # hence it is not needed unless you have added a package configuration file to your project 53 | # .swiftpm 54 | 55 | .build/ 56 | 57 | # CocoaPods 58 | # 59 | # We recommend against adding the Pods directory to your .gitignore. However 60 | # you should judge for yourself, the pros and cons are mentioned at: 61 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 62 | # 63 | # Pods/ 64 | # 65 | # Add this line if you want to avoid checking in source code from the Xcode workspace 66 | # *.xcworkspace 67 | 68 | # Carthage 69 | # 70 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 71 | # Carthage/Checkouts 72 | 73 | Carthage/Build/ 74 | 75 | # Accio dependency management 76 | Dependencies/ 77 | .accio/ 78 | 79 | # fastlane 80 | # 81 | # It is recommended to not store the screenshots in the git repo. 82 | # Instead, use fastlane to re-generate the screenshots whenever they are needed. 83 | # For more information about the recommended setup visit: 84 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 85 | 86 | fastlane/report.xml 87 | fastlane/Preview.html 88 | fastlane/screenshots/**/*.png 89 | fastlane/test_output 90 | 91 | # Code Injection 92 | # 93 | # After new code Injection tools there's a generated folder /iOSInjectionProject 94 | # https://github.com/johnno1962/injectionforxcode 95 | 96 | iOSInjectionProject/ 97 | -------------------------------------------------------------------------------- /Sources/macocr/main.swift: -------------------------------------------------------------------------------- 1 | import Cocoa 2 | import Vision 3 | 4 | // https://developer.apple.com/documentation/vision/vnrecognizetextrequest 5 | 6 | var MODE = VNRequestTextRecognitionLevel.accurate 7 | var USE_LANG_CORRECTION = false 8 | var REVISION:Int 9 | if #available(macOS 13, *) { 10 | REVISION = VNRecognizeTextRequestRevision3 11 | } else if #available(macOS 11, *) { 12 | REVISION = VNRecognizeTextRequestRevision2 13 | } else { 14 | REVISION = VNRecognizeTextRequestRevision1 15 | } 16 | 17 | func main(args: [String]) -> Int32 { 18 | var argIndex = 1 19 | var outputJSON = false 20 | var minTextHeight: Float? 21 | var customWords: [String] = [] 22 | 23 | // Parse flags 24 | while argIndex < args.count && args[argIndex].hasPrefix("-") { 25 | let flag = args[argIndex] 26 | 27 | switch flag { 28 | case "-j", "--json": 29 | outputJSON = true 30 | argIndex += 1 31 | case "--version": 32 | print("VNRecognizeTextRequest Revision \(REVISION)") 33 | return 0 34 | case "--fast": 35 | MODE = .fast 36 | argIndex += 1 37 | case "--fix": 38 | USE_LANG_CORRECTION = true 39 | argIndex += 1 40 | case "--min-text-height": 41 | guard argIndex + 1 < args.count else { 42 | fputs("Error: --min-text-height requires a value\n", stderr) 43 | return 1 44 | } 45 | if let value = Float(args[argIndex + 1]) { 46 | minTextHeight = value 47 | } else { 48 | fputs("Error: invalid --min-text-height value\n", stderr) 49 | return 1 50 | } 51 | argIndex += 2 52 | case "--custom-word-file": 53 | guard argIndex + 1 < args.count else { 54 | fputs("Error: --custom-word-file requires a path\n", stderr) 55 | return 1 56 | } 57 | let wordFile = args[argIndex + 1] 58 | if let contents = try? String(contentsOfFile: wordFile, encoding: .utf8) { 59 | customWords = contents.components(separatedBy: .newlines) 60 | .map { $0.trimmingCharacters(in: .whitespaces) } 61 | .filter { !$0.isEmpty } 62 | } else { 63 | fputs("Error: failed to read custom word file '\(wordFile)'\n", stderr) 64 | return 1 65 | } 66 | argIndex += 2 67 | default: 68 | fputs("Error: unknown flag '\(flag)'\n", stderr) 69 | return 1 70 | } 71 | } 72 | 73 | guard args.count >= argIndex + 1 else { 74 | fputs(String(format: "usage: %1$@ [flags] image [dst]\n", args[0]), stderr) 75 | fputs("flags:\n", stderr) 76 | fputs(" -j, --json Output in JSON format\n", stderr) 77 | fputs(" --version Print revision number\n", stderr) 78 | fputs(" --fast Use fast recognition (default: accurate)\n", stderr) 79 | fputs(" --fix Enable language correction (default: off)\n", stderr) 80 | fputs(" --min-text-height N Set minimum text height (0-1)\n", stderr) 81 | fputs(" --custom-word-file FILE Load custom words from file\n", stderr) 82 | return 1 83 | } 84 | 85 | let src = args[argIndex] 86 | let dst = args.count > argIndex + 1 ? args[argIndex + 1] : nil 87 | 88 | guard let img = NSImage(byReferencingFile: src) else { 89 | fputs("Error: failed to load image '\(src)'\n", stderr) 90 | return 1 91 | } 92 | 93 | guard let imgRef = img.cgImage(forProposedRect: &img.alignmentRect, context: nil, hints: nil) else { 94 | fputs("Error: failed to convert NSImage to CGImage for '\(src)'\n", stderr) 95 | return 1 96 | } 97 | 98 | let imgHeight = CGFloat(imgRef.height) 99 | 100 | let request = VNRecognizeTextRequest { (request, error) in 101 | let observations = request.results as? [VNRecognizedTextObservation] ?? [] 102 | 103 | let output: String 104 | 105 | if outputJSON { 106 | var jsonLines: [String] = ["["] 107 | 108 | for (index, observation) in observations.enumerated() { 109 | guard let candidate = observation.topCandidates(1).first else { continue } 110 | let bbox = observation.boundingBox 111 | 112 | // Convert normalized coordinates (0-1) to pixel coordinates 113 | // Vision uses bottom-left origin, y needs to be flipped 114 | let x = Int(bbox.minX * CGFloat(imgRef.width)) 115 | let y = Int((1 - bbox.maxY) * imgHeight) // Flip y-axis 116 | let w = Int(bbox.width * CGFloat(imgRef.width)) 117 | let h = Int(bbox.height * imgHeight) 118 | 119 | // Manually build JSON with deterministic key order 120 | let txtEscaped = candidate.string 121 | .replacingOccurrences(of: "\\", with: "\\\\") 122 | .replacingOccurrences(of: "\"", with: "\\\"") 123 | .replacingOccurrences(of: "\n", with: "\\n") 124 | .replacingOccurrences(of: "\r", with: "\\r") 125 | .replacingOccurrences(of: "\t", with: "\\t") 126 | 127 | let jsonObj = """ 128 | { "txt" : "\(txtEscaped)", 129 | "x" : \(x), "y" : \(y), "w" : \(w), "h" : \(h), "conf" : \(candidate.confidence) } 130 | """ 131 | 132 | if index < observations.count - 1 { 133 | jsonLines.append(jsonObj + ",") 134 | } else { 135 | jsonLines.append(jsonObj) 136 | } 137 | } 138 | 139 | jsonLines.append("]") 140 | output = jsonLines.joined(separator: "\n") 141 | } else { 142 | let obs : [String] = observations.map { $0.topCandidates(1).first?.string ?? ""} 143 | output = obs.joined(separator: "\n") 144 | } 145 | 146 | if let dst = dst { 147 | try? output.write(to: URL(fileURLWithPath: dst), atomically: true, encoding: .utf8) 148 | } else { 149 | print(output) 150 | } 151 | } 152 | request.recognitionLevel = MODE 153 | request.usesLanguageCorrection = USE_LANG_CORRECTION 154 | request.revision = REVISION 155 | 156 | if let minHeight = minTextHeight { 157 | request.minimumTextHeight = minHeight 158 | } 159 | 160 | if !customWords.isEmpty { 161 | request.customWords = customWords 162 | } 163 | 164 | try? VNImageRequestHandler(cgImage: imgRef, options: [:]).perform([request]) 165 | 166 | return 0 167 | } 168 | exit(main(args: CommandLine.arguments)) 169 | --------------------------------------------------------------------------------