├── .gitignore ├── .python-version ├── CONVENTIONS.md ├── app ├── Perspectra.xcodeproj │ ├── project.pbxproj │ ├── project.xcworkspace │ │ ├── contents.xcworkspacedata │ │ └── xcuserdata │ │ │ └── adrian.xcuserdatad │ │ │ └── UserInterfaceState.xcuserstate │ └── xcuserdata │ │ └── adrian.xcuserdatad │ │ └── xcschemes │ │ ├── Perspectra.xcscheme │ │ ├── Transform.xcscheme │ │ └── xcschememanagement.plist ├── Perspectra │ ├── AppDelegate.swift │ ├── Assets.xcassets │ │ └── AppIcon.appiconset │ │ │ └── Contents.json │ ├── Base.lproj │ │ └── MainMenu.xib │ └── Info.plist └── Transform │ ├── Base.lproj │ └── PhotoEditingViewController.xib │ ├── Info.plist │ ├── PhotoEditingViewController.swift │ └── Transform.entitlements ├── images ├── examples │ ├── 01_binary.png │ ├── 01_original.jpeg │ ├── 02_binary.png │ ├── 02_original.jpeg │ ├── 03_gray.png │ └── 03_original.jpeg ├── logo-1024.png ├── logo.icns └── logo.png ├── jxa ├── .gitignore ├── Perspectra.app │ └── Contents │ │ ├── Info.plist │ │ ├── MacOS │ │ └── applet │ │ ├── PkgInfo │ │ └── Resources │ │ ├── applet.icns │ │ ├── applet.rsrc │ │ └── description.rtfd │ │ └── TXT.rtf ├── makefile ├── package.json └── source │ ├── config.js │ └── main.js ├── makefile ├── notebooks ├── approximate_border_color.py ├── cartesian_to_polar.py ├── harris_corner_detection.py ├── line_angle.py ├── line_curvature.png ├── line_curvature.py ├── perspectra.py ├── polygon_simplification.py └── split_book_pages.py ├── pyproject.toml ├── readme.md ├── scripts ├── readme.md ├── setup-cx_freeze.py └── setup-py2app.py ├── src └── perspectra │ ├── __init__.py │ ├── __main__.py │ ├── approximate_polygon.py │ ├── binarize.py │ ├── cli.py │ ├── file_utils.py │ ├── multipass_cleaner.py │ ├── noise_generator.py │ ├── splitter.py │ └── transformer.py ├── tests ├── fixtures │ ├── book_color.jpeg │ ├── book_gray.png │ ├── doc_binary.png │ └── doc_photo.jpeg ├── test_approximate_polygon.py ├── test_binarize.py ├── test_cleaning.py ├── 
test_segmentation.py └── test_splitting.py └── uv.lock /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | .aider* 3 | .env 4 | .ipynb_checkpoints 5 | .jupyter 6 | .mypy_cache 7 | .pytest_cache 8 | .ruff_cache 9 | .venv 10 | *.app 11 | *.egg-info 12 | /app/SamplePhotoEditingExtension 13 | /examples 14 | /research 15 | build 16 | dist 17 | notebooks_wolfram 18 | tests/*.png 19 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.12 2 | -------------------------------------------------------------------------------- /CONVENTIONS.md: -------------------------------------------------------------------------------- 1 | - Always use `uv` for all project management tasks (e.g. `uv add`, `uv pip`, …) 2 | -------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/project.pbxproj: -------------------------------------------------------------------------------- 1 | // !$*UTF8*$! 
2 | { 3 | archiveVersion = 1; 4 | classes = { 5 | }; 6 | objectVersion = 46; 7 | objects = { 8 | 9 | /* Begin PBXBuildFile section */ 10 | 3AE3D0381E3BDD9C000A889D /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3AE3D0371E3BDD9C000A889D /* AppDelegate.swift */; }; 11 | 3AE3D03A1E3BDD9C000A889D /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 3AE3D0391E3BDD9C000A889D /* Assets.xcassets */; }; 12 | 3AE3D03D1E3BDD9C000A889D /* MainMenu.xib in Resources */ = {isa = PBXBuildFile; fileRef = 3AE3D03B1E3BDD9C000A889D /* MainMenu.xib */; }; 13 | 3AE3D04B1E3BDE31000A889D /* PhotosUI.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3AE3D04A1E3BDE31000A889D /* PhotosUI.framework */; }; 14 | 3AE3D04D1E3BDE31000A889D /* Photos.framework in Frameworks */ = {isa = PBXBuildFile; fileRef = 3AE3D04C1E3BDE31000A889D /* Photos.framework */; }; 15 | 3AE3D0521E3BDE31000A889D /* PhotoEditingViewController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 3AE3D0511E3BDE31000A889D /* PhotoEditingViewController.swift */; }; 16 | 3AE3D0551E3BDE31000A889D /* PhotoEditingViewController.xib in Resources */ = {isa = PBXBuildFile; fileRef = 3AE3D0531E3BDE31000A889D /* PhotoEditingViewController.xib */; }; 17 | 3AE3D0591E3BDE31000A889D /* Transform.appex in Embed App Extensions */ = {isa = PBXBuildFile; fileRef = 3AE3D0481E3BDE31000A889D /* Transform.appex */; settings = {ATTRIBUTES = (RemoveHeadersOnCopy, ); }; }; 18 | /* End PBXBuildFile section */ 19 | 20 | /* Begin PBXContainerItemProxy section */ 21 | 3AE3D0571E3BDE31000A889D /* PBXContainerItemProxy */ = { 22 | isa = PBXContainerItemProxy; 23 | containerPortal = 3AE3D02C1E3BDD9B000A889D /* Project object */; 24 | proxyType = 1; 25 | remoteGlobalIDString = 3AE3D0471E3BDE31000A889D; 26 | remoteInfo = Transform; 27 | }; 28 | /* End PBXContainerItemProxy section */ 29 | 30 | /* Begin PBXCopyFilesBuildPhase section */ 31 | 3AE3D05D1E3BDE31000A889D /* Embed App Extensions */ = { 32 | isa = 
PBXCopyFilesBuildPhase; 33 | buildActionMask = 2147483647; 34 | dstPath = ""; 35 | dstSubfolderSpec = 13; 36 | files = ( 37 | 3AE3D0591E3BDE31000A889D /* Transform.appex in Embed App Extensions */, 38 | ); 39 | name = "Embed App Extensions"; 40 | runOnlyForDeploymentPostprocessing = 0; 41 | }; 42 | /* End PBXCopyFilesBuildPhase section */ 43 | 44 | /* Begin PBXFileReference section */ 45 | 3AE3D0341E3BDD9C000A889D /* Perspectra.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = Perspectra.app; sourceTree = BUILT_PRODUCTS_DIR; }; 46 | 3AE3D0371E3BDD9C000A889D /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; 47 | 3AE3D0391E3BDD9C000A889D /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; 48 | 3AE3D03C1E3BDD9C000A889D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/MainMenu.xib; sourceTree = ""; }; 49 | 3AE3D03E1E3BDD9C000A889D /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 50 | 3AE3D0481E3BDE31000A889D /* Transform.appex */ = {isa = PBXFileReference; explicitFileType = "wrapper.app-extension"; includeInIndex = 0; path = Transform.appex; sourceTree = BUILT_PRODUCTS_DIR; }; 51 | 3AE3D04A1E3BDE31000A889D /* PhotosUI.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = PhotosUI.framework; path = System/Library/Frameworks/PhotosUI.framework; sourceTree = SDKROOT; }; 52 | 3AE3D04C1E3BDE31000A889D /* Photos.framework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.framework; name = Photos.framework; path = System/Library/Frameworks/Photos.framework; sourceTree = SDKROOT; }; 53 | 3AE3D0501E3BDE31000A889D /* Transform.entitlements */ = {isa = PBXFileReference; lastKnownFileType = 
text.plist.entitlements; path = Transform.entitlements; sourceTree = ""; }; 54 | 3AE3D0511E3BDE31000A889D /* PhotoEditingViewController.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = PhotoEditingViewController.swift; sourceTree = ""; }; 55 | 3AE3D0541E3BDE31000A889D /* Base */ = {isa = PBXFileReference; lastKnownFileType = file.xib; name = Base; path = Base.lproj/PhotoEditingViewController.xib; sourceTree = ""; }; 56 | 3AE3D0561E3BDE31000A889D /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; 57 | /* End PBXFileReference section */ 58 | 59 | /* Begin PBXFrameworksBuildPhase section */ 60 | 3AE3D0311E3BDD9C000A889D /* Frameworks */ = { 61 | isa = PBXFrameworksBuildPhase; 62 | buildActionMask = 2147483647; 63 | files = ( 64 | ); 65 | runOnlyForDeploymentPostprocessing = 0; 66 | }; 67 | 3AE3D0451E3BDE31000A889D /* Frameworks */ = { 68 | isa = PBXFrameworksBuildPhase; 69 | buildActionMask = 2147483647; 70 | files = ( 71 | 3AE3D04B1E3BDE31000A889D /* PhotosUI.framework in Frameworks */, 72 | 3AE3D04D1E3BDE31000A889D /* Photos.framework in Frameworks */, 73 | ); 74 | runOnlyForDeploymentPostprocessing = 0; 75 | }; 76 | /* End PBXFrameworksBuildPhase section */ 77 | 78 | /* Begin PBXGroup section */ 79 | 3AE3D02B1E3BDD9B000A889D = { 80 | isa = PBXGroup; 81 | children = ( 82 | 3AE3D0361E3BDD9C000A889D /* Perspectra */, 83 | 3AE3D04E1E3BDE31000A889D /* Transform */, 84 | 3AE3D0491E3BDE31000A889D /* Frameworks */, 85 | 3AE3D0351E3BDD9C000A889D /* Products */, 86 | ); 87 | sourceTree = ""; 88 | }; 89 | 3AE3D0351E3BDD9C000A889D /* Products */ = { 90 | isa = PBXGroup; 91 | children = ( 92 | 3AE3D0341E3BDD9C000A889D /* Perspectra.app */, 93 | 3AE3D0481E3BDE31000A889D /* Transform.appex */, 94 | ); 95 | name = Products; 96 | sourceTree = ""; 97 | }; 98 | 3AE3D0361E3BDD9C000A889D /* Perspectra */ = { 99 | isa = PBXGroup; 100 | children = ( 101 | 3AE3D0371E3BDD9C000A889D /* 
AppDelegate.swift */, 102 | 3AE3D0391E3BDD9C000A889D /* Assets.xcassets */, 103 | 3AE3D03B1E3BDD9C000A889D /* MainMenu.xib */, 104 | 3AE3D03E1E3BDD9C000A889D /* Info.plist */, 105 | ); 106 | path = Perspectra; 107 | sourceTree = ""; 108 | }; 109 | 3AE3D0491E3BDE31000A889D /* Frameworks */ = { 110 | isa = PBXGroup; 111 | children = ( 112 | 3AE3D04A1E3BDE31000A889D /* PhotosUI.framework */, 113 | 3AE3D04C1E3BDE31000A889D /* Photos.framework */, 114 | ); 115 | name = Frameworks; 116 | sourceTree = ""; 117 | }; 118 | 3AE3D04E1E3BDE31000A889D /* Transform */ = { 119 | isa = PBXGroup; 120 | children = ( 121 | 3AE3D0511E3BDE31000A889D /* PhotoEditingViewController.swift */, 122 | 3AE3D0531E3BDE31000A889D /* PhotoEditingViewController.xib */, 123 | 3AE3D0561E3BDE31000A889D /* Info.plist */, 124 | 3AE3D04F1E3BDE31000A889D /* Supporting Files */, 125 | ); 126 | path = Transform; 127 | sourceTree = ""; 128 | }; 129 | 3AE3D04F1E3BDE31000A889D /* Supporting Files */ = { 130 | isa = PBXGroup; 131 | children = ( 132 | 3AE3D0501E3BDE31000A889D /* Transform.entitlements */, 133 | ); 134 | name = "Supporting Files"; 135 | sourceTree = ""; 136 | }; 137 | /* End PBXGroup section */ 138 | 139 | /* Begin PBXNativeTarget section */ 140 | 3AE3D0331E3BDD9C000A889D /* Perspectra */ = { 141 | isa = PBXNativeTarget; 142 | buildConfigurationList = 3AE3D0411E3BDD9C000A889D /* Build configuration list for PBXNativeTarget "Perspectra" */; 143 | buildPhases = ( 144 | 3AE3D0301E3BDD9C000A889D /* Sources */, 145 | 3AE3D0311E3BDD9C000A889D /* Frameworks */, 146 | 3AE3D0321E3BDD9C000A889D /* Resources */, 147 | 3AE3D05D1E3BDE31000A889D /* Embed App Extensions */, 148 | ); 149 | buildRules = ( 150 | ); 151 | dependencies = ( 152 | 3AE3D0581E3BDE31000A889D /* PBXTargetDependency */, 153 | ); 154 | name = Perspectra; 155 | productName = Perspectra; 156 | productReference = 3AE3D0341E3BDD9C000A889D /* Perspectra.app */; 157 | productType = "com.apple.product-type.application"; 158 | }; 159 | 
3AE3D0471E3BDE31000A889D /* Transform */ = { 160 | isa = PBXNativeTarget; 161 | buildConfigurationList = 3AE3D05A1E3BDE31000A889D /* Build configuration list for PBXNativeTarget "Transform" */; 162 | buildPhases = ( 163 | 3AE3D0441E3BDE31000A889D /* Sources */, 164 | 3AE3D0451E3BDE31000A889D /* Frameworks */, 165 | 3AE3D0461E3BDE31000A889D /* Resources */, 166 | ); 167 | buildRules = ( 168 | ); 169 | dependencies = ( 170 | ); 171 | name = Transform; 172 | productName = Transform; 173 | productReference = 3AE3D0481E3BDE31000A889D /* Transform.appex */; 174 | productType = "com.apple.product-type.app-extension"; 175 | }; 176 | /* End PBXNativeTarget section */ 177 | 178 | /* Begin PBXProject section */ 179 | 3AE3D02C1E3BDD9B000A889D /* Project object */ = { 180 | isa = PBXProject; 181 | attributes = { 182 | LastSwiftUpdateCheck = 0820; 183 | LastUpgradeCheck = 0820; 184 | ORGANIZATIONNAME = Feram; 185 | TargetAttributes = { 186 | 3AE3D0331E3BDD9C000A889D = { 187 | CreatedOnToolsVersion = 8.2.1; 188 | ProvisioningStyle = Automatic; 189 | }; 190 | 3AE3D0471E3BDE31000A889D = { 191 | CreatedOnToolsVersion = 8.2.1; 192 | ProvisioningStyle = Automatic; 193 | }; 194 | }; 195 | }; 196 | buildConfigurationList = 3AE3D02F1E3BDD9B000A889D /* Build configuration list for PBXProject "Perspectra" */; 197 | compatibilityVersion = "Xcode 3.2"; 198 | developmentRegion = English; 199 | hasScannedForEncodings = 0; 200 | knownRegions = ( 201 | en, 202 | Base, 203 | ); 204 | mainGroup = 3AE3D02B1E3BDD9B000A889D; 205 | productRefGroup = 3AE3D0351E3BDD9C000A889D /* Products */; 206 | projectDirPath = ""; 207 | projectRoot = ""; 208 | targets = ( 209 | 3AE3D0331E3BDD9C000A889D /* Perspectra */, 210 | 3AE3D0471E3BDE31000A889D /* Transform */, 211 | ); 212 | }; 213 | /* End PBXProject section */ 214 | 215 | /* Begin PBXResourcesBuildPhase section */ 216 | 3AE3D0321E3BDD9C000A889D /* Resources */ = { 217 | isa = PBXResourcesBuildPhase; 218 | buildActionMask = 2147483647; 219 | files = ( 220 | 
3AE3D03A1E3BDD9C000A889D /* Assets.xcassets in Resources */, 221 | 3AE3D03D1E3BDD9C000A889D /* MainMenu.xib in Resources */, 222 | ); 223 | runOnlyForDeploymentPostprocessing = 0; 224 | }; 225 | 3AE3D0461E3BDE31000A889D /* Resources */ = { 226 | isa = PBXResourcesBuildPhase; 227 | buildActionMask = 2147483647; 228 | files = ( 229 | 3AE3D0551E3BDE31000A889D /* PhotoEditingViewController.xib in Resources */, 230 | ); 231 | runOnlyForDeploymentPostprocessing = 0; 232 | }; 233 | /* End PBXResourcesBuildPhase section */ 234 | 235 | /* Begin PBXSourcesBuildPhase section */ 236 | 3AE3D0301E3BDD9C000A889D /* Sources */ = { 237 | isa = PBXSourcesBuildPhase; 238 | buildActionMask = 2147483647; 239 | files = ( 240 | 3AE3D0381E3BDD9C000A889D /* AppDelegate.swift in Sources */, 241 | ); 242 | runOnlyForDeploymentPostprocessing = 0; 243 | }; 244 | 3AE3D0441E3BDE31000A889D /* Sources */ = { 245 | isa = PBXSourcesBuildPhase; 246 | buildActionMask = 2147483647; 247 | files = ( 248 | 3AE3D0521E3BDE31000A889D /* PhotoEditingViewController.swift in Sources */, 249 | ); 250 | runOnlyForDeploymentPostprocessing = 0; 251 | }; 252 | /* End PBXSourcesBuildPhase section */ 253 | 254 | /* Begin PBXTargetDependency section */ 255 | 3AE3D0581E3BDE31000A889D /* PBXTargetDependency */ = { 256 | isa = PBXTargetDependency; 257 | target = 3AE3D0471E3BDE31000A889D /* Transform */; 258 | targetProxy = 3AE3D0571E3BDE31000A889D /* PBXContainerItemProxy */; 259 | }; 260 | /* End PBXTargetDependency section */ 261 | 262 | /* Begin PBXVariantGroup section */ 263 | 3AE3D03B1E3BDD9C000A889D /* MainMenu.xib */ = { 264 | isa = PBXVariantGroup; 265 | children = ( 266 | 3AE3D03C1E3BDD9C000A889D /* Base */, 267 | ); 268 | name = MainMenu.xib; 269 | sourceTree = ""; 270 | }; 271 | 3AE3D0531E3BDE31000A889D /* PhotoEditingViewController.xib */ = { 272 | isa = PBXVariantGroup; 273 | children = ( 274 | 3AE3D0541E3BDE31000A889D /* Base */, 275 | ); 276 | name = PhotoEditingViewController.xib; 277 | sourceTree = ""; 
278 | }; 279 | /* End PBXVariantGroup section */ 280 | 281 | /* Begin XCBuildConfiguration section */ 282 | 3AE3D03F1E3BDD9C000A889D /* Debug */ = { 283 | isa = XCBuildConfiguration; 284 | buildSettings = { 285 | ALWAYS_SEARCH_USER_PATHS = NO; 286 | CLANG_ANALYZER_NONNULL = YES; 287 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 288 | CLANG_CXX_LIBRARY = "libc++"; 289 | CLANG_ENABLE_MODULES = YES; 290 | CLANG_ENABLE_OBJC_ARC = YES; 291 | CLANG_WARN_BOOL_CONVERSION = YES; 292 | CLANG_WARN_CONSTANT_CONVERSION = YES; 293 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 294 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 295 | CLANG_WARN_EMPTY_BODY = YES; 296 | CLANG_WARN_ENUM_CONVERSION = YES; 297 | CLANG_WARN_INFINITE_RECURSION = YES; 298 | CLANG_WARN_INT_CONVERSION = YES; 299 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 300 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 301 | CLANG_WARN_UNREACHABLE_CODE = YES; 302 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 303 | CODE_SIGN_IDENTITY = "-"; 304 | COPY_PHASE_STRIP = NO; 305 | DEBUG_INFORMATION_FORMAT = dwarf; 306 | ENABLE_STRICT_OBJC_MSGSEND = YES; 307 | ENABLE_TESTABILITY = YES; 308 | GCC_C_LANGUAGE_STANDARD = gnu99; 309 | GCC_DYNAMIC_NO_PIC = NO; 310 | GCC_NO_COMMON_BLOCKS = YES; 311 | GCC_OPTIMIZATION_LEVEL = 0; 312 | GCC_PREPROCESSOR_DEFINITIONS = ( 313 | "DEBUG=1", 314 | "$(inherited)", 315 | ); 316 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 317 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 318 | GCC_WARN_UNDECLARED_SELECTOR = YES; 319 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 320 | GCC_WARN_UNUSED_FUNCTION = YES; 321 | GCC_WARN_UNUSED_VARIABLE = YES; 322 | MACOSX_DEPLOYMENT_TARGET = 10.12; 323 | MTL_ENABLE_DEBUG_INFO = YES; 324 | ONLY_ACTIVE_ARCH = YES; 325 | SDKROOT = macosx; 326 | SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; 327 | SWIFT_OPTIMIZATION_LEVEL = "-Onone"; 328 | }; 329 | name = Debug; 330 | }; 331 | 3AE3D0401E3BDD9C000A889D /* Release */ = { 332 | isa = XCBuildConfiguration; 333 | buildSettings = { 334 | 
ALWAYS_SEARCH_USER_PATHS = NO; 335 | CLANG_ANALYZER_NONNULL = YES; 336 | CLANG_CXX_LANGUAGE_STANDARD = "gnu++0x"; 337 | CLANG_CXX_LIBRARY = "libc++"; 338 | CLANG_ENABLE_MODULES = YES; 339 | CLANG_ENABLE_OBJC_ARC = YES; 340 | CLANG_WARN_BOOL_CONVERSION = YES; 341 | CLANG_WARN_CONSTANT_CONVERSION = YES; 342 | CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; 343 | CLANG_WARN_DOCUMENTATION_COMMENTS = YES; 344 | CLANG_WARN_EMPTY_BODY = YES; 345 | CLANG_WARN_ENUM_CONVERSION = YES; 346 | CLANG_WARN_INFINITE_RECURSION = YES; 347 | CLANG_WARN_INT_CONVERSION = YES; 348 | CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; 349 | CLANG_WARN_SUSPICIOUS_MOVE = YES; 350 | CLANG_WARN_UNREACHABLE_CODE = YES; 351 | CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; 352 | CODE_SIGN_IDENTITY = "-"; 353 | COPY_PHASE_STRIP = NO; 354 | DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; 355 | ENABLE_NS_ASSERTIONS = NO; 356 | ENABLE_STRICT_OBJC_MSGSEND = YES; 357 | GCC_C_LANGUAGE_STANDARD = gnu99; 358 | GCC_NO_COMMON_BLOCKS = YES; 359 | GCC_WARN_64_TO_32_BIT_CONVERSION = YES; 360 | GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; 361 | GCC_WARN_UNDECLARED_SELECTOR = YES; 362 | GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; 363 | GCC_WARN_UNUSED_FUNCTION = YES; 364 | GCC_WARN_UNUSED_VARIABLE = YES; 365 | MACOSX_DEPLOYMENT_TARGET = 10.12; 366 | MTL_ENABLE_DEBUG_INFO = NO; 367 | SDKROOT = macosx; 368 | SWIFT_OPTIMIZATION_LEVEL = "-Owholemodule"; 369 | }; 370 | name = Release; 371 | }; 372 | 3AE3D0421E3BDD9C000A889D /* Debug */ = { 373 | isa = XCBuildConfiguration; 374 | buildSettings = { 375 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 376 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 377 | COMBINE_HIDPI_IMAGES = YES; 378 | INFOPLIST_FILE = Perspectra/Info.plist; 379 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; 380 | PRODUCT_BUNDLE_IDENTIFIER = co.feram.Perspectra; 381 | PRODUCT_NAME = "$(TARGET_NAME)"; 382 | SWIFT_VERSION = 3.0; 383 | }; 384 | name = Debug; 385 | }; 386 | 
3AE3D0431E3BDD9C000A889D /* Release */ = { 387 | isa = XCBuildConfiguration; 388 | buildSettings = { 389 | ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; 390 | ASSETCATALOG_COMPILER_APPICON_NAME = AppIcon; 391 | COMBINE_HIDPI_IMAGES = YES; 392 | INFOPLIST_FILE = Perspectra/Info.plist; 393 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks"; 394 | PRODUCT_BUNDLE_IDENTIFIER = co.feram.Perspectra; 395 | PRODUCT_NAME = "$(TARGET_NAME)"; 396 | SWIFT_VERSION = 3.0; 397 | }; 398 | name = Release; 399 | }; 400 | 3AE3D05B1E3BDE31000A889D /* Debug */ = { 401 | isa = XCBuildConfiguration; 402 | buildSettings = { 403 | CODE_SIGN_ENTITLEMENTS = Transform/Transform.entitlements; 404 | INFOPLIST_FILE = Transform/Info.plist; 405 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @executable_path/../../../../Frameworks"; 406 | PRODUCT_BUNDLE_IDENTIFIER = co.feram.Perspectra.Transform; 407 | PRODUCT_NAME = "$(TARGET_NAME)"; 408 | SKIP_INSTALL = YES; 409 | SWIFT_VERSION = 3.0; 410 | }; 411 | name = Debug; 412 | }; 413 | 3AE3D05C1E3BDE31000A889D /* Release */ = { 414 | isa = XCBuildConfiguration; 415 | buildSettings = { 416 | CODE_SIGN_ENTITLEMENTS = Transform/Transform.entitlements; 417 | INFOPLIST_FILE = Transform/Info.plist; 418 | LD_RUNPATH_SEARCH_PATHS = "$(inherited) @executable_path/../Frameworks @executable_path/../../../../Frameworks"; 419 | PRODUCT_BUNDLE_IDENTIFIER = co.feram.Perspectra.Transform; 420 | PRODUCT_NAME = "$(TARGET_NAME)"; 421 | SKIP_INSTALL = YES; 422 | SWIFT_VERSION = 3.0; 423 | }; 424 | name = Release; 425 | }; 426 | /* End XCBuildConfiguration section */ 427 | 428 | /* Begin XCConfigurationList section */ 429 | 3AE3D02F1E3BDD9B000A889D /* Build configuration list for PBXProject "Perspectra" */ = { 430 | isa = XCConfigurationList; 431 | buildConfigurations = ( 432 | 3AE3D03F1E3BDD9C000A889D /* Debug */, 433 | 3AE3D0401E3BDD9C000A889D /* Release */, 434 | ); 435 | defaultConfigurationIsVisible = 0; 436 | 
defaultConfigurationName = Release; 437 | }; 438 | 3AE3D0411E3BDD9C000A889D /* Build configuration list for PBXNativeTarget "Perspectra" */ = { 439 | isa = XCConfigurationList; 440 | buildConfigurations = ( 441 | 3AE3D0421E3BDD9C000A889D /* Debug */, 442 | 3AE3D0431E3BDD9C000A889D /* Release */, 443 | ); 444 | defaultConfigurationIsVisible = 0; 445 | }; 446 | 3AE3D05A1E3BDE31000A889D /* Build configuration list for PBXNativeTarget "Transform" */ = { 447 | isa = XCConfigurationList; 448 | buildConfigurations = ( 449 | 3AE3D05B1E3BDE31000A889D /* Debug */, 450 | 3AE3D05C1E3BDE31000A889D /* Release */, 451 | ); 452 | defaultConfigurationIsVisible = 0; 453 | }; 454 | /* End XCConfigurationList section */ 455 | }; 456 | rootObject = 3AE3D02C1E3BDD9B000A889D /* Project object */; 457 | } 458 | -------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- 1 | 2 | 4 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/project.xcworkspace/xcuserdata/adrian.xcuserdatad/UserInterfaceState.xcuserstate: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/app/Perspectra.xcodeproj/project.xcworkspace/xcuserdata/adrian.xcuserdatad/UserInterfaceState.xcuserstate -------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/xcuserdata/adrian.xcuserdatad/xcschemes/Perspectra.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 5 | 8 | 9 | 15 | 21 | 22 | 23 | 24 | 25 | 30 | 31 | 32 | 33 | 39 | 40 | 41 | 42 | 43 | 44 | 54 | 56 | 62 | 63 | 64 | 65 | 66 | 67 | 73 | 75 | 81 | 82 | 83 | 84 | 86 | 87 | 90 | 91 | 92 | 
-------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/xcuserdata/adrian.xcuserdatad/xcschemes/Transform.xcscheme: -------------------------------------------------------------------------------- 1 | 2 | 6 | 9 | 10 | 16 | 22 | 23 | 24 | 30 | 36 | 37 | 38 | 39 | 40 | 45 | 46 | 47 | 48 | 54 | 55 | 56 | 57 | 58 | 59 | 70 | 72 | 78 | 79 | 80 | 81 | 82 | 83 | 90 | 92 | 98 | 99 | 100 | 101 | 103 | 104 | 107 | 108 | 109 | -------------------------------------------------------------------------------- /app/Perspectra.xcodeproj/xcuserdata/adrian.xcuserdatad/xcschemes/xcschememanagement.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | SchemeUserState 6 | 7 | Perspectra.xcscheme 8 | 9 | orderHint 10 | 0 11 | 12 | Transform.xcscheme 13 | 14 | orderHint 15 | 1 16 | 17 | 18 | SuppressBuildableAutocreation 19 | 20 | 3AE3D0331E3BDD9C000A889D 21 | 22 | primary 23 | 24 | 25 | 3AE3D0471E3BDE31000A889D 26 | 27 | primary 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /app/Perspectra/AppDelegate.swift: -------------------------------------------------------------------------------- 1 | // 2 | // AppDelegate.swift 3 | // Perspectra 4 | // 5 | // Created by Adrian Sieber on 2017-01-27. 6 | // Copyright © 2017 Feram. All rights reserved. 7 | // 8 | 9 | import Cocoa 10 | 11 | @NSApplicationMain 12 | class AppDelegate: NSObject, NSApplicationDelegate { 13 | 14 | @IBOutlet weak var window: NSWindow! 
15 | 16 | 17 | func applicationDidFinishLaunching(_ aNotification: Notification) { 18 | // Insert code here to initialize your application 19 | } 20 | 21 | func applicationWillTerminate(_ aNotification: Notification) { 22 | // Insert code here to tear down your application 23 | } 24 | 25 | 26 | } 27 | 28 | -------------------------------------------------------------------------------- /app/Perspectra/Assets.xcassets/AppIcon.appiconset/Contents.json: -------------------------------------------------------------------------------- 1 | { 2 | "images" : [ 3 | { 4 | "idiom" : "mac", 5 | "size" : "16x16", 6 | "scale" : "1x" 7 | }, 8 | { 9 | "idiom" : "mac", 10 | "size" : "16x16", 11 | "scale" : "2x" 12 | }, 13 | { 14 | "idiom" : "mac", 15 | "size" : "32x32", 16 | "scale" : "1x" 17 | }, 18 | { 19 | "idiom" : "mac", 20 | "size" : "32x32", 21 | "scale" : "2x" 22 | }, 23 | { 24 | "idiom" : "mac", 25 | "size" : "128x128", 26 | "scale" : "1x" 27 | }, 28 | { 29 | "idiom" : "mac", 30 | "size" : "128x128", 31 | "scale" : "2x" 32 | }, 33 | { 34 | "idiom" : "mac", 35 | "size" : "256x256", 36 | "scale" : "1x" 37 | }, 38 | { 39 | "idiom" : "mac", 40 | "size" : "256x256", 41 | "scale" : "2x" 42 | }, 43 | { 44 | "idiom" : "mac", 45 | "size" : "512x512", 46 | "scale" : "1x" 47 | }, 48 | { 49 | "idiom" : "mac", 50 | "size" : "512x512", 51 | "scale" : "2x" 52 | } 53 | ], 54 | "info" : { 55 | "version" : 1, 56 | "author" : "xcode" 57 | } 58 | } -------------------------------------------------------------------------------- /app/Perspectra/Base.lproj/MainMenu.xib: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 
73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | 120 | 121 | 122 | 123 | 124 | 125 | 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | 167 | 168 | 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | 178 | 179 | 180 | 181 | 182 | 183 | 184 | 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | 194 | 195 | 196 | 197 | 198 | 199 | 200 | 201 | 202 | 203 | 204 | 205 | 206 | 207 | 208 | 209 | 210 | 211 | 212 | 213 | 214 | 215 | 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | 226 | 227 | 228 | 229 | 230 | 231 | 232 | 233 | 234 | 235 | 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | 249 | 250 | 251 | 252 | 253 | 254 | 255 | 256 | 257 | 258 | 259 | 260 | 261 | 262 | 263 | 264 | 265 | 266 | 267 | 268 | 269 | 270 | 271 | 272 | 273 | 274 | 275 | 276 | 277 | 278 | 279 | 280 | 281 | 282 | 283 | 284 | 285 | 286 | 287 | 288 | 289 | 290 | 291 | 292 | 293 | 294 | 295 | 296 | 297 | 298 | 299 | 300 | 301 | 302 | 303 | 304 | 305 | 306 | 307 | 308 | 309 | 310 | 311 | 312 | 313 | 314 | 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | 324 | 325 | 326 | 327 | 328 | 329 | 330 | 331 | 332 | 333 | 334 | 335 | 336 | 337 | 338 | 339 | 340 | 341 | 342 | 343 | 344 | 345 | 346 | 347 | 348 | 349 | 350 | 351 | 352 | 353 | 354 | 355 | 356 | 357 | 358 | 359 | 360 | 361 | 362 | 363 | 364 | 365 | 366 | 367 | 368 | 369 | 370 | 371 | 372 | 373 | 374 | 375 | 376 | 377 | 378 | 379 | 380 | 381 | 382 | 383 | 384 | 385 | 386 | 387 | 388 | 389 | 390 | 391 | 392 | 393 | 394 | 395 | 396 | 397 | 398 | 399 | 400 | 401 | 402 | 403 | 404 | 405 | 406 | 407 | 408 | 409 | 410 
| 411 | 412 | 413 | 414 | 415 | 416 | 417 | 418 | 419 | 420 | 421 | 422 | 423 | 424 | 425 | 426 | 427 | 428 | 429 | 430 | 431 | 432 | 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | 441 | 442 | 443 | 444 | 445 | 446 | 447 | 448 | 449 | 450 | 451 | 452 | 453 | 454 | 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | 463 | 464 | 465 | 466 | 467 | 468 | 469 | 470 | 471 | 472 | 473 | 474 | 475 | 476 | 477 | 478 | 479 | 480 | 481 | 482 | 483 | 484 | 485 | 486 | 487 | 488 | 489 | 490 | 491 | 492 | 493 | 494 | 495 | 496 | 497 | 498 | 499 | 500 | 501 | 502 | 503 | 504 | 505 | 506 | 507 | 508 | 509 | 510 | 511 | 512 | 513 | 514 | 515 | 516 | 517 | 518 | 519 | 520 | 521 | 522 | 523 | 524 | 525 | 526 | 527 | 528 | 529 | 530 | 531 | 532 | 533 | 534 | 535 | 536 | 537 | Default 538 | 539 | 540 | 541 | 542 | 543 | 544 | Left to Right 545 | 546 | 547 | 548 | 549 | 550 | 551 | Right to Left 552 | 553 | 554 | 555 | 556 | 557 | 558 | 559 | 560 | 561 | 562 | Default 563 | 564 | 565 | 566 | 567 | 568 | 569 | Left to Right 570 | 571 | 572 | 573 | 574 | 575 | 576 | Right to Left 577 | 578 | 579 | 580 | 581 | 582 | 583 | 584 | 585 | 586 | 587 | 588 | 589 | 590 | 591 | 592 | 593 | 594 | 595 | 596 | 597 | 598 | 599 | 600 | 601 | 602 | 603 | 604 | 605 | 606 | 607 | 608 | 609 | 610 | 611 | 612 | 613 | 614 | 615 | 616 | 617 | 618 | 619 | 620 | 621 | 622 | 623 | 624 | 625 | 626 | 627 | 628 | 629 | 630 | 631 | 632 | 633 | 634 | 635 | 636 | 637 | 638 | 639 | 640 | 641 | 642 | 643 | 644 | 645 | 646 | 647 | 648 | 649 | 650 | 651 | 652 | 653 | 654 | 655 | 656 | 657 | 658 | 659 | 660 | 661 | 662 | 663 | 664 | 665 | 666 | 667 | 668 | 669 | 670 | 671 | 672 | 673 | 674 | 675 | 676 | 677 | 678 | 679 | 680 | 681 | 682 | 683 | 684 | 685 | 686 | 687 | 688 | 689 | 690 | 691 | 692 | 693 | -------------------------------------------------------------------------------- /app/Perspectra/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 
CFBundleDevelopmentRegion 6 | en 7 | CFBundleExecutable 8 | $(EXECUTABLE_NAME) 9 | CFBundleIconFile 10 | 11 | CFBundleIdentifier 12 | $(PRODUCT_BUNDLE_IDENTIFIER) 13 | CFBundleInfoDictionaryVersion 14 | 6.0 15 | CFBundleName 16 | $(PRODUCT_NAME) 17 | CFBundlePackageType 18 | APPL 19 | CFBundleShortVersionString 20 | 1.0 21 | CFBundleVersion 22 | 1 23 | LSApplicationCategoryType 24 | public.app-category.photography 25 | LSMinimumSystemVersion 26 | $(MACOSX_DEPLOYMENT_TARGET) 27 | NSHumanReadableCopyright 28 | Copyright © 2017 Feram. All rights reserved. 29 | NSMainNibFile 30 | MainMenu 31 | NSPrincipalClass 32 | NSApplication 33 | 34 | 35 | -------------------------------------------------------------------------------- /app/Transform/Base.lproj/PhotoEditingViewController.xib: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | -------------------------------------------------------------------------------- /app/Transform/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleDevelopmentRegion 6 | en 7 | CFBundleDisplayName 8 | Transform 9 | CFBundleExecutable 10 | $(EXECUTABLE_NAME) 11 | CFBundleIdentifier 12 | $(PRODUCT_BUNDLE_IDENTIFIER) 13 | CFBundleInfoDictionaryVersion 14 | 6.0 15 | CFBundleName 16 | $(PRODUCT_NAME) 17 | CFBundlePackageType 18 | XPC! 19 | CFBundleShortVersionString 20 | 1.0 21 | CFBundleVersion 22 | 1 23 | LSMinimumSystemVersion 24 | $(MACOSX_DEPLOYMENT_TARGET) 25 | NSExtension 26 | 27 | NSExtensionAttributes 28 | 29 | PHSupportedMediaTypes 30 | 31 | Image 32 | 33 | 34 | NSExtensionPointIdentifier 35 | com.apple.photo-editing 36 | NSExtensionPrincipalClass 37 | $(PRODUCT_MODULE_NAME).PhotoEditingViewController 38 | 39 | NSHumanReadableCopyright 40 | Copyright © 2017 Feram. All rights reserved. 
41 | 42 | 43 | -------------------------------------------------------------------------------- /app/Transform/PhotoEditingViewController.swift: -------------------------------------------------------------------------------- 1 | import Cocoa 2 | import Photos 3 | import PhotosUI 4 | 5 | class PhotoEditingViewController: NSViewController, PHContentEditingController { 6 | 7 | var input: PHContentEditingInput? 8 | 9 | override func viewDidLoad() { 10 | super.viewDidLoad() 11 | // Do any additional setup after loading the view. 12 | } 13 | 14 | // MARK: - PHContentEditingController 15 | 16 | func canHandle(_ adjustmentData: PHAdjustmentData) -> Bool { 17 | // Inspect the adjustmentData to determine whether your extension can 18 | // work with past edits. 19 | // (Typically, you use its formatIdentifier and formatVersion 20 | // properties to do this.) 21 | return false 22 | } 23 | 24 | func startContentEditing(with contentEditingInput: PHContentEditingInput, 25 | placeholderImage: NSImage) { 26 | // Present content for editing, and keep the contentEditingInput for 27 | // use when closing the edit session. 28 | // If you returned true from canHandleAdjustmentData:, 29 | // contentEditingInput has the original image and adjustment data. 30 | // If you returned false, the contentEditingInput has past edits "baked 31 | // in". 32 | input = contentEditingInput 33 | } 34 | 35 | func finishContentEditing(completionHandler: @escaping 36 | ((PHContentEditingOutput?) -> Void)) { 37 | // Update UI to reflect that editing has finished and output is being 38 | // rendered. 39 | 40 | // Render and provide output on a background queue. 41 | DispatchQueue.global().async { 42 | // Create editing output from the editing input. 43 | let output = PHContentEditingOutput(contentEditingInput: self.input!) 44 | 45 | // Provide new adjustments and render output to given location. 
46 | // output.adjustmentData = new adjustment data 47 | // let renderedJPEGData = output JPEG 48 | // renderedJPEGData.writeToURL(output.renderedContentURL, 49 | // atomically: true) 50 | 51 | // Call completion handler to commit edit to Photos. 52 | // completionHandler(output) 53 | 54 | // Clean up temporary files, etc. 55 | } 56 | } 57 | 58 | var shouldShowCancelConfirmation: Bool { 59 | // Determines whether a confirmation to discard changes should be shown 60 | // to the user on cancel. 61 | // (Typically, this should be "true" if there are any unsaved changes.) 62 | return false 63 | } 64 | 65 | func cancelContentEditing() { 66 | // Clean up temporary files, etc. 67 | // May be called after finishContentEditingWithCompletionHandler: while 68 | // you prepare output. 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /app/Transform/Transform.entitlements: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | com.apple.security.app-sandbox 6 | 7 | com.apple.security.files.user-selected.read-only 8 | 9 | 10 | 11 | -------------------------------------------------------------------------------- /images/examples/01_binary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/01_binary.png -------------------------------------------------------------------------------- /images/examples/01_original.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/01_original.jpeg -------------------------------------------------------------------------------- /images/examples/02_binary.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/02_binary.png -------------------------------------------------------------------------------- /images/examples/02_original.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/02_original.jpeg -------------------------------------------------------------------------------- /images/examples/03_gray.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/03_gray.png -------------------------------------------------------------------------------- /images/examples/03_original.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/examples/03_original.jpeg -------------------------------------------------------------------------------- /images/logo-1024.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/logo-1024.png -------------------------------------------------------------------------------- /images/logo.icns: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/logo.icns -------------------------------------------------------------------------------- /images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/images/logo.png 
-------------------------------------------------------------------------------- /jxa/.gitignore: -------------------------------------------------------------------------------- 1 | /build 2 | /Perspectra.app/Contents/Resources/Scripts 3 | -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/Info.plist: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | CFBundleAllowMixedLocalizations 6 | 7 | CFBundleDevelopmentRegion 8 | English 9 | CFBundleExecutable 10 | applet 11 | CFBundleIconFile 12 | applet 13 | CFBundleIdentifier 14 | com.apple.ScriptEditor.id.Perspectra 15 | CFBundleInfoDictionaryVersion 16 | 6.0 17 | CFBundleName 18 | Perspectra 19 | CFBundlePackageType 20 | APPL 21 | CFBundleShortVersionString 22 | 1.0 23 | CFBundleSignature 24 | aplt 25 | LSMinimumSystemVersionByArchitecture 26 | 27 | x86_64 28 | 10.6 29 | 30 | LSRequiresCarbon 31 | 32 | WindowState 33 | 34 | bundleDividerCollapsed 35 | 36 | bundlePositionOfDivider 37 | 0.0 38 | dividerCollapsed 39 | 40 | eventLogLevel 41 | -1 42 | name 43 | ScriptWindowState 44 | positionOfDivider 45 | 646 46 | savedFrame 47 | 237 289 1280 897 0 0 2560 1440 48 | selectedTab 49 | result 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/MacOS/applet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/jxa/Perspectra.app/Contents/MacOS/applet -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/PkgInfo: -------------------------------------------------------------------------------- 1 | APPLaplt -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/Resources/applet.icns: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/jxa/Perspectra.app/Contents/Resources/applet.icns -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/Resources/applet.rsrc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/jxa/Perspectra.app/Contents/Resources/applet.rsrc -------------------------------------------------------------------------------- /jxa/Perspectra.app/Contents/Resources/description.rtfd/TXT.rtf: -------------------------------------------------------------------------------- 1 | {\rtf1\ansi\ansicpg1252\cocoartf1504\cocoasubrtf810 2 | {\fonttbl} 3 | {\colortbl;\red255\green255\blue255;} 4 | {\*\expandedcolortbl;;} 5 | } -------------------------------------------------------------------------------- /jxa/makefile: -------------------------------------------------------------------------------- 1 | contentsPath = Perspectra.app/Contents 2 | resourcesPath = $(contentsPath)/Resources 3 | scriptsPath = $(resourcesPath)/Scripts 4 | # tempLibsPath = $(resourcesPath)/temp-script-libraries 5 | 6 | $(scriptsPath)/main.scpt: build/bundle.js | $(scriptsPath) 7 | osacompile -l JavaScript -o $@ $< 8 | 9 | build/bundle.js: source/main.js source/config.js | build 10 | echo "window = this;" > $@ 11 | ./node_modules/.bin/browserify $< >> $@ 12 | 13 | build: 14 | - mkdir build 15 | 16 | $(scriptsPath): 17 | - mkdir $@ 18 | 19 | .PHONY: clean 20 | 21 | clean: 22 | rm -rf build && \ 23 | rm -rf $(scriptsPath) 24 | -------------------------------------------------------------------------------- /jxa/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "jsx", 3 | "version": "1.0.0", 4 | "description": "", 5 | 
"main": "index.js", 6 | "scripts": { 7 | "build": "make" 8 | }, 9 | "keywords": [], 10 | "author": "Adrian Sieber", 11 | "license": "ISC", 12 | "devDependencies": { 13 | "browserify": "^14.0.0", 14 | "eslint": "^3.14.1", 15 | "eslint-config-javascript": "^1.2.0" 16 | }, 17 | "eslintConfig": { 18 | "extends": "eslint-config-javascript" 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /jxa/source/config.js: -------------------------------------------------------------------------------- 1 | const config = { 2 | allowedFileTypes: ['jpg', 'png', 'gif'], 3 | } 4 | 5 | Object.assign(config, { 6 | appWindow: { 7 | width: 600, 8 | height: 85, 9 | minWidth: 400, 10 | minHeight: 340, 11 | controlBarHeight: 80, 12 | }, 13 | textField: { 14 | width: 205, 15 | height: 24, 16 | label: { 17 | width: 200, 18 | height: 24, 19 | }, 20 | properties: { 21 | stringValue: `Image: (${config.allowedFileTypes.join(', ')})`, 22 | drawsBackground: false, 23 | editable: false, 24 | bezeled: false, 25 | selectable: true, 26 | }, 27 | }, 28 | button: { 29 | width: 150, 30 | height: 25, 31 | title: 'Choose an WHR …', 32 | }, 33 | }) 34 | 35 | module.exports = config 36 | -------------------------------------------------------------------------------- /jxa/source/main.js: -------------------------------------------------------------------------------- 1 | /* globals ObjC, $, Library */ 2 | /* eslint-disable new-cap */ 3 | 4 | const config = require('./config') 5 | 6 | function bitOr (elements) { 7 | return elements.reduce((current, next) => { 8 | // eslint-disable-next-line no-bitwise 9 | return current | next 10 | }) 11 | } 12 | 13 | ObjC.import('Cocoa') 14 | 15 | const styleMask = bitOr([ 16 | $.NSTitledWindowMask, 17 | $.NSClosableWindowMask, 18 | $.NSMiniaturizableWindowMask, 19 | $.NSResizableWindowMask, 20 | ]) 21 | 22 | if (!$.MyWindow) { 23 | ObjC.registerSubclass({ 24 | name: 'MyWindow', 25 | superclass: 'NSWindow', 26 | methods: { 27 | 
mouseDown: { 28 | types: ['void', ['id']], 29 | implementation: () => { 30 | $.NSLog('Left mouse click') 31 | }, 32 | }, 33 | rightMouseDown: { 34 | types: ['void', ['id']], 35 | implementation: () => { 36 | $.NSLog('Right mouse click') 37 | }, 38 | }, 39 | }, 40 | }) 41 | } 42 | 43 | const appWindow = $.MyWindow.alloc.initWithContentRectStyleMaskBackingDefer( 44 | $.NSMakeRect(0, 0, config.appWindow.width, config.appWindow.height), 45 | styleMask, 46 | $.NSBackingStoreBuffered, 47 | false 48 | ) 49 | 50 | function chooseImage () { 51 | const panel = $.NSOpenPanel.openPanel 52 | panel.title = 'Select an Image' 53 | panel.allowedFileTypes = $(config.allowedFileTypes) 54 | 55 | if (panel.runModal === $.NSOKButton) { 56 | // Panel.URLs is an NSArray not a JS array 57 | const imagePath = panel.URLs.objectAtIndex(0).path 58 | textField.stringValue = imagePath 59 | 60 | const image = $.NSImage.alloc.initByReferencingFile(imagePath) 61 | const imageRect = $.NSMakeRect( 62 | 0, 63 | config.appWindow.height, 64 | image.size.width, 65 | image.size.height 66 | ) 67 | const imageView = $.NSImageView.alloc.initWithFrame(imageRect) 68 | const width = image.size.width > config.appWindow.minWidth 69 | ? 
image.size.height 74 | : config.appWindow.minHeight 75 | ) + config.appWindow.controlBarHeight 76 | 77 | appWindow.setFrameDisplay( 78 | $.NSMakeRect(0, 0, width, height), 79 | true 80 | ) 81 | 82 | imageView.setImage(image) 83 | appWindow.contentView.addSubview(imageView) 84 | } 85 | } 86 | 87 | 88 | if (!$.AppDelegate) { 89 | ObjC.registerSubclass({ 90 | name: 'AppDelegate', 91 | methods: { 92 | btnClickHandler: { 93 | types: ['void', ['id']], 94 | implementation: chooseImage, 95 | }, 96 | }, 97 | }) 98 | } 99 | const appDelegate = $.AppDelegate.alloc.init 100 | 101 | 102 | const textFieldLabelRect = $.NSMakeRect( 103 | 25, 104 | config.appWindow.height - 40, 105 | config.textField.label.width, 106 | config.textField.label.height 107 | ) 108 | const textFieldLabel = $.NSTextField.alloc.initWithFrame(textFieldLabelRect) 109 | Object.assign(textFieldLabel, config.textField.properties) 110 | appWindow.contentView.addSubview(textFieldLabel) 111 | 112 | 113 | const textFieldRect = $.NSMakeRect( 114 | 25, 115 | config.appWindow.height - 60, 116 | config.textField.width, 117 | config.textField.height 118 | ) 119 | const textField = $.NSTextField.alloc.initWithFrame(textFieldRect) 120 | textField.editable = false 121 | appWindow.contentView.addSubview(textField) 122 | 123 | const buttonRect = $.NSMakeRect( 124 | 230, 125 | config.appWindow.height - 62, 126 | config.button.width, 127 | config.button.height 128 | ) 129 | const button = $.NSButton.alloc.initWithFrame(buttonRect) 130 | button.title = 'Choose an Image …' 131 | button.bezelStyle = $.NSRoundedBezelStyle 132 | button.buttonType = $.NSMomentaryLightButton 133 | button.target = appDelegate 134 | button.action = 'btnClickHandler' 135 | appWindow.contentView.addSubview(button) 136 | 137 | 138 | 139 | appWindow.center 140 | appWindow.title = 'Perspectra' 141 | appWindow.makeKeyAndOrderFront(appWindow) 142 | -------------------------------------------------------------------------------- /makefile: 
-------------------------------------------------------------------------------- 1 | .PHONY: help 2 | help: makefile 3 | @tail -n +4 makefile | grep ".PHONY" 4 | 5 | 6 | .PHONY: build 7 | build: 8 | uv build 9 | 10 | 11 | .PHONY: test 12 | test: 13 | uv run pytest 14 | 15 | 16 | .PHONY: test-receipts 17 | test-receipts: 18 | cd examples/receipts && \ 19 | rm -f *-corrected.jpeg && \ 20 | ls *.jpeg | while read -r file; do \ 21 | echo "Correcting $$file"; \ 22 | uv run perspectra correct $$file --output $${file%.*}-corrected.jpeg; \ 23 | done 24 | 25 | 26 | .PHONY: edit-notebooks 27 | edit-notebooks: 28 | uv run marimo edit notebooks 29 | 30 | 31 | .PHONY: install 32 | install: 33 | uv tool install --editable . 34 | 35 | 36 | .PHONY: uninstall 37 | uninstall: 38 | uv tool uninstall perspectra 39 | 40 | 41 | .PHONY: publish 42 | publish: build 43 | uv publish 44 | 45 | 46 | # TODO: Re-enable this code 47 | # pyFiles := $(shell find core/perspectra -name '*.py') 48 | # # TODO: Slow startup time on first execution 49 | # .PHONY: build-pyinstaller 50 | # build-pyinstaller: core/perspectra/__main__.py $(pyFiles) 51 | # pyinstaller $< \ 52 | # --paths core/perspectra \ 53 | # --noconfirm \ 54 | # --name perspectra 55 | 56 | 57 | # # TODO: Does not work yet due to import errors 58 | # .PHONY: build-nuitka 59 | # build-nuitka: core/perspectra/__main__.py $(pyFiles) 60 | # python -m nuitka \ 61 | # --standalone \ 62 | # --output-filename=perspectra \ 63 | # $< 64 | 65 | 66 | images/logo.icns: images/logo-1024.png 67 | nicns --in $< --out $@ 68 | 69 | 70 | # TODO: Fix this 71 | # # Create `.app` bundle with py2app 72 | # Perspectra.app: 73 | # python3 setup.py py2app -A 74 | 75 | 76 | # TODO: Fix this 77 | # # With cx_Freeze 78 | # Perspectra.app: 79 | # python3 setup.py install 80 | # python3 setup.py bdist_mac 81 | 82 | 83 | .PHONY: clean 84 | clean: 85 | rm -rf __pycache__ 86 | rm -rf .ipynb_checkpoints 87 | rm -rf .mypy_cache 88 | rm -rf .pytest_cache 89 | rm -rf 
.ruff_cache 90 | rm -rf .venv 91 | rm -rf *.app 92 | rm -rf *.egg-info 93 | rm -rf build 94 | rm -rf dist 95 | rm -rf perspectra.spec 96 | rm -rf src/perspectra/__pycache__ 97 | rm -rf src/perspectra/.mypy_cache 98 | -------------------------------------------------------------------------------- /notebooks/approximate_border_color.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(): 9 | import numpy as np 10 | import scipy.ndimage.filters as filters 11 | from skimage import data 12 | import matplotlib.pyplot as plt 13 | 14 | 15 | def add_border_approx_color(arr, border_size=1): 16 | # Compute the local mean color of each pixel 17 | local_mean = filters.uniform_filter( 18 | arr, size=border_size * 2 + 1, mode="reflect" 19 | ) 20 | 21 | # Compute the difference between each pixel and its local mean 22 | diff = np.abs(arr - local_mean) 23 | 24 | # Scale the difference so that it ranges from 0 to 1 25 | diff /= diff.max() 26 | 27 | # Create a mask for the border 28 | mask = np.zeros(arr.shape) 29 | mask[border_size:-border_size, border_size:-border_size] = 1 30 | mask = filters.gaussian_filter(mask, border_size) 31 | 32 | # Apply the mask to the difference array to get the border color 33 | border_color = np.zeros_like(arr) 34 | for i in range(arr.shape[-1]): 35 | border_color[..., i] = np.mean(diff[..., i] * mask) 36 | 37 | # Create a new array with the border color 38 | border_arr = np.zeros( 39 | ( 40 | arr.shape[0] + border_size * 2, 41 | arr.shape[1] + border_size * 2, 42 | arr.shape[2], 43 | ) 44 | ) 45 | border_arr[border_size:-border_size, border_size:-border_size] = arr 46 | border_arr[:border_size, :, :] = border_color[:border_size, :, :] 47 | border_arr[-border_size:, :, :] = border_color[-border_size:, :, :] 48 | border_arr[:, :border_size, :] = border_color[:, :border_size, :] 49 | border_arr[:, -border_size:, :] 
= border_color[:, -border_size:, :] 50 | 51 | return border_arr 52 | return add_border_approx_color, data, filters, np, plt 53 | 54 | 55 | @app.cell 56 | def __(data, plt): 57 | plt.imshow(data.camera(), cmap="gray") 58 | plt.axis("off") 59 | plt.show() 60 | return 61 | 62 | 63 | @app.cell 64 | def __(add_border_approx_color, data, plt): 65 | image = add_border_approx_color(data.camera()) 66 | 67 | plt.imshow(image, cmap="gray") 68 | plt.axis("off") 69 | plt.show() 70 | return image, 71 | 72 | 73 | if __name__ == "__main__": 74 | app.run() 75 | -------------------------------------------------------------------------------- /notebooks/cartesian_to_polar.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(mo): 9 | mo.md(r"# Cartesian to Polar") 10 | return 11 | 12 | 13 | @app.cell(hide_code=True) 14 | def __(): 15 | import marimo as mo 16 | import numpy as np 17 | import matplotlib.pyplot as plt 18 | return mo, np, plt 19 | 20 | 21 | @app.cell 22 | def __(np, plt): 23 | img_size = (250, 200) 24 | corner_points = np.array( 25 | [ 26 | [14, 46], 27 | [14, 140], 28 | [234, 144], 29 | [234, 47], 30 | [44, 30], 31 | ] 32 | ) 33 | 34 | 35 | def draw_points(points): 36 | canvas = np.zeros(img_size) 37 | 38 | fig, ax = plt.subplots() 39 | ax.set_xlim(0, canvas.shape[1]) 40 | ax.set_ylim(canvas.shape[0], 0) 41 | ax.set_aspect("equal") 42 | 43 | # Draw polygon 44 | polygon = plt.Polygon( 45 | np.flip(points), 46 | edgecolor="lightgray", 47 | fill=None, 48 | linewidth=1, 49 | ) 50 | ax.add_patch(polygon) 51 | 52 | # Draw points and angle text 53 | for point in points: 54 | ax.scatter(point[1], point[0], marker="x") 55 | 56 | plt.show() 57 | 58 | 59 | draw_points(corner_points) 60 | return corner_points, draw_points, img_size 61 | 62 | 63 | @app.cell 64 | def __(corner_points, img_size, np, plt): 65 | def draw_points_origin(points, 
rowOffset, colOffset): 66 | canvas = np.zeros(img_size) 67 | 68 | fig, ax = plt.subplots() 69 | ax.set_xlim(-colOffset, colOffset) 70 | ax.set_ylim(-rowOffset, rowOffset) 71 | ax.set_aspect("equal") 72 | 73 | # Draw polygon 74 | polygon = plt.Polygon( 75 | np.flip(points), 76 | edgecolor="lightgray", 77 | fill=None, 78 | linewidth=1, 79 | ) 80 | ax.add_patch(polygon) 81 | 82 | # Draw points and angle text 83 | for point in points: 84 | ax.scatter(point[1], point[0], marker="x") 85 | 86 | plt.show() 87 | 88 | 89 | def cartesian_to_polar(points): 90 | """ 91 | >>> cartesian_to_polar(np.array([[-1,1], [-1,-1], [1,-1], [1,1]])) 92 | array([[ 1.41421356, 45. ], 93 | [ 1.41421356, 135. ], 94 | [ 1.41421356, 225. ], 95 | [ 1.41421356, 315. ]]) 96 | """ 97 | x, y = points[:, 1], points[:, 0] 98 | r = np.hypot(x, y) 99 | thetas = np.arctan2(y, x) 100 | 101 | def norm_theta(theta): 102 | if theta < 0: 103 | return -theta 104 | else: 105 | return -(theta - 360) 106 | 107 | v_norm_theta = np.vectorize(norm_theta) 108 | 109 | thetas_norm = v_norm_theta(np.degrees(thetas)) 110 | 111 | polar_points = np.column_stack((r, thetas_norm)) 112 | 113 | return polar_points 114 | 115 | 116 | cartesian_to_polar(corner_points) 117 | return cartesian_to_polar, draw_points_origin 118 | 119 | 120 | @app.cell 121 | def __(cartesian_to_polar, np): 122 | def get_sorted_corners(img_size, corners): 123 | """ 124 | Corners sorted from upper left corner (smallest row, column) clockwise 125 | 126 | >>> get_sorted_corners((0, 0), np.array([[-1,-1], [-1,1], [1,-1], [1,1]])) 127 | array([[-1, -1], 128 | [-1, 1], 129 | [ 1, 1], 130 | [ 1, -1]]) 131 | 132 | >>> get_sorted_corners((250, 200), np.array([ 133 | ... [ 14, 46], 134 | ... [234, 144], 135 | ... [ 14, 140], 136 | ... [234, 47], 137 | ... [ 44, 30], 138 | ... 
])) 139 | array([[ 14, 46], 140 | [ 14, 140], 141 | [234, 144], 142 | [234, 47], 143 | [ 44, 30]]) 144 | """ 145 | # Shift coordinate system 146 | rowOffset = img_size[0] / 2 147 | colOffset = img_size[1] / 2 148 | 149 | moved_corner_points = corners - np.array([rowOffset, colOffset]) 150 | 151 | polar_points = cartesian_to_polar(moved_corner_points) 152 | 153 | indices = np.argsort(polar_points[:, 1]) 154 | corners_sorted = corners[indices][::-1] 155 | 156 | left_uppermost_index = np.argmin(np.sum(corners_sorted, axis=1)) 157 | shifted_corner_points = np.roll( 158 | corners_sorted, -left_uppermost_index, axis=0 159 | ) 160 | 161 | return shifted_corner_points 162 | return get_sorted_corners, 163 | 164 | 165 | @app.cell 166 | def __(corner_points, get_sorted_corners, img_size, np): 167 | # Tests 168 | 169 | sorted_corners = get_sorted_corners( 170 | img_size, np.random.permutation(corner_points) 171 | ) 172 | assert np.array_equal( 173 | sorted_corners, [[14, 46], [14, 140], [234, 144], [234, 47], [44, 30]] 174 | ) 175 | 176 | sorted_corners = get_sorted_corners( 177 | img_size, np.random.permutation(corner_points) 178 | ) 179 | assert np.array_equal( 180 | sorted_corners, [[14, 46], [14, 140], [234, 144], [234, 47], [44, 30]] 181 | ) 182 | 183 | sorted_corners = get_sorted_corners( 184 | img_size, np.random.permutation(corner_points) 185 | ) 186 | assert np.array_equal( 187 | sorted_corners, [[14, 46], [14, 140], [234, 144], [234, 47], [44, 30]] 188 | ) 189 | return sorted_corners, 190 | 191 | 192 | if __name__ == "__main__": 193 | app.run() 194 | -------------------------------------------------------------------------------- /notebooks/harris_corner_detection.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(): 9 | import os 10 | import logging 11 | 12 | import imageio.v3 as imageio 13 | from skimage.feature import 
corner_harris, corner_peaks 14 | from skimage.util import img_as_ubyte 15 | from skimage import exposure 16 | 17 | 18 | output_base_path = "output" 19 | debug = True 20 | 21 | 22 | class ImageDebugger: 23 | def __init__(self, level, base_path): 24 | self.level = level 25 | self.base_path = base_path 26 | self.step_counter = 0 27 | 28 | def set_level(self, level): 29 | self.level = level 30 | return self 31 | 32 | def set_base_path(self, base_path): 33 | self.base_path = base_path 34 | return self 35 | 36 | def save(self, name, image): 37 | if self.level != "debug": 38 | return 39 | self.step_counter += 1 40 | image_path = os.path.join( 41 | self.base_path, 42 | f"{self.step_counter}-{name}.png", 43 | ) 44 | imageio.imwrite(image_path, image) 45 | logging.info(f"Stored image: {image_path}") 46 | return self 47 | 48 | 49 | debugger = ImageDebugger( 50 | level="debug" if debug else "", 51 | base_path=output_base_path, 52 | ) 53 | 54 | 55 | def get_harris_peaks(image, sigma, k): 56 | img_harris = corner_harris(image, sigma=sigma, k=k) 57 | debugger.save( 58 | "harris_corner_response", 59 | img_as_ubyte( 60 | exposure.rescale_intensity( 61 | img_harris, 62 | ), 63 | ), 64 | ) 65 | 66 | peaks_image = corner_peaks( 67 | img_harris, 68 | min_distance=5, # Prevent inclusion of `image_corners` 69 | indices=False, 70 | ) 71 | debugger.save("harris_corner_peaks", peaks_image) 72 | 73 | peaks = corner_peaks( 74 | img_harris, 75 | min_distance=5, 76 | ) 77 | 78 | return peaks 79 | return ( 80 | ImageDebugger, 81 | corner_harris, 82 | corner_peaks, 83 | debug, 84 | debugger, 85 | exposure, 86 | get_harris_peaks, 87 | imageio, 88 | img_as_ubyte, 89 | logging, 90 | os, 91 | output_base_path, 92 | ) 93 | 94 | 95 | if __name__ == "__main__": 96 | app.run() 97 | -------------------------------------------------------------------------------- /notebooks/line_angle.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | 
__generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(): 9 | import marimo as mo 10 | return mo, 11 | 12 | 13 | @app.cell 14 | def __(mo): 15 | mo.md(r"# Line Angle Calculation") 16 | return 17 | 18 | 19 | @app.cell 20 | def __(): 21 | import numpy as np 22 | import matplotlib.pyplot as plt 23 | 24 | 25 | corner_points = np.array( 26 | [ 27 | [14, 46], 28 | [14, 140], 29 | [60, 120], 30 | [234, 144], 31 | [234, 47], 32 | [150, 40], 33 | [100, 40], 34 | [44, 44], 35 | ] 36 | ) 37 | 38 | 39 | def draw_polygon(points, angles=[]): 40 | _, ax = plt.subplots() 41 | canvas = np.zeros((250, 200)) 42 | ax.set_xlim(0, canvas.shape[1]) 43 | ax.set_ylim(canvas.shape[0], 0) 44 | ax.set_aspect("equal") 45 | 46 | # Draw polygon 47 | polygon = plt.Polygon( 48 | np.flip(points), 49 | edgecolor="lightgray", 50 | fill=None, 51 | ) 52 | ax.add_patch(polygon) 53 | 54 | for point in points: 55 | ax.scatter(point[1], point[0], marker="x") 56 | 57 | # Draw angle text 58 | for i, angle in enumerate(angles): 59 | ax.text( 60 | points[i][1], 61 | points[i][0], 62 | f"{angle:+.1f}°", 63 | fontsize=10, 64 | ha="left", 65 | va="bottom", 66 | rotation=30, 67 | bbox=dict(facecolor="white", alpha=0.5, edgecolor="none"), 68 | ) 69 | 70 | plt.show() 71 | 72 | 73 | draw_polygon(corner_points) 74 | return corner_points, draw_polygon, np, plt 75 | 76 | 77 | @app.cell 78 | def __(np): 79 | def get_point_angles_in_deg(points): 80 | # The vectors are differences of coordinates 81 | # a points into the point, b out of the point 82 | a = points - np.roll(points, 1, axis=0) 83 | b = np.roll(a, -1, axis=0) # same but shifted 84 | 85 | # Calculate length of those vectors 86 | aLengths = np.linalg.norm(a, axis=1) 87 | bLengths = np.linalg.norm(b, axis=1) 88 | 89 | # Calculate length of the cross product 90 | # Since 2D (not 3D) cross product can't result in a vector, just its z-component 91 | crossproducts = np.cross(a, b) / aLengths / bLengths 92 | 93 | angles = 
np.arcsin(crossproducts) 94 | 95 | return angles / np.pi * 180 96 | return get_point_angles_in_deg, 97 | 98 | 99 | @app.cell 100 | def __(corner_points, draw_polygon, get_point_angles_in_deg, np): 101 | angles_degrees = get_point_angles_in_deg(corner_points) 102 | abs_angles = np.abs(angles_degrees) 103 | 104 | draw_polygon( 105 | corner_points, 106 | get_point_angles_in_deg(corner_points), 107 | ) 108 | return abs_angles, angles_degrees 109 | 110 | 111 | @app.cell 112 | def __(abs_angles, corner_points, draw_polygon, np): 113 | abs_angles_sorted = np.argsort(abs_angles) 114 | corners_final = corner_points[abs_angles_sorted][-4:] 115 | draw_polygon(corners_final) 116 | return abs_angles_sorted, corners_final 117 | 118 | 119 | if __name__ == "__main__": 120 | app.run() 121 | -------------------------------------------------------------------------------- /notebooks/line_curvature.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ad-si/Perspectra/24d33dc5d19febb5f4cf10f9bcda80404038ce48/notebooks/line_curvature.png -------------------------------------------------------------------------------- /notebooks/line_curvature.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(): 9 | import marimo as mo 10 | return mo, 11 | 12 | 13 | @app.cell 14 | def __(mo): 15 | mo.md( 16 | r""" 17 | # Line Curvature Calculation 18 | 19 | Python implementation of following algorithm: 20 | 21 | 22 | Which is supposed to generate following image: 23 | """ 24 | ) 25 | return 26 | 27 | 28 | @app.cell 29 | def __(__file__, mo): 30 | import os 31 | 32 | img_path = os.path.join(os.path.dirname(__file__), "line_curvature.png") 33 | mo.image(src=img_path) 34 | return img_path, os 35 | 36 | 37 | @app.cell 38 | def __(): 39 | import numpy as np 40 | import matplotlib.pyplot as plt 41 | 42 
| points = np.array( 43 | [ 44 | (1.34, 0.30), 45 | (0.92, 0.43), 46 | (0.68, 0.90), 47 | (0.25, 1.40), 48 | (-0.06, 1.13), 49 | (-0.34, 1.08), 50 | (-0.49, 1.14), 51 | (-0.72, 1.23), 52 | (-0.79, 0.52), 53 | (-0.94, 0.21), 54 | (-1.35, -0.20), 55 | (-0.35, -0.73), 56 | (0.54, -0.73), 57 | (0.68, -0.82), 58 | (0.84, -0.71), 59 | (1.20, -0.76), 60 | (1.23, -0.46), 61 | (1.32, -0.13), 62 | (1.34, 0.30), 63 | ] 64 | ) 65 | 66 | x = points[:, 0] 67 | y = points[:, 1] 68 | 69 | # First derivative 70 | dsx = np.diff(x) 71 | dsy = np.diff(y) 72 | ds = np.sqrt(dsx**2 + dsy**2) 73 | Tx = dsx / ds 74 | Ty = dsy / ds 75 | 76 | # Second derivative & curvature 77 | ds2 = 0.5 * (np.hstack((ds[-1], ds[:-1])) + ds) 78 | Hx = np.diff(np.hstack((Tx[-1], Tx))) / ds2 79 | Hy = np.diff(np.hstack((Ty[-1], Ty))) / ds2 80 | 81 | # Plot 82 | plt.figure() 83 | plt.plot(x, y, "ro-") 84 | x = x[:-1] 85 | y = y[:-1] # remove repeated point 86 | plt.quiver(x + dsx / 2, y + dsy / 2, Ty, -Tx, color="k", scale=16) 87 | plt.quiver(x, y, Hx, Hy, color="b", scale=16) 88 | plt.xlim(-2, 2) 89 | plt.ylim(-1.5, 2) 90 | plt.axis("equal") 91 | plt.show() 92 | return Hx, Hy, Tx, Ty, ds, ds2, dsx, dsy, np, plt, points, x, y 93 | 94 | 95 | if __name__ == "__main__": 96 | app.run() 97 | -------------------------------------------------------------------------------- /notebooks/perspectra.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell 8 | def __(circle, numpy): 9 | import os 10 | import skimage 11 | from skimage import ( 12 | color, 13 | draw, 14 | exposure, 15 | feature, 16 | filters, 17 | io, 18 | measure, 19 | morphology, 20 | segmentation, 21 | transform, 22 | util, 23 | ) 24 | from skimage.draw import disk, circle_perimeter 25 | 26 | import matplotlib.pyplot as plt 27 | import numpy as np 28 | 29 | 30 | def show_images(images, cols=1, titles=None): 31 | """ 32 | Display a 
def get_marked_image(image, corners):
    """
    Paint a filled red disc onto `image` at every corner and return the image.

    Parameters
    ---------
    image: Array that is modified in place. Each marked pixel is assigned
        the triple (255, 0, 0), so a trailing axis of length 3 is required.

    corners: Iterable of 2-element corner positions. The first element is
        used as the column and the second as the row of the disc centre
        (the same order the previous `circle(c, r, radius)` call used).
    """
    # `skimage.draw.circle` no longer exists in current scikit-image and was
    # never imported by this cell; `disk` (imported alongside
    # `circle_perimeter`) is its replacement.
    # Marker size grows with the image; never let it collapse to zero.
    radius = max(1, image.size // 2**18)
    for x, y in corners:
        # Clip to the image so corners near the border cannot index
        # outside of the array.
        rows, cols = disk((y, x), radius, shape=image.shape[:2])
        image[rows, cols] = (255, 0, 0)
    return image
104 | color, 105 | corners, 106 | disk, 107 | draw, 108 | exposure, 109 | feature, 110 | filters, 111 | get_fixed_image, 112 | get_marked_image, 113 | image, 114 | io, 115 | load_image, 116 | measure, 117 | morphology, 118 | np, 119 | os, 120 | plt, 121 | segmentation, 122 | show_images, 123 | skimage, 124 | transform, 125 | util, 126 | ) 127 | 128 | 129 | if __name__ == "__main__": 130 | app.run() 131 | -------------------------------------------------------------------------------- /notebooks/polygon_simplification.py: -------------------------------------------------------------------------------- 1 | import marimo 2 | 3 | __generated_with = "0.7.0" 4 | app = marimo.App() 5 | 6 | 7 | @app.cell(hide_code=True) 8 | def __(): 9 | import marimo as mo 10 | import numpy as np 11 | from skimage.measure import approximate_polygon 12 | import plotly.express as px 13 | return approximate_polygon, mo, np, px 14 | 15 | 16 | @app.cell(hide_code=True) 17 | def __(mo): 18 | mo.md( 19 | r""" 20 | # Polygon Simplification 21 | 22 | Given is a list of vertices which describe a polygon. The last edge is implicitly defined by connecting the last vertex to the first vertex of the list. 
def vertices_to_edges(vertices):
    """
    Turn an ordered vertex sequence into its list of consecutive edges.

    Each edge is a two-element list `[vertex, next_vertex]`. No edge is
    created from the last vertex back to the first, so N vertices yield
    N - 1 edges (and fewer than two vertices yield an empty list).
    """
    followers = vertices[1:]
    return [[start, end] for start, end in zip(vertices, followers)]
def reduce_polygon(polygon, angle_th=0, distance_th=0):
    """
    Repeatedly drop vertices that are redundant for the polygon's shape.

    A vertex is dropped when both adjacent edges are shorter than
    `distance_th`, or when the direction change between the incoming and
    the outgoing edge is smaller than `angle_th`. Passes are repeated until
    one pass removes nothing.

    Only every second index (0, 2, 4, …) up to `len(polygon) - 3` is
    examined per pass; other vertices are only examined once earlier
    removals have shifted them onto such an index.

    Parameters
    ---------
    polygon: (N, D) numpy array of vertices. Index 0 uses the last vertex
        as its predecessor.

    angle_th (Default = 0): Angle threshold in degrees.

    distance_th (Default = 0): Edge length threshold.

    Returns
    -------
    A new array containing the remaining vertices.
    """
    angle_th_rad = np.deg2rad(angle_th)

    while True:
        points_removed = []
        for i in range(0, len(polygon) - 2, 2):
            v01 = polygon[i - 1] - polygon[i]
            v12 = polygon[i] - polygon[i + 1]
            d01 = np.linalg.norm(v01)
            d12 = np.linalg.norm(v12)
            if d01 < distance_th and d12 < distance_th:
                points_removed.append(i)
                continue
            # Rounding can push the cosine marginally outside [-1, 1], which
            # would make arccos return NaN and keep an exactly collinear
            # vertex. Clamp before taking the angle.
            cosine = np.clip(np.sum(v01 * v12) / (d01 * d12), -1.0, 1.0)
            if np.arccos(cosine) < angle_th_rad:
                points_removed.append(i)

        polygon = np.delete(polygon, points_removed, axis=0)
        if not points_removed:
            return polygon
def apply_probability(probability, image):
    """
    Weight `image` by `probability`.

    `probability` is broadcast to the shape given by the first two axes of
    `image` and multiplied element-wise with the image. The result is a new
    array; neither input is modified.
    """
    height = image.shape[0]
    width = image.shape[1]
    weights = np.broadcast_to(probability, (height, width))
    return image * weights
return sobeled, 98 | 99 | 100 | @app.cell 101 | def __(List, input_image_path): 102 | import imageio.v3 as imageio 103 | 104 | 105 | def split_book(image) -> List[List[int]]: 106 | return [] 107 | 108 | 109 | image = imageio.imread(input_image_path, rotate=True) 110 | pages = split_book(image) 111 | pages 112 | return image, imageio, pages, split_book 113 | 114 | 115 | if __name__ == "__main__": 116 | app.run() 117 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "perspectra" 3 | version = "0.2.1" 4 | description = "Automatically extract and perspectively correct documents in images" 5 | readme = "readme.md" 6 | requires-python = ">=3.12" 7 | license = { text = "ISC" } 8 | authors = [{ name = "Adrian Sieber", email = "mail@adriansieber.com" }] 9 | keywords = ["document scanner", "perspective transformation"] 10 | classifiers = [ 11 | "Programming Language :: Python :: 3", 12 | "Development Status :: 3 - Alpha", 13 | "Natural Language :: English", 14 | "Environment :: Console", 15 | "Intended Audience :: End Users/Desktop", 16 | "Operating System :: OS Independent", 17 | "Topic :: Scientific/Engineering :: Image Recognition", 18 | ] 19 | urls = { "Homepage" = "http://github.com/ad-si/Perspectra" } 20 | dependencies = [ 21 | "imageio>=2.36.0", 22 | "matplotlib>=3.9.2", 23 | "numpy>=2.1.3", 24 | "packaging>=24.2", 25 | "pandas>=2.2.3", 26 | "plotly>=5.24.1", 27 | "scikit-image>=0.24.0", 28 | ] 29 | scripts = { perspectra = "perspectra:main" } 30 | 31 | [dependency-groups] 32 | dev = ["marimo>=0.9.20", "pytest>=8.3.3"] 33 | 34 | [build-system] 35 | requires = ["hatchling"] 36 | build-backend = "hatchling.build" 37 | 38 | [tool.hatch.build] 39 | exclude = ["app/", "chaiNNer/", "jxa/"] 40 | -------------------------------------------------------------------------------- /readme.md: 
-------------------------------------------------------------------------------- 1 | # Perspectra 2 | 3 | Software and corresponding workflow to scan documents and books 4 | with as little hardware as possible. 5 | 6 | Check out [github:adius/awesome-scanning] 7 | for an extensive list of alternative solutions. 8 | 9 | [github:adius/awesome-scanning]: https://github.com/adius/awesome-scanning 10 | 11 | 12 | Command | Input | Result 13 | --------|-------|------- 14 | `perspectra correct --binary=gauss-diff 01.jpeg`|![Receipt 1](images/examples/01_original.jpeg)|![Receipt 1 binarized](images/examples/01_binary.png) 15 | `perspectra correct --binary=gauss-diff 02.jpeg`|![Receipt 2](images/examples/02_original.jpeg)|![Receipt 2 binarized](images/examples/02_binary.png) 16 | `perspectra correct --gray 03.jpeg`|![Receipt 3](images/examples/03_original.jpeg)|![Receipt 3 grayscale](images/examples/03_gray.png) 17 | 18 | 19 | ## Installation 20 | 21 | We recommend to use [`uv`](https://docs.astral.sh/uv/) 22 | instead of `pip` to install the package. 23 | 24 | ```sh 25 | uv tool install perspectra 26 | ``` 27 | 28 | To install from source: 29 | 30 | ```sh 31 | git clone https://github.com/ad-si/Perspectra 32 | cd Perspectra 33 | make install 34 | ``` 35 | 36 | 37 | ## Usage 38 | 39 | ### Command Line Interface 40 | 41 | ```txt 42 | usage: perspectra [-h] [--debug] {binarize,correct,corners,renumber-pages} ... 43 | 44 | options: 45 | -h, --help show this help message and exit 46 | --debug Render debugging view 47 | 48 | subcommands: 49 | subcommands to handle files and correct photos 50 | 51 | {binarize,correct,corners,renumber-pages} 52 | additional help 53 | binarize Binarize image 54 | correct Pespectively correct and crop photos of documents. 55 | corners Returns the corners of the document in the image as 56 | [top-left, top-right, bottom-right, bottom-left] 57 | renumber-pages Renames the images in a directory according to their 58 | page numbers. 
The assumed layout is `cover -> odd 59 | pages -> even pages reversed` 60 | ``` 61 | 62 | 63 | ## Best Practices for Taking the Photos 64 | 65 | Your photos should ideally have the following properties: 66 | 67 | - Photos with 10 - 20 Mpx 68 | - Contain 1 document 69 | - Rectangular 70 | - Pronounced corners 71 | - Only black content on white or light-colored paper 72 | - On dark background 73 | - Maximum of 30° rotation 74 | 75 | 76 | ### Camera Settings 77 | 78 | ```yaml 79 | # Rule of thumb is the inverse of your focal length, 80 | # but motion blur is pretty much the worst for readable documents, 81 | # therefore use at least half of it and never less than 1/50. 82 | shutter: 1/50 - 1/200 s 83 | 84 | # The whole document must be sharp even if you photograph it from an angle. 85 | # Therefore at least 8 f. 86 | aperture: 8-12 f 87 | 88 | # Noise is less bad than motion blur => relatively high ISO 89 | # Should be the last thing you set: 90 | # As high as necessary as low as possible 91 | iso: 800-6400 92 | ``` 93 | 94 | When using `Tv` (Time Value) or `Av` (Aperture Value) mode 95 | use exposure compensation to set lightness value below 0. 96 | You really don't want to overexpose your photos as the bright pages 97 | are the first thing that clips. 98 | 99 | On the other hand, 100 | it doesn't matter if you lose background parts because they are too dark. 101 | 102 | 103 | ### Generating the Photos from a Video 104 | 105 | A good tool for this purpose is [PySceneDetect]. 106 | It's a Python/OpenCV-based scene detection program, 107 | using threshold/content analysis on a given video. 
108 | 109 | [PySceneDetect]: https://github.com/Breakthrough/PySceneDetect 110 | 111 | For easy installation you can use the [docker image]. 112 | 113 | [docker image]: https://github.com/handflucht/PySceneDetect 114 | 115 | 116 | Find good values for threshold: 117 | 118 | ```fish 119 | docker run \ 120 | --rm \ 121 | --volume (pwd):/video \ 122 | handflucht/pyscenedetect \ 123 | --input /video/page-turning.mp4 \ 124 | --downscale-factor 2 \ 125 | --detector content \ 126 | --statsfile page-turning-stats.csv 127 | ``` 128 | 129 | 130 | To launch the image run: 131 | 132 | ```fish 133 | docker run \ 134 | --interactive \ 135 | --tty \ 136 | --volume=(pwd):/video \ 137 | --entrypoint=bash \ 138 | handflucht/pyscenedetect 139 | ``` 140 | 141 | 142 | Then run in the shell: 143 | 144 | ```bash 145 | cd /video 146 | scenedetect \ 147 | --input page-turning.mp4 \ 148 | --downscale-factor 2 \ 149 | --detector content \ 150 | --threshold 3 \ 151 | --min-scene-length 80 \ 152 | --save-images 153 | ``` 154 | 155 | 156 | TODO: The correct way to do this: 157 | (after https://github.com/Breakthrough/PySceneDetect/issues/45 is implemented) 158 | 159 | ```fish 160 | docker run \ 161 | --rm \ 162 | --volume (pwd):/video \ 163 | handflucht/pyscenedetect \ 164 | --input /video/page-turning.mp4 \ 165 | --downscale-factor 2 \ 166 | --detector content \ 167 | --threshold 3 \ 168 | --min-scene-length 80 \ 169 | --save-images 170 | ``` 171 | 172 | Aim for a low threshold and a long minimum scene length. 173 | I.e. turn the page really fast and show it for a long time. 174 | -------------------------------------------------------------------------------- /scripts/readme.md: -------------------------------------------------------------------------------- 1 | # Scripts 2 | 3 | This directory contains scripts that are used to try out various things. 
def read(*filenames, **kwargs):
    """
    Return the contents of all given files joined into a single string.

    Keyword arguments:
    encoding -- text encoding used to open every file (default 'utf-8')
    sep -- string placed between the contents of two files (default '\\n')
    """
    encoding = kwargs.get('encoding', 'utf-8')
    separator = kwargs.get('sep', '\n')

    contents = []
    for name in filenames:
        with io.open(name, encoding=encoding) as handle:
            contents.append(handle.read())

    return separator.join(contents)
def _farthest_interior_point(coords, start, end):
    """
    Return `(distance, index)` of the point strictly between the indices
    `start` and `end` that lies farthest from the line segment joining
    `coords[start]` and `coords[end]`, or `None` if there is no such point.
    """
    if end - start < 2:
        return None

    origin = coords[start].astype(float)
    direction = coords[end] - origin
    offsets = coords[start + 1:end] - origin

    length_squared = float(direction @ direction)
    if length_squared > 0:
        # Clamping the projection to the segment measures points that
        # project beyond either end against the nearest end point.
        position = np.clip(offsets @ direction / length_squared, 0.0, 1.0)
        offsets = offsets - np.outer(position, direction)
    # For a degenerate segment (start == end, e.g. a closed polygon) the
    # offsets already are the vectors to that single point.

    distances = np.linalg.norm(offsets, axis=1)
    farthest = int(np.argmax(distances))
    return float(distances[farthest]), start + 1 + farthest


def approximate_polygon(coords, tolerance, target_count):
    """
    Approximate a polygonal chain with the specified tolerance
    and with at most target count points.
    It is based on the Douglas-Peucker algorithm: starting from the two end
    points, the point deviating most from the current approximation is added
    until every remaining point is within `tolerance` or `target_count`
    points have been selected, whichever happens first.
    Note that the approximated polygon is always within the convex hull of the
    original polygon.

    Parameters
    ----------
    coords : (N, 2) array
        Coordinate array.
    tolerance : float
        Maximum distance from original points of polygon to approximated
        polygonal chain. A tolerance <= 0 disables this constraint; if
        `target_count` does not limit the result either, the original
        coordinate array is returned.
    target_count: int
        Maximum count of polygon points. Values >= N disable this
        constraint. The first and the last point are always kept, so the
        result never has fewer than 2 points.

    Returns
    -------
    coords : (M, 2) array
        Approximated polygonal chain where M <= N.

    References
    ----------
    .. [1] http://en.wikipedia.org/wiki/Ramer-Douglas-Peucker_algorithm
    """
    # Local import: this module only imports numpy at file level.
    import heapq

    coords = np.asarray(coords)
    point_count = len(coords)
    limited_by_count = target_count < point_count

    if point_count < 3 or (not limited_by_count and tolerance <= 0):
        return coords

    max_points = max(target_count, 2) if limited_by_count else point_count

    keep = np.zeros(point_count, dtype=bool)
    keep[0] = True
    keep[-1] = True
    kept_count = 2

    # Max-heap (via negated distances) of chain segments, ordered by the
    # deviation of their farthest interior point.
    segments = []

    def push_segment(start, end):
        candidate = _farthest_interior_point(coords, start, end)
        if candidate is not None:
            distance, index = candidate
            heapq.heappush(segments, (-distance, index, start, end))

    push_segment(0, point_count - 1)

    while segments and kept_count < max_points:
        negated_distance, index, start, end = heapq.heappop(segments)
        if -negated_distance <= tolerance:
            # The largest remaining deviation is within tolerance,
            # therefore all other segments are as well.
            break
        keep[index] = True
        kept_count += 1
        push_segment(start, index)
        push_segment(index, end)

    return coords[keep, :]
def binarize(image, debugger, method="sauvola"):
    """
    Convert `image` to grayscale and threshold it into a boolean image.

    Parameters
    ----------
    image : array
        Image that is passed to `rgb2gray`.
    debugger : object
        Its `save(name, image)` method is called with the grayscale image,
        the Sauvola threshold (for that method only)
        and the final binarized image.
    method : str or None
        One of "sauvola", "local", "niblack", "gauss-diff" or "local-otsu".
        `None` selects the default "sauvola"
        (the command line passes `None` when no method was given).

    Returns
    -------
    binarized_image : array of bool
        Result of comparing the grayscale image with the threshold.

    Raises
    ------
    TypeError
        If `method` is not one of the supported names.
    """
    if method is None:
        method = "sauvola"

    radius = 3

    gray_image = rgb2gray(image)
    debugger.save("gray_image", gray_image)

    if method == "sauvola":
        window_size = 3  # Minimal window size
        window_size += gray_image.size // (2**20)  # Relative to image size
        window_size += 1 if (window_size % 2 == 0) else 0  # Must always be odd
        logging.info("window_size: %s", window_size)

        thresh_sauvola = numpy.nan_to_num(
            threshold_sauvola(
                image=gray_image,
                window_size=window_size,
                k=0.3,  # Attained through experimentation
            )
        )
        debugger.save("thresh_sauvola", thresh_sauvola)
        binarized_image = gray_image > thresh_sauvola

    elif method == "local":
        binarized_image = gray_image > threshold_local(
            image=gray_image,
            block_size=radius,
        )

    elif method == "niblack":
        thresh_niblack = skimage.filters.threshold_niblack(
            image=gray_image,
            window_size=radius,
            k=0.08,
        )
        binarized_image = gray_image > thresh_niblack

    elif method == "gauss-diff":
        # Blur strength grows with the image size
        sigma = gray_image.size // (2**17)
        high_frequencies = gray_image - gaussian(
            image=gray_image,
            sigma=sigma,
        )
        thresh = threshold_otsu(high_frequencies)
        binarized_image = high_frequencies > thresh

    elif method == "local-otsu":
        warped_image_ubyte = img_as_ubyte(gray_image)
        selem = morphology.disk(radius)
        local_otsu = rank.otsu(warped_image_ubyte, selem)
        binarized_image = warped_image_ubyte >= local_otsu

    else:
        raise TypeError(f"{method} is no supported binarization method")

    debugger.save("binarized_image", binarized_image)

    return binarized_image
import argparse
import os.path as path


def execute_arguments(arguments):
    """
    Parse the command line arguments and run the selected subcommand.

    Supported subcommands are `binarize`, `correct`, `corners` and
    `renumber-pages`. Every path argument that was provided is converted
    to an absolute path before the subcommand handler is called.
    The `perspectra` modules are imported inside the handlers, so only the
    module of the selected subcommand gets imported.

    Parameters:
        arguments: List of argument strings (without the program name),
            passed on to `ArgumentParser.parse_args`.

    Returns:
        None. If no subcommand is given, the help text is printed.
    """
    parser = argparse.ArgumentParser(prog="perspectra")
    parser.add_argument(
        "--debug",
        help="Render debugging view",
        action="store_true",
    )

    # Add subparsers
    subparsers = parser.add_subparsers(
        title="subcommands",
        description="subcommands to handle files and correct photos",
        help="additional help",
        dest="subparser_name",
    )

    # Add subcommand 'binarize'
    parser_binarize = subparsers.add_parser(
        "binarize",
        help="Binarize image",
    )
    parser_binarize.add_argument(
        "input_image_path",
        nargs="?",
        metavar="image-path",
        help="Path to image which shall be fixed",
    )
    parser_binarize.add_argument(
        "--method",
        help="Binarization method to use",
        dest="binarization_method",
    )
    parser_binarize.add_argument(
        "--no-clear-border",
        help="Do not remove any objects which touch the border",
        action="store_true",
        dest="shall_not_clear_border",
    )

    def binarize_handler(**kwargs):
        from perspectra import binarize
        binarize.binarize_image(**kwargs)

    parser_binarize.set_defaults(func=binarize_handler)

    # Add subcommand 'correct'
    parser_correct = subparsers.add_parser(
        "correct",
        help="Perspectively correct and crop photos of documents.",
    )
    parser_correct.add_argument(
        "--gray",
        help="Save image as grayscale image",
        action="store_true",
        dest="output_in_gray",
    )
    parser_correct.add_argument(
        "--binary",
        help="Save image as binary image",
        choices=[
            "gauss-diff",
            "local-otsu",
            "local",
            "niblack",
            "sauvola",
        ],
        dest="binarization_method",
    )
    parser_correct.add_argument(
        "--no-clear-border",
        help="Do not remove any objects which touch the border",
        action="store_true",
        dest="shall_not_clear_border",
    )
    parser_correct.add_argument(
        "--marked-image",
        help="Copy of original image with marked corners",
        dest="image_marked_path",
    )
    parser_correct.add_argument(
        "--output",
        metavar="image-path",
        help="Output path of fixed image",
        dest="output_image_path",
    )
    parser_correct.add_argument(
        "input_image_path",
        nargs="?",
        metavar="image-path",
        help="Path to image which shall be fixed",
    )

    def transform_handler(**kwargs):
        from perspectra import transformer
        transformer.transform_image(**kwargs)

    parser_correct.set_defaults(func=transform_handler)

    # Add subcommand 'corners'
    parser_corners = subparsers.add_parser(
        "corners",
        help=(
            "Returns the corners of the document in the image as "
            "[top-left, top-right, bottom-right, bottom-left]"
        ),
    )
    parser_corners.add_argument(
        "input_image_path",
        nargs="?",
        metavar="image-path",
        help="Path to image to find corners in",
    )

    def corners_handler(**kwargs):
        from perspectra import transformer
        transformer.print_corners(**kwargs)

    parser_corners.set_defaults(func=corners_handler)

    # Add subcommand 'renumber-pages'
    parser_rename = subparsers.add_parser(
        "renumber-pages",
        help=(
            "Renames the images in a directory according to their page "
            "numbers. The assumed layout is "
            "`cover -> odd pages -> even pages reversed`"
        ),
    )
    parser_rename.add_argument(
        "book_directory",
        metavar="book-directory",
        help="Path to directory containing the images of the pages",
    )

    def rename_handler(**kwargs):
        from perspectra import file_utils
        # `renumber_pages` only accepts the directory, so the remaining
        # parsed values (`debug`, `func`, …) must not be forwarded
        file_utils.renumber_pages(book_directory=kwargs["book_directory"])

    parser_rename.set_defaults(func=rename_handler)

    args = parser.parse_args(args=arguments)

    if not args.subparser_name:
        parser.print_help()
        return

    # Not every subcommand defines every path argument,
    # therefore only touch the attributes which exist and are set
    path_attributes = (
        "input_image_path",
        "image_marked_path",
        "output_image_path",
    )
    for attribute in path_attributes:
        value = getattr(args, attribute, None)
        if value:
            setattr(args, attribute, path.abspath(value))

    args.func(**vars(args))
from itertools import zip_longest
from pathlib import Path


def getTempPath(file_path):
    """
    Return a sibling path of `file_path` with a prefixed temporary name.

    Renaming through this intermediate name avoids collisions between
    source files and already numbered target files.
    """
    return file_path.with_name(
        f'temporary-name-to-avoid-collisions_{file_path.name}'
    )


def renumber_pages(
    book_directory='.',
    shall_run_dry=True,
    includes_cover=True,
):
    """
    Rename the images in a directory according to their page numbers.

    The assumed layout is `cover -> odd pages -> even pages reversed`.
    The cover gets the page number 0.
    For example a book with 9 content pages and a cover
    starting at image 23:

    - img_23.jpg => 0
    - img_24.jpg => 1, img_25.jpg => 3, …, img_28.jpg => 9
    - img_29.jpg => 8, img_30.jpg => 6, img_31.jpg => 4, img_32.jpg => 2

    The images are processed in lexicographic order of their paths,
    so the file names must sort in the order the photos were taken.

    Parameters:
        book_directory: Directory containing the images of the pages.
        shall_run_dry: If true (default) only print the planned moves
            without renaming any file.
        includes_cover: If true (default) the first image is the cover
            and gets number 0. Otherwise numbering starts at 1
            with the first odd page.

    Returns:
        None. The planned / executed moves are printed.
    """
    valid_file_endings = ('jpeg', 'jpg', 'png', 'tiff', 'tif', 'gif')
    book_dir_path = Path(book_directory).resolve()
    # `iterdir` yields entries in arbitrary order => sort explicitly
    images = sorted(
        entry for entry in book_dir_path.iterdir()
        if entry.suffix.lower()[1:] in valid_file_endings
    )

    if not images:
        print(f'No images found in "{book_dir_path}"')
        return

    cover = images[:1] if includes_cover else []
    content_pages = images[len(cover):]

    # With an odd number of content pages there is one more odd page
    odd_count = (len(content_pages) + 1) // 2
    odd_pages = content_pages[:odd_count]
    even_pages = content_pages[odd_count:][::-1]

    # `zip_longest` keeps the trailing odd page which has no even partner
    sorted_images = cover + [
        img
        for pair in zip_longest(odd_pages, even_pages)
        for img in pair
        if img is not None
    ]

    first_page_number = 0 if includes_cover else 1
    name_length = len(str(len(images)))

    print(f'In "{book_dir_path}" move:\n')

    # First pass: Move all files out of the way
    for file_path in sorted_images:
        temp_path = getTempPath(file_path)

        print(f'\t{file_path.name} -> {temp_path.name}', end='')
        if not shall_run_dry:
            file_path.rename(temp_path)
        print(' ✔︎')

    print()

    # Second pass: Move the files to their final, numbered names
    for index, file_path in enumerate(sorted_images, start=first_page_number):
        temp_path = getTempPath(file_path)
        # Keep the original suffix so that the file type stays correct
        output_path = temp_path.with_name(
            f'{index:0{name_length}d}{file_path.suffix}'
        )

        print(f'\t{temp_path.name} -> {output_path.name}', end='')
        if not shall_run_dry:
            temp_path.rename(output_path)
        print(' ✔︎')
import random

import numpy
from skimage import morphology, util


def add_noise(image):
    """
    Add reproducible blob-shaped noise to a binary image.

    Salt noise is generated on an empty image of the same shape, merged
    into blobs with a morphological closing, and a fixed fraction of the
    resulting blobs is OR-combined with `image`.
    All random sources use the fixed seed 123, so the output is
    deterministic. The selection of blobs uses a private random generator
    in order to not reseed the global `random` module of the caller.

    Parameters:
        image: Binary image the noise is added to.

    Returns:
        Boolean array with the shape of `image`.
    """
    total_amount = 0.001
    keep_percentage = 0.05
    empty_image = numpy.zeros_like(image)

    noisy_img = util.random_noise(
        image=empty_image,
        mode='salt',
        amount=total_amount,
        rng=123,
    )
    closed_noise = morphology.binary_closing(
        image=noisy_img,
        footprint=morphology.disk(15)
    )
    labeled_noise = morphology.label(closed_noise)
    # Number of distinct labels, including the background label 0
    number_of_noise_blobs = len(numpy.unique(labeled_noise))

    # Same sequence as `random.seed(123)`, but without global side effects
    blob_picker = random.Random(123)
    random_blob_labels = blob_picker.sample(
        # Start with 1 to not get the background color, which is 0
        range(1, number_of_noise_blobs),
        int(number_of_noise_blobs * keep_percentage),
    )
    # `isin` keeps the shape of `labeled_noise`, which equals `image.shape`
    filtered_noise = numpy.isin(labeled_noise, random_blob_labels)

    return numpy.logical_or(filtered_noise, image)
Split image 11 | """ 12 | 13 | index = 0 14 | radius_step = 2 15 | cumulative_dilation_size = radius_step ** index 16 | cumulative_disk = morphology.disk(cumulative_dilation_size) 17 | print(f'Cumulative dilation size: {cumulative_dilation_size}') 18 | 19 | current_dilation_size = radius_step ** (index - 1) \ 20 | if index > 0 \ 21 | else 1 22 | # dilation_disk = morphology.disk(current_dilation_size) 23 | print(f'Current dilation size: {current_dilation_size}') 24 | 25 | # Noise blobs with up to 80 % more area 26 | # than the structuring element will get deleted 27 | max_noise_size = numpy.count_nonzero(cumulative_disk) * 1.5 28 | print(f'Maximum noise size: {max_noise_size}') 29 | 30 | # eroded = morphology.dilation( 31 | # image, 32 | # selem=morphology.disk(current_dilation_size) 33 | # ) 34 | 35 | # if images: 36 | # images.append((f'eroded {index}', eroded)) 37 | 38 | # cleaned_eroded = morphology.remove_small_objects( 39 | # eroded, 40 | # max_noise_size 41 | # ) 42 | 43 | # if images: 44 | # images.append((f'cleaned eroded {index}', cleaned_eroded)) 45 | 46 | # cleaned_orig = numpy.logical_and( image , cleaned_eroded) 47 | 48 | print(f'Finished cleaning pass {index}\n') 49 | 50 | return [] # TODO: [leftPage, rightPage] 51 | -------------------------------------------------------------------------------- /src/perspectra/transformer.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | import base64 4 | import logging 5 | from typing import Tuple 6 | 7 | import imageio.v3 as imageio 8 | import numpy 9 | import numpy as np 10 | import skimage 11 | from skimage import ( 12 | draw, 13 | exposure, 14 | feature, 15 | io, 16 | morphology, 17 | segmentation, 18 | transform, 19 | util, 20 | ) 21 | from skimage.color import rgb2gray, label2rgb 22 | from skimage.feature import ( 23 | corner_peaks, 24 | corner_foerstner, 25 | ) 26 | from skimage.filters import ( 27 | sobel, 28 | gaussian, 29 | 
def cartesian_to_polar(points):
    """
    Convert (row, column) points to polar coordinates.

    The second column of `points` is used as x, the first one as y.
    The angle returned by `arctan2` is normalized to degrees such that
    negative angles are negated and all other angles `theta` become
    `360 - theta`. An angle of exactly 0 is therefore reported as 360.

    Parameters:
        points: Array of shape (n, 2). May be empty.

    Returns:
        Array of shape (n, 2) with the columns (radius, angle in degrees).

    >>> cartesian_to_polar(np.array([[-1,1], [-1,-1], [1,-1], [1,1]]))
    array([[  1.41421356,  45.        ],
           [  1.41421356, 135.        ],
           [  1.41421356, 225.        ],
           [  1.41421356, 315.        ]])
    """
    x, y = points[:, 1], points[:, 0]
    r = np.hypot(x, y)
    thetas = np.degrees(np.arctan2(y, x))

    # Vectorized normalization, which also works for zero points
    thetas_norm = np.where(thetas < 0, -thetas, 360 - thetas)

    return np.column_stack((r, thetas_norm))
def get_point_angles_in_deg(points):
    """
    Calculate the signed turning angle at every point of a closed polygon.

    For each point the vector pointing into the point (`a`) and the vector
    pointing out of the point (`b`) are compared. The angle is derived from
    the normalized z-component of their cross product via `arcsin`.

    NOTE(review): Because of `arcsin` the result is limited to
    -90 <= angle <= 90, so turns sharper than 90 degrees fold back.
    Coinciding consecutive points have zero length edges and yield NaN.

    Parameters:
        points: Array of shape (n, 2), interpreted as closed polygon.

    Returns:
        Array of n angles in degrees.
    """
    # The vectors are differences of coordinates
    # a points into the point, b out of the point
    a = points - numpy.roll(points, 1, axis=0)
    b = numpy.roll(a, -1, axis=0)  # same but shifted

    # Calculate length of those vectors
    aLengths = numpy.linalg.norm(a, axis=1)
    bLengths = numpy.linalg.norm(b, axis=1)

    # z-component of the cross product, written out explicitly
    # as `numpy.cross` is deprecated for 2D vectors
    cross_z = a[:, 0] * b[:, 1] - a[:, 1] * b[:, 0]

    # Rounding errors can yield values slightly outside of arcsin's domain
    sines = numpy.clip(cross_z / aLengths / bLengths, -1.0, 1.0)

    angles = numpy.arcsin(sines)

    return angles / numpy.pi * 180
def binarize(image, debugger, method="sauvola"):
    """
    Convert an image to a binary image with the given thresholding method.

    The image is converted to grayscale first and all methods
    operate on the grayscale image.

    Parameters:
        image: Image to binarize.
        debugger: Object with a `save(name, image)` method
            used to store intermediate images.
        method: One of "sauvola", "niblack", "gauss-diff", "local-otsu".

    Returns:
        Boolean array which is True where a pixel is above its threshold.

    Raises:
        TypeError: If `method` is not supported.
    """
    radius = 3

    gray_image = rgb2gray(image)
    debugger.save("gray_image", gray_image)
    binarized_image = None

    if method == "sauvola":
        window_size = 3  # Minimal window size
        window_size += image.size // 2**20  # Set relative to image size
        window_size += 1 if (window_size % 2 == 0) else 0  # Must always be odd
        logging.info(f"window_size: {window_size}")

        thresh_sauvola = numpy.nan_to_num(
            threshold_sauvola(
                image=gray_image,
                window_size=window_size,
                k=0.3,  # Attained through experimentation
            )
        )
        debugger.save("thresh_sauvola", thresh_sauvola)
        binarized_image = gray_image > thresh_sauvola

    elif method == "niblack":
        # Threshold the grayscale image like all other methods
        # to get a 2D result instead of one mask per color channel
        thresh_niblack = skimage.filters.threshold_niblack(
            gray_image,
            window_size=radius,
            k=0.08,
        )
        binarized_image = gray_image > thresh_niblack

    elif method == "gauss-diff":
        sigma = gray_image.size // (2**16)
        high_frequencies = numpy.subtract(
            gray_image,
            gaussian(
                image=gray_image,
                sigma=sigma,
            ),
        )
        thresh = threshold_otsu(high_frequencies)
        binarized_image = high_frequencies > thresh

    elif method == "local-otsu":
        # Local import as `rank` is only needed for this method
        from skimage.filters import rank

        gray_image_ubyte = img_as_ubyte(gray_image)
        footprint = morphology.disk(radius)
        local_otsu = rank.otsu(gray_image_ubyte, footprint)
        binarized_image = gray_image_ubyte >= local_otsu

    else:
        raise TypeError(f"{method} is no supported binarization method")

    debugger.save("binarized_image", binarized_image)

    return binarized_image
def get_doc_corners(debugger, output_base_path, image, **kwargs):
    """
    Detect the corners of the document in the image.

    If `image_marked_path` is passed as keyword argument, the corners are
    derived from the difference between `image` and the marked copy.
    Otherwise the image is scaled down to a height of 256 px, segmented
    with a watershed (seeded at the image border and the image center)
    and the 4 Foerstner corners with the largest absolute angle
    are scaled back to the original image size.

    Parameters:
        debugger: Object with a `save(name, image)` method
            used to store intermediate images.
        output_base_path: Currently unused.
        image: Image to find the document corners in.
        **kwargs: Only `image_marked_path` is read.

    Returns:
        Array of (row, column) corners sorted by `get_sorted_corners`.
        NOTE(review): On failure (no corners found or unexpected number of
        regions) the unchanged `image` is returned instead. Callers which
        expect corners should be checked for this case.
    """
    image_marked_path = kwargs.get("image_marked_path")
    intermediate_height = 256

    if image_marked_path:
        image_marked = imageio.imread(image_marked_path, rotate=True)

        # TODO: Scale image *before* doing any computations

        image_gray = rgb2gray(image)
        image_marked_gray = rgb2gray(image_marked)

        # Use value > 0 in range 0 <= x <= 1 to ignore JPEG artifacts
        diff_corner_image = abs(image_gray - image_marked_gray) > 0.05
        debugger.save("diff_corner", diff_corner_image)

        blobs = feature.blob_doh(
            image=diff_corner_image,
            min_sigma=5,
        )

        # Drop the sigma column to keep only (row, column)
        detected_corners = numpy.delete(blobs, 2, 1)

        # Check before sorting, as sorting fails for zero corners.
        # The truth value of an array itself would be ambiguous.
        if detected_corners.size == 0:
            logging.warning("No corners detected")
            return image

        corners_normalized = get_sorted_corners(
            image.shape,
            detected_corners,
        )

    else:
        scale_ratio = intermediate_height / image.shape[0]

        resized_image = transform.resize(
            image,
            output_shape=(
                intermediate_height,
                # TODO: Scale all images to square size
                int(image.shape[1] * scale_ratio),
            ),
            mode="reflect",
            anti_aliasing=True,
        )
        debugger.save("resized", img_as_ubyte(resized_image))

        resized_gray_image = rgb2gray(resized_image)
        debugger.save("resized_gray", img_as_ubyte(resized_gray_image))

        blurred = gaussian(resized_gray_image, sigma=1)
        debugger.save("blurred", img_as_ubyte(blurred))

        # Seed 1: image border (background), seed 2: image center (document)
        markers = numpy.zeros_like(resized_gray_image, dtype=int)
        markers[0, :] = 1  # Top row
        markers[-1, :] = 1  # Bottom row
        markers[:, 0] = 1  # Left column
        markers[:, -1] = 1  # Right column
        center = (
            resized_gray_image.shape[0] // 2,
            resized_gray_image.shape[1] // 2,
        )
        markers[center] = 2

        elevation_map = sobel(blurred)

        # Flatten elevation map at seed
        # to avoid being trapped in a local minimum
        rows, cols = draw.disk(center, 16)
        elevation_map[rows, cols] = 0.0
        debugger.save(
            "elevation_map",
            exposure.rescale_intensity(img_as_ubyte(elevation_map)),
        )

        segmented_image = watershed(image=elevation_map, markers=markers)

        region_count = len(numpy.unique(segmented_image))

        if region_count != 2:
            logging.error(f"Expected 2 regions and not {region_count}")
            return image

        debugger.save(
            "segmented",
            img_as_ubyte(
                label2rgb(segmented_image, image=resized_gray_image),
            ),
        )

        # Boolean mask of the document region (label 2),
        # the border region (label 1) becomes the background
        segmented_closed = segmented_image == 2

        closing_diameter = 25
        pad_width = 2 * closing_diameter

        # Add border to avoid connection with image boundaries
        segmented_closed_border = numpy.pad(
            segmented_closed,
            pad_width=pad_width,
            mode="constant",
            constant_values=False,
        )

        segmented_closed_border = morphology.binary_closing(
            segmented_closed_border,
            morphology.disk(closing_diameter),
        )
        # Remove border
        segmented_closed = segmented_closed_border[
            pad_width:-pad_width,
            pad_width:-pad_width,
        ]

        # Convert False/True to 0/1
        debugger.save("segmented_closed", img_as_ubyte(segmented_closed))

        # Use Foerstner corner detector
        # as with Harris detector corners are shifted inwards
        w, q = corner_foerstner(segmented_closed, sigma=2)
        accuracy_thresh = 0.5
        roundness_thresh = 0.3
        foerstner = (q > roundness_thresh) * (w > accuracy_thresh) * w
        foerstner_corners = corner_peaks(foerstner, min_distance=1)
        logging.info(f"foerstner_corners: {foerstner_corners}")

        # Render corners
        empty_img = numpy.zeros_like(segmented_closed)
        empty_img[foerstner_corners[:, 0], foerstner_corners[:, 1]] = 1
        debugger.save("corner_foerstner", img_as_ubyte(empty_img))

        # Sorting and angle calculation fail for zero corners
        if len(foerstner_corners) == 0:
            logging.error("No corners detected")
            return image

        foerstner_corners_sorted = get_sorted_corners(
            segmented_closed.shape,
            foerstner_corners,
        )

        logging.info(f"foerstner corners sorted: {foerstner_corners_sorted}")

        point_angles_in_deg = get_point_angles_in_deg(
            foerstner_corners_sorted,
        )
        logging.info(f"point_angles_in_deg: {point_angles_in_deg}")

        point_angles_abs = numpy.abs(point_angles_in_deg)

        # Get the indices of the sorted angles in descending order
        point_angles_abs_sorted = numpy.argsort(point_angles_abs)[::-1]
        logging.info(f"point_angles_abs_sorted {point_angles_abs_sorted}")

        top_4_indices = point_angles_abs_sorted[:4]
        # Sort the top 4 indices to maintain the original order
        # in foerstner_corners_sorted
        sorted_top_4_indices = np.sort(top_4_indices)

        # Select the top 4 corners with the largest angle
        # while maintaining their original order
        corners_final = foerstner_corners_sorted[sorted_top_4_indices]

        logging.info(f"corners_final: {corners_final}")

        rows, cols = draw.polygon_perimeter(
            corners_final[:, 0],
            corners_final[:, 1],
        )
        image_simplified = numpy.copy(segmented_closed).astype(int)
        image_simplified[rows, cols] = 4
        debugger.save(
            "simplified",
            img_as_ubyte(
                label2rgb(
                    image_simplified,
                    image=resized_image,  # Overlay over original image
                    bg_label=0,
                )
            ),
        )

        if not numpy.any(corners_final):
            return image

        # Scale corners back to the size of the original image
        corners_normalized = numpy.divide(corners_final, scale_ratio)

        # TODO: Compare with values stored in json files

    return corners_normalized
def print_corners(**kwargs):
    """
    Print the detected document corners of an image as JSON.

    The output is a JSON list of objects with the keys `x` (column)
    and `y` (row), with the origin in the top left corner.

    Keyword arguments:
        input_image_path: Path of the image to analyze (required).
        debug: If truthy, a log file is set up and
            the debugger stores intermediate images.

    Raises:
        FileNotFoundError: If no input image path was specified.
    """
    image_path = kwargs.get("input_image_path")
    if not image_path:
        raise FileNotFoundError(
            f"An input image and not {image_path} must be specified"
        )

    # Debug artifacts are stored next to the image in "<name without suffix>"
    stem, _extension = os.path.splitext(os.path.basename(image_path))
    output_base_path = os.path.join(os.path.dirname(image_path), stem)

    is_debug = kwargs.get("debug", False)
    debug_level = "debug" if is_debug else ""

    # TODO: Accept lambda function which is only executed during debugging
    debugger = ImageDebugger(
        level=debug_level,
        base_path=output_base_path,
    )

    if is_debug:
        setup_logger(output_base_path)

    image = imageio.imread(image_path, rotate=True)
    doc_corners = get_doc_corners(debugger, output_base_path, image)

    # Origin is top left corner
    corner_dicts = []
    for corner in doc_corners:
        corner_dicts.append({"x": corner[1], "y": corner[0]})

    print(json.dumps(corner_dicts))
coords = [(0, 0), (5, 0), (5, 5), (3, 5.1), (0, 5)], 9 | tolerance = 1, 10 | target_count = 4 11 | ) 12 | assert result == [(0, 0), (5, 0), (5, 5), (5, 0)] 13 | -------------------------------------------------------------------------------- /tests/test_binarize.py: -------------------------------------------------------------------------------- 1 | from perspectra import binarize 2 | import numpy as np 3 | 4 | def test_binarize_grayscale_image(): 5 | debugger = binarize.ImageDebugger( 6 | level="", 7 | base_path="", 8 | ) 9 | white = [255, 255, 255] 10 | black = [0, 0, 0] 11 | example_image = [ 12 | [black, black, black, [4, 4, 4], black], 13 | [black, white, white, white, black ], 14 | [[8, 8, 8], white, [200, 200, 200], white, black], 15 | [black, white, white, white, [9, 9, 9]], 16 | [black, black, [8, 8, 8], black, black], 17 | ] 18 | 19 | result = binarize.binarize(example_image, debugger) 20 | assert np.array_equal(result, [ 21 | [False, False, False, False, False], 22 | [False, True, True, True, False], 23 | [False, True, True, True, False], 24 | [False, True, True, True, False], 25 | [False, False, False, False, False], 26 | ]) 27 | -------------------------------------------------------------------------------- /tests/test_cleaning.py: -------------------------------------------------------------------------------- 1 | import os 2 | import math 3 | from skimage import io, util 4 | import matplotlib 5 | from matplotlib import pyplot 6 | from perspectra.multipass_cleaner import remove_noise 7 | from perspectra.noise_generator import add_noise 8 | 9 | matplotlib.use("Agg") 10 | 11 | img_path = os.path.join(os.path.dirname(__file__), "fixtures/doc_binary.png") 12 | original = util.invert(io.imread(img_path)) 13 | images = [] 14 | noisy_orig = add_noise(original) 15 | images.append(("Noisy Original", noisy_orig)) 16 | 17 | # Should actually go up to ~8, but performance becomes unbearable 18 | cleaned_img = remove_noise(noisy_orig, images=images) 19 | 
def get_basin_mask(image):
    """
    Build the marker array for the watershed algorithm.

    The result is a float array of zeros with the same shape as ``image``
    in which index ``[0, 0]`` is labelled 1 and the central element
    (every dimension floor-divided by two) is labelled 2.
    """
    dimensions = np.shape(image)
    markers = np.zeros(dimensions)
    markers[0, 0] = 1
    # Written after the corner label, so the centre wins if both coincide.
    center_index = tuple(size // 2 for size in dimensions)
    markers[center_index] = 2
    return markers
# Load every colour fixture lazily; only the selected window is processed.
imgs_path = os.path.join(os.path.dirname(__file__), "fixtures/*_color.jpeg")
images = io.ImageCollection(
    load_pattern=imgs_path,
    conserve_memory=True,
)

# `offset` is the number of consecutive images to process.
offset = 1
# Valid window starts are 0 .. len(images) - offset inclusive.
# `np.random.randint` excludes its upper bound, hence the `+ 1`; without it
# the last image could never be selected.
rand_img_index = (
    np.random.randint(0, len(images) - offset + 1)
    if len(images) > offset
    else 0
)
images = images[rand_img_index:rand_img_index + offset]

# Lazy processing chain: gray -> resize -> blur -> edge elevation -> level.
images_gray = map(color.rgb2gray, images)
images_scaled = map(
    lambda img: scale_image(image_shape, img),
    images_gray,
)
images_blurred = map(filters.gaussian, images_scaled)
images_elevation = map(filters.sobel, images_blurred)
images_leveled = map(
    lambda img: level_image(border_width, spot_radius, img),
    images_elevation,
)
images_segmented = map(
    lambda img: segmentation.watershed(
        img,
        # TODO: Add missing markers
        # markers=get_basin_mask(img)),
    ),
    images_leveled,
)

# Materialise the chain; this is where the images are actually computed.
images_final = list(images_segmented)

# Save images:
for i, img in enumerate(images_final):
    io.imsave(f"tests/{i}_segmented.png", img)
scale_ratio) 19 | ) 20 | ) 21 | images.append(('Original', resized_image)) 22 | 23 | blurred = filters.gaussian(resized_image, sigma=1) 24 | images.append(('blurred', blurred)) 25 | 26 | sobel_v = filters.sobel_v(blurred) 27 | images.append(('sobel_v', sobel_v)) 28 | 29 | # sobel_v = filters.sobel_v(blurred) 30 | # images.append(('sobel_v', sobel_v)) 31 | 32 | the_gradient = morphology.black_tophat(sobel_v, morphology.disk(10)) 33 | images.append(('gradient', the_gradient)) 34 | 35 | # images.append(('threshold_adaptive', filters.threshold_adaptive(edge_image))) 36 | 37 | # images.append(('Hough Lines', sobel_v)) 38 | # lines = transform.probabilistic_hough_line( 39 | # sobel_v, 40 | # # threshold=10, 41 | # # line_length=resized_image.shape[0], 42 | # # line_gap=30, 43 | # # theta=numpy.array([math.tau/12, math.tau * 11/12]), 44 | # ) 45 | # print(len(lines)) 46 | 47 | grid_width = int(math.sqrt(len(images))) 48 | fig, axes = pyplot.subplots( 49 | nrows=math.ceil(len(images) / grid_width), 50 | ncols=grid_width, 51 | figsize=(8, 8), 52 | sharex=True, sharey=True 53 | ) 54 | ax = axes.ravel() 55 | 56 | for index, (title, image) in enumerate(images): 57 | ax[index].imshow(image, cmap=pyplot.cm.gray) 58 | ax[index].set_title(title) 59 | ax[index].axis('off') 60 | 61 | # for line in lines[:100]: 62 | # p0, p1 = line 63 | # ax[2].plot((p0[0], p1[0]), (p0[1], p1[1])) 64 | 65 | # fig.tight_layout() 66 | pyplot.savefig("tests/test_splitting_out.png") 67 | --------------------------------------------------------------------------------