├── .gitignore ├── README.txt ├── appengine ├── app.yaml ├── base.html ├── doc.html ├── index.yaml ├── main.html ├── main.py ├── model.py ├── static │ ├── README.txt │ └── scanner.css └── upload.html └── tools └── scancab /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # NOTICE: unmaintained! As of 2015-05-03 I no longer use this App Engine-based 4 | # version of Scanning Cabinet. 5 | # 6 | # The migration of my App Engine instance from Master/Slave datastore to HRD 7 | # failed, so I'm accelerating plans to move this project to be Camlistore-based 8 | # instead. See camlistore.org. This will just be a Camlistore app, using its 9 | # data model. 10 | # 11 | # Old README follows. 12 | # 13 | ################################################################################ 14 | 15 | 16 | This is scanningcabinet. 17 | 18 | It's my document management system. Maybe you'll like it too. 19 | 20 | Problem statement: 21 | 22 | * I'm a packrat. Yes, I might need my T-Mobile cellphone bill from 23 | March 2001 sometime. Maybe. (shutup) 24 | 25 | * My filing cabinets are full. 26 | 27 | * It's cold in San Francisco and I want to burn stuff. 28 | 29 | * I can't find tax or insurance documents when I need to anyway, 30 | because folders suck. I want tags. e.g. I can tag that one 31 | document "audi, insurance, crash, car, state farm, royal motors" 32 | and be sure I'll find it later. Which frickin' folder would I 33 | put that in anyway? Folders sucks. Yay tags. 34 | 35 | * I have a scanner. My friend's scanner is better. Borrowed that 36 | one. It has a sheet feeder. 
37 | 38 | * App Engine now has a Blob API: http://bit.ly/8K4FxM 39 | 40 | * It should be easy to get documents online. Must minimize context 41 | switching between feeding the scanner and entering metadata. In fact, 42 | they should be *entirely separate* tasks. If I have to enter metadata 43 | while scanning, I'll probably just end up on reddit. 44 | 45 | * All document metadata entry should be done later. This includes 46 | clumping multi-page scans into their logical documents. I shouldn't 47 | have to even enter how many pages a document is when I scan it. 48 | I'll be scanning stacks in the auto-document-feeder anyway. 49 | 50 | * Usually I want to just burn/shred documents, but occasionally 51 | I'll need the physical document in the future (like for taxes or 52 | jury duty), so the metadata must include information about the 53 | document's physical location. (e.g. "Red Folder #1") Then when 54 | I need it again, I go linear scan Red Folder #1 looking for it. 55 | Also, I track the "due date" of the document, and show upcoming 56 | ones on the main page, so I see pending due taxes get closer and 57 | closer. Frickin' taxes. 58 | 59 | Anyway, I wrote some software. (parts are kinda crap because I always 60 | forget Python, but whatevs.) 61 | 62 | Some instructions: 63 | 64 | * tools/scancab is the client program. You use it to scan & upload. 65 | Read its docs & comments. You'll need to modify the email & 66 | password later. But first: 67 | 68 | * appengine/ is the AppEngine server component. Go to 69 | http://appspot.com/ to make an AppID ("bobscans"). Then get the 70 | 1.3.0 or higher App Engine SDK, tweak 71 | scanningcabinet/appengine/app.yaml file to match your AppID, then 72 | appcfg.py update 'appengine' to upload the app to your account. 73 | 74 | -- Now, go to https://<your-app-id>.appspot.com/ and login. This 75 | makes your UserInfo entity in the database. That's all.
76 | 77 | -- Now, go back to http://appspot.com/, click your App, then click 78 | "Datastore Viewer" on the left. Find your UserInfo entity, click 79 | it, and modify its "upload_password" to some password you'll use 80 | for uploading. Don't use your Google password. Choose type 81 | "string". 82 | 83 | -- Now, go put your Google account's email & that password you just 84 | made up into scanningcabinet/tools/scancab 85 | 86 | * Now start scanning stuff. 87 | 88 | * Occasionally go add metadata at your app URL. 89 | 90 | Enjoy! 91 | 92 | Brad 93 | brad@danga.com 94 | -------------------------------------------------------------------------------- /appengine/app.yaml: -------------------------------------------------------------------------------- 1 | application: scanningcabinet 2 | version: 1 3 | api_version: 1 4 | runtime: python 5 | builtins: 6 | - remote_api: on 7 | 8 | handlers: 9 | - url: /remote_api 10 | script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py 11 | login: admin 12 | 13 | - url: /static 14 | static_dir: static 15 | 16 | # Post URL must not be accessible by any users. Only by 17 | # going through Blobstore API upload URL. 18 | - url: /post 19 | script: main.py 20 | 21 | - url: /uploadurl.* 22 | script: main.py 23 | 24 | - url: /resource.* 25 | script: main.py 26 | 27 | - url: .* 28 | login: required 29 | script: main.py 30 | 31 | -------------------------------------------------------------------------------- /appengine/base.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | {% block title %}Title{% endblock %} 8 | 9 | 10 | 11 |
12 | {% if user_info %} 13 | {% if user_info.non_owner %} 14 | {{user_info.real_email}}, acting as 15 | {% endif %} 16 | {{user_info.user.email}} | 17 | {% endif %} 18 | 19 | 20 | {% if user_info %} 21 | Log out 22 | {% else %} 23 | Log in 24 | {% endif %} 25 | 26 |
27 |
[Scanning Cabinet]
28 | 29 | {% block preamble %} 30 | {% endblock %} 31 | 32 | {% if error_messages %} 33 | 38 | {% endif %} 39 | 40 | {% block main_body %} 41 | Empty 42 | {% endblock %} 43 | 44 | 45 | -------------------------------------------------------------------------------- /appengine/doc.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}{{doc.title}} -- document {{doc.key.id}}{% endblock %} 4 | 5 | {% block preamble %} 6 | {% endblock %} 7 | 8 | {% block main_body %} 9 |

10 | {% if doc.title_or_empty_string %} 11 | {{doc.title_or_empty_string|escape}} 12 | {% else %} 13 | Document {{doc.key.id}} 14 | {% endif %} 15 |

16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | 37 | 38 |
Title
Tags
Doc Date (yyyy-mm-dd)
Due Date (yyyy-mm-dd)
Location 26 | (of physical document)
31 | Other action: 36 |
39 |
40 | 41 | {% if show_single_list %} 42 |
43 | {% for page in pages %} 44 |
45 | {% endfor %} 46 |
47 | {% else %} 48 | {% for page in pages %} 49 | 50 | {% endfor %} 51 | {% endif %} 52 | {% endblock %} 53 | -------------------------------------------------------------------------------- /appengine/index.yaml: -------------------------------------------------------------------------------- 1 | indexes: 2 | - kind: MediaObject 3 | properties: 4 | - name: lacks_document 5 | - name: owner 6 | - name: creation 7 | 8 | # AUTOGENERATED 9 | 10 | # This index.yaml is automatically updated whenever the dev_appserver 11 | # detects that a new type of query is run. If you want to manage the 12 | # index.yaml file manually, remove the above marker line (the line 13 | # saying "# AUTOGENERATED"). If you want to manage some indexes 14 | # manually, move them above the marker line. The index.yaml file is 15 | # automatically uploaded to the admin console when you next deploy 16 | # your application using appcfg.py. 17 | 18 | - kind: Document 19 | properties: 20 | - name: owner 21 | - name: due_date 22 | -------------------------------------------------------------------------------- /appengine/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Scanning Cabinet{% endblock %} 4 | 5 | {% block preamble %} 6 | {% if top_message %} 7 |
{{top_message}}
8 | {% endif %} 9 |

10 | {% if view_user %} 11 | {% ifequal view_user.key user_info.key %} 12 | Scanning Cabinet 13 | {% else %} 14 | {{view_user.user.email|escape}}'s media 15 | {% endifequal %} 16 | {% else %} 17 | Scanning Cabinet 18 | {% endif %} 19 |

20 | {% endblock %} 21 | 22 | {% block main_body %} 23 | 24 |

Search

25 |
26 |
Tag search: (comma-separated union)
27 |
28 | 29 | {% if media and not did_search %} 30 |

Un-annotated raw scans

31 |
32 | 33 |
34 | {% for item in media %} 35 |
36 |
37 | 38 | [larger]
39 | 40 |
41 |
42 | {% endfor %} 43 |
44 |
45 |
46 | {% endif %} 47 | 48 |

Documents

49 | 50 | {% if docs %} 51 | 59 | {% else %} 60 | 61 |

(tag cloud here?)

62 | 63 | {% endif %} 64 | 65 | 66 | {% if upcoming_due_docs %} 67 |

Upcoming Due Documents

68 | 69 | 78 | 79 | {% endif %} 80 | 81 | 82 | 83 | {% if untagged_docs %} 84 |

Untagged Documents

85 | 86 | 94 | 95 | {% endif %} 96 | 97 | 98 | {% endblock %} 99 | -------------------------------------------------------------------------------- /appengine/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # scanningcabinet's AppEngine server-side code. 4 | # 5 | # Copyright 2009 Brad Fitzpatrick 6 | # Copyright 2009 Google Inc. (sample app that scanningcabinet is based on) 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | import cgi 22 | import datetime 23 | import logging 24 | import os 25 | import re 26 | import time 27 | import urllib 28 | 29 | from google.appengine.api import images 30 | from google.appengine.api import users 31 | from google.appengine.ext import blobstore 32 | from google.appengine.ext import db 33 | from google.appengine.ext import webapp 34 | from google.appengine.ext.webapp import blobstore_handlers 35 | from google.appengine.ext.webapp import template 36 | 37 | 38 | import wsgiref.handlers 39 | 40 | from model import UserInfo 41 | from model import Document 42 | from model import MediaObject 43 | 44 | def parse_timestamp(stamp): 45 | """Parse timestamp to datetime object. 46 | 47 | Datetime parsing is not supported until Python 2.5 and microseconds until 48 | Python 2.5. 49 | 50 | Args: 51 | Date/time formatted as Python 2.6 format '%Y-%m-%d %H:%M:%S.%f'. 52 | 53 | Returns: 54 | datetime object. 
55 | """ 56 | no_microseconds, microseconds = stamp.split('.', 1) 57 | time_struct = time.strptime(no_microseconds, '%Y-%m-%d %H:%M:%S') 58 | params = list(time_struct)[:6] + [int(microseconds)] 59 | return datetime.datetime(*params) 60 | 61 | 62 | def get_user_info(): 63 | """Get UserInfo for currently logged in user. 64 | 65 | This will insert the new user if it does not already exist in datastore. 66 | 67 | Returns: 68 | UserInfo record for user if user is logged in, else None. 69 | """ 70 | user = users.get_current_user() 71 | if user is None: 72 | return None 73 | auth_email = user.email() 74 | effective_email = auth_email 75 | 76 | if auth_email == 'brother@example.com': 77 | effective_email = 'test@example.com' 78 | if auth_email == 'cole@fitzpat.com': 79 | effective_email = 'bradfitz@gmail.com' 80 | 81 | if auth_email == effective_email: 82 | ui = UserInfo.get_or_insert(key_name='user:%s' % auth_email) 83 | else: 84 | ui = UserInfo.get_by_key_name('user:%s' % effective_email) 85 | if not ui: 86 | logging.error("User %s failed to act as %s; %s doesn't exist", auth_email, effective_email, effective_email) 87 | return None 88 | logging.info("User %s acting as %s", auth_email, effective_email) 89 | ui.non_owner = True 90 | ui.real_email = auth_email 91 | return ui 92 | 93 | 94 | class MainHandler(webapp.RequestHandler): 95 | """Handler for main page. 96 | 97 | If the user is logged in it will by default show all their media. 98 | 99 | If the user is not logged in it will by default show nothing, but suggest a 100 | course of action to the user. 101 | 102 | This page also shows the results of a search for a provided users shared 103 | media objects. Only public objects are shown for the searched user. If the 104 | searched for user does not exist, a message is displayed to that effect. 105 | """ 106 | 107 | def get(self): 108 | # Provide login/logout URLs. 
109 | user_info = get_user_info() 110 | if user_info is None: 111 | login_url = users.create_login_url('/') 112 | else: 113 | login_url = users.create_logout_url('/') 114 | 115 | # Collect list of error messages which gets shown to the user. 116 | error_messages = self.request.params.getall('error_message') 117 | view_user = user_info # for now 118 | did_search = False 119 | 120 | # Fetch media for view user. 121 | media = MediaObject.all().filter('owner', user_info) 122 | media = media.filter('lacks_document', True) 123 | media = media.order('creation') 124 | limit = 50 125 | if self.request.get("limit"): 126 | limit = long(self.request.get("limit")) 127 | media = media.fetch(limit) 128 | 129 | docs = Document.all().filter('owner', user_info) 130 | tags = self.request.get("tags") 131 | if tags: 132 | did_search = True 133 | for tag in re.split('\s*,\s*', tags): 134 | docs = docs.filter("tags", tag) 135 | docs = docs.fetch(limit) 136 | 137 | untagged_docs = Document.all().filter('owner', user_info).filter("no_tags", True).fetch(limit) 138 | 139 | upcoming_due = Document.all().filter('owner', user_info) 140 | upcoming_due = upcoming_due.filter("due_date !=", None) 141 | upcoming_due = upcoming_due.order("due_date") 142 | upcoming_due = upcoming_due.fetch(30) 143 | 144 | top_message = "" 145 | if self.request.get("saved_doc"): 146 | docid = long(self.request.get("saved_doc")) 147 | top_message = "Saved doc %d" % (docid, docid) 148 | 149 | # Render view. 
150 | self.response.out.write(template.render('main.html', { 151 | "did_search": did_search, 152 | "media": media, 153 | "docs": docs, 154 | "untagged_docs": untagged_docs, 155 | "upcoming_due_docs": upcoming_due, 156 | "view_user": view_user, 157 | "login_url": login_url, 158 | "user_info": user_info, 159 | "top_message": top_message, 160 | }, debug=True)) 161 | 162 | 163 | class MakeDocHandler(webapp.RequestHandler): 164 | def post(self): 165 | user_info = get_user_info() 166 | if user_info is None: 167 | self.redirect('/?error_message=%s' % 'log-in required') 168 | scan_ids = self.request.get_all("media_id") 169 | scans = MediaObject.get(scan_ids) 170 | doc = Document( 171 | parent=user_info, 172 | owner=user_info, 173 | pages=[scan.key() for scan in scans], 174 | title=None, 175 | description=None) 176 | def make_doc(): 177 | db.put(doc) 178 | for scan in scans: 179 | scan.lacks_document = False 180 | scan.document = doc.key() 181 | db.put(scan) 182 | db.run_in_transaction(make_doc) 183 | self.redirect(doc.display_url + "?size=1200") 184 | 185 | 186 | class UploadFormHandler(webapp.RequestHandler): 187 | """Handler to display the media object upload page. 188 | 189 | This must be a dynamic page because the upload URL must be generated 190 | by the Blobstore API. 
191 | """ 192 | 193 | def get(self): 194 | user_info = get_user_info() 195 | if user_info is None: 196 | self.redirect( 197 | '/?error_message=%s' % 'You must be logged in to upload media') 198 | 199 | upload_url = blobstore.create_upload_url( 200 | '/post') 201 | 202 | self.response.out.write(template.render('upload.html', 203 | locals(), 204 | debug=True)) 205 | 206 | 207 | def lookup_and_authenticate_user(handler, claimed_email, claimed_password): 208 | if not claimed_email: 209 | return None 210 | claimed_user = UserInfo.get_by_key_name('user:%s' % claimed_email) 211 | if not claimed_user: 212 | return None 213 | if claimed_email == 'test@example.com' and \ 214 | handler.request.headers["Host"] == "localhost:8080": 215 | # No auth for testing. 216 | return claimed_user 217 | if claimed_user.upload_password and \ 218 | claimed_user.upload_password == claimed_password: 219 | return claimed_user 220 | return None 221 | 222 | 223 | class UploadUrlHandler(webapp.RequestHandler): 224 | """Handler to return a URL for a script to get an upload URL. 225 | 226 | This must be a dynamic page because the upload URL must be generated 227 | by the Blobstore API. 228 | """ 229 | 230 | def get(self): 231 | claimed_email = self.request.get("user_email") 232 | effective_user = lookup_and_authenticate_user(self, claimed_email, 233 | self.request.get("password")) 234 | 235 | if effective_user: 236 | self.response.headers['Content-Type'] = 'text/plain' 237 | upload_url = blobstore.create_upload_url('/post') 238 | self.response.out.write(upload_url) 239 | else: 240 | self.error(403) 241 | 242 | 243 | class UploadPostHandler(blobstore_handlers.BlobstoreUploadHandler): 244 | """Handle blobstore post, as forwarded by notification agent.""" 245 | 246 | def store_media(self, upload_files, error_messages): 247 | """Store media information. 248 | 249 | Writes a MediaObject to the datastore for the uploaded file. 
250 | 251 | Args: 252 | upload_files: List of BlobInfo records representing the uploads. 253 | error_messages: Empty list for storing error messages to report to user. 254 | """ 255 | if not upload_files: 256 | error_messages.append('Form is missing upload file field') 257 | 258 | if len(upload_files) != 1: 259 | error_messages.append('Form has more than one image.') 260 | 261 | def get_param(name, error_message=None): 262 | """Convenience function to get a parameter from request. 263 | 264 | Returns: 265 | String value of field if it exists, else ''. If the key does not exist 266 | at all, it will return None. 267 | """ 268 | try: 269 | value = self.request.params[name] 270 | if isinstance(value, cgi.FieldStorage): 271 | value = value.value 272 | return value or '' 273 | except KeyError: 274 | #error_messages.append(error_message) 275 | return None 276 | 277 | # Check that title, description and share fields provided. Do additional 278 | # constraint check on share to make sure it is valid. 279 | width = get_param('width') 280 | height = get_param('height') 281 | 282 | # title and description are only legit for single-page doc 283 | is_doc = get_param('is_doc') # is a stand-alone single-page doc? 284 | title = get_param('title') 285 | description = get_param('description') 286 | tags = get_param('tags') # comma-separated 287 | 288 | # Make sure user is logged in. 289 | user = users.get_current_user() 290 | user_email = '' 291 | if user is None: 292 | claimed_email = get_param("user_email") 293 | effective_user = lookup_and_authenticate_user(self, claimed_email, get_param('password')) 294 | if not effective_user: 295 | error_messages.append("No user or correct 'password' argument.") 296 | user_email = claimed_email 297 | else: 298 | user_email = user.email() 299 | 300 | if error_messages: 301 | return 302 | 303 | blob_info, = upload_files 304 | 305 | def store_media(): 306 | """Store media object info in datastore. 
307 | 308 | Also updates the user-info record to keep count of media objects. 309 | 310 | This function is run as a transaction. 311 | """ 312 | user_info = UserInfo.get_by_key_name('user:%s' % user_email) 313 | if user_info is None: 314 | error_messages.append('User record has been deleted. ' 315 | 'Try uploading again') 316 | return 317 | 318 | media = MediaObject( 319 | parent=user_info, 320 | owner=user_info, 321 | blob=blob_info.key(), 322 | creation=blob_info.creation, 323 | content_type=blob_info.content_type, 324 | filename=blob_info.filename, 325 | size=int(blob_info.size), 326 | lacks_document=True) 327 | 328 | user_info.media_objects += 1 329 | db.put(user_info) 330 | db.put(media) 331 | 332 | if bool(is_doc) and is_doc != "0": 333 | tag_list = [] 334 | if tags is not None: 335 | tag_list = [x for x in re.split('\s*,\s*', tags) if x] 336 | 337 | doc = Document( 338 | parent=user_info, 339 | owner=user_info, 340 | pages=[media.key()], 341 | title=title, 342 | description=description, 343 | no_tags=(len(tag_list)==0), 344 | tags=tag_list) 345 | db.put(doc) 346 | media.document = doc.key() 347 | media.lacks_document = False 348 | db.put(media) 349 | db.run_in_transaction(store_media) 350 | 351 | def post(self): 352 | """Do upload post.""" 353 | error_messages = [] 354 | 355 | upload_files = self.get_uploads('file') 356 | 357 | self.store_media(upload_files, error_messages) 358 | 359 | error_messages = tuple(urllib.quote(m) for m in error_messages) 360 | error_messages = tuple('error_message=%s' % m for m in error_messages) 361 | self.redirect('/?%s' % '&'.join(error_messages)) 362 | 363 | # Delete all blobs upon error. 
364 | if error_messages: 365 | blobstore.delete(upload_files) 366 | 367 | 368 | class ShowDocHandler(webapp.RequestHandler): 369 | def get(self, docid): 370 | user_info = get_user_info() 371 | if user_info is None: 372 | self.redirect('/?error_message=%s' % 'login required to view docs') 373 | docid = long(docid) 374 | doc = Document.get_by_id(docid, parent=user_info) 375 | if doc is None: 376 | self.response.out.write("Docid %d not found." % (docid)) 377 | return 378 | pages = MediaObject.get(doc.pages) 379 | size = self.request.get("size") 380 | if not size: 381 | size = 1200 382 | show_single_list = long(size) > 600 383 | self.response.out.write(template.render('doc.html', 384 | {"doc": doc, 385 | "pages": pages, 386 | "user_info": user_info, 387 | "size": size, 388 | "show_single_list": show_single_list}, 389 | debug=True)) 390 | 391 | 392 | def break_and_delete_doc(user, doc): 393 | """Deletes the document, marking all the images in it as un-annotated.""" 394 | def tx(): 395 | db.delete(doc) 396 | scans = MediaObject.get(doc.pages) 397 | for scan in scans: 398 | scan.lacks_document = True 399 | scan.document = None 400 | db.put(scan) 401 | db.run_in_transaction(tx) 402 | return True 403 | 404 | 405 | def delete_doc_and_images(user, doc): 406 | """Deletes the document and its images.""" 407 | scans = MediaObject.get(doc.pages) 408 | for scan in scans: 409 | blobstore.delete(scan.blob.key()) 410 | def tx(): 411 | db.delete(doc) 412 | scans = MediaObject.get(doc.pages) 413 | for scan in scans: 414 | user.media_objects -= 1 415 | db.delete(scan) 416 | db.put(user) 417 | db.run_in_transaction(tx) 418 | return True 419 | 420 | 421 | class ChangeDocHandler(webapp.RequestHandler): 422 | def post(self): 423 | user_info = get_user_info() 424 | if user_info is None: 425 | self.redirect('/?error_message=%s' % 'login required to view docs') 426 | docid = long(self.request.get("docid")) 427 | doc = Document.get_by_id(docid, parent=user_info) 428 | if doc is None: 429 | 
self.response.out.write("Docid %d not found." % (docid)) 430 | return 431 | 432 | mode = self.request.get("mode") 433 | if mode == "break": 434 | break_and_delete_doc(user_info, doc) 435 | self.response.out.write("[<< Back] Docid %d deleted and images broken out as un-annotated." % docid) 436 | return 437 | if mode == "delete": 438 | delete_doc_and_images(user_info, doc) 439 | self.response.out.write("[<< Back] Docid %d and its images deleted." % docid) 440 | return 441 | 442 | # Simple properties: 443 | doc.physical_location = self.request.get("physical_location") 444 | doc.title = self.request.get("title") 445 | 446 | # Tags 447 | doc.tags = [x for x in re.split('\s*,\s*', self.request.get("tags")) if x] 448 | doc.no_tags = (len(doc.tags) == 0) 449 | 450 | # Document Date 451 | date = self.request.get("date") 452 | if date: 453 | doc.doc_date = datetime.datetime.strptime(date, "%Y-%m-%d") 454 | doc.no_date = False 455 | else: 456 | doc.doc_date = None 457 | doc.no_date = True 458 | 459 | # Due date 460 | due_date_str = self.request.get("due_date") 461 | doc.due_date = None 462 | if due_date_str: 463 | doc.due_date = datetime.datetime.strptime(due_date_str, "%Y-%m-%d") 464 | 465 | def store(): 466 | db.put(doc) 467 | db.run_in_transaction(store) 468 | self.redirect("/?saved_doc=" + str(docid)) 469 | 470 | 471 | class ResourceHandler(blobstore_handlers.BlobstoreDownloadHandler): 472 | """For when user requests media object. Actually serves blob.""" 473 | 474 | def get(self, media_id, unused_filename): 475 | def get_param(name, error_message=None): 476 | """Convenience function to get a parameter from request. 477 | 478 | Returns: 479 | String value of field if it exists, else ''. If the key does not exist 480 | at all, it will return None. 
481 | """ 482 | try: 483 | value = self.request.params[name] 484 | if isinstance(value, cgi.FieldStorage): 485 | value = value.value 486 | return value or '' 487 | except KeyError: 488 | #error_messages.append(error_message) 489 | return None 490 | 491 | user_info = get_user_info() 492 | #if user_info is None and get_param("password") == "xx": 493 | # user_info = UserInfo.get_by_key_name('user:bradfitz@gmail.com') 494 | if user_info is None: 495 | self.redirect('/?error_message=%s' % 'log-in required') 496 | media_object = MediaObject.get_by_id(long(media_id), parent=user_info) 497 | if media_object is None: 498 | self.redirect('/?error_message=Unidentified+object') 499 | return 500 | 501 | last_modified_string = media_object.creation.strftime("%a, %d %b %Y %H:%M:%S GMT") 502 | self.response.headers['Cache-Control'] = "private" 503 | self.response.headers['Content-Type'] = str(media_object.guessed_type) 504 | self.response.headers['Last-Modified'] = last_modified_string 505 | expires = media_object.creation + datetime.timedelta(days=30) 506 | self.response.headers['Expires'] = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") 507 | 508 | # Caching 509 | if self.request.headers.has_key("If-Modified-Since"): 510 | ims = self.request.headers.get("If-Modified-Since") 511 | if ims == last_modified_string: 512 | self.error(304) 513 | return 514 | modsince = datetime.datetime.strptime(ims, "%a, %d %b %Y %H:%M:%S %Z") 515 | if modsince >= media_object.creation: 516 | self.error(304) 517 | return 518 | 519 | blob_key = media_object.blob.key() 520 | 521 | resize = self.request.get('resize') 522 | if resize: 523 | image = images.Image(blob_key=str(blob_key)) 524 | image.resize(width=int(resize), height=int(resize)) 525 | self.response.out.write(image.execute_transforms()) 526 | return 527 | 528 | if 'Range' in self.request.headers: 529 | self.response.headers['Range'] = self.request.headers['Range'] 530 | 531 | self.send_blob(blob_key, str(media_object.guessed_type)) 532 | 533 | 
534 | class GarbageCollectMediaHandler1(webapp.RequestHandler): 535 | def get(self): 536 | if not users.is_current_user_admin(): 537 | self.redirect('/?error_message=%s' % 'log-in required') 538 | 539 | used = set() 540 | for d in Document.all(): 541 | used |= set(d.pages) 542 | 543 | dead = dict() 544 | for i in MediaObject.all(): 545 | if i.key() not in used: 546 | dead[i.key()] = i 547 | 548 | for k in dead: 549 | dead[k].delete() 550 | 551 | self.redirect('/') 552 | 553 | class GarbageCollectMediaHandler2(webapp.RequestHandler): 554 | def get(self): 555 | if not users.is_current_user_admin(): 556 | self.redirect('/?error_message=%s' % 'log-in required') 557 | 558 | used = set() 559 | for i in MediaObject.all(): 560 | used.add(i.blob.key()) 561 | 562 | for b in blobstore.BlobInfo.all(): 563 | if b.key() not in used: 564 | b.delete() 565 | 566 | self.redirect('/') 567 | 568 | class DumpHandler(webapp.RequestHandler): 569 | def get(self): 570 | self.response.headers['Cache-Control'] = "private" 571 | self.response.headers['Content-Type'] = "text/plain; charset=utf-8" 572 | 573 | user = UserInfo.get_by_key_name('user:bradfitz@gmail.com') 574 | 575 | docs = Document.all().filter('owner', user) 576 | docs = docs.fetch(10000) 577 | self.response.out.write("# got %d docs\n" % len(docs)) 578 | for doc in docs: 579 | self.response.out.write("%s tags[%s] date[%s] title[%s] \n" % (doc.display_url, doc.tag_comma_separated, doc.date_yyyy_mm_dd, doc.title_or_empty_string)) 580 | for page in doc.pages: 581 | self.response.out.write(" has_page: %d\n" % (page.id_or_name())) 582 | meds = MediaObject.all().filter('owner', user) 583 | meds = meds.fetch(10000) 584 | self.response.out.write("# got %d mediaobjects\n" % len(meds)) 585 | for mo in meds: 586 | self.response.out.write("%s creation[%s] size[%d]\n" % (mo.url_path, str(mo.creation), mo.size)) 587 | 588 | 589 | def main(): 590 | application = webapp.WSGIApplication( 591 | [('/', MainHandler), 592 | ('/uploadurl', 
UploadUrlHandler), # returns a new upload URL 593 | #('/upload', UploadFormHandler), # for humans 594 | ('/post', UploadPostHandler), # for machine or humans to upload 595 | ('/makedoc', MakeDocHandler), 596 | ('/doc/(\d+)', ShowDocHandler), 597 | ('/changedoc', ChangeDocHandler), 598 | ('/resource/(\d+)(/.*)?', ResourceHandler), 599 | #('/dumpxx', DumpHandler), 600 | #('/gc_media1', GarbageCollectMediaHandler1), 601 | #('/gc_media2', GarbageCollectMediaHandler2), 602 | ], 603 | debug=True) 604 | wsgiref.handlers.CGIHandler().run(application) 605 | 606 | 607 | if __name__ == '__main__': 608 | main() 609 | -------------------------------------------------------------------------------- /appengine/model.py: -------------------------------------------------------------------------------- 1 | # 2 | # scanningcabinet's AppEngine server-side code. 3 | # 4 | # Copyright 2009 Brad Fitzpatrick 5 | # Copyright 2009 Google Inc. (sample app that scanningcabinet is based on) 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import mimetypes 21 | 22 | from google.appengine.ext import blobstore 23 | from google.appengine.ext import db 24 | 25 | 26 | class MigratingBlobReferenceProperty(db.Property): 27 | """Migrates pre-1.3.0 blob str props to real blobkey references.""" 28 | 29 | data_type = blobstore.BlobInfo 30 | 31 | def get_value_for_datastore(self, model_instance): 32 | """Translate model property to datastore value.""" 33 | blob_info = getattr(model_instance, self.name) 34 | if blob_info is None: 35 | return None 36 | return blob_info.key() 37 | 38 | def make_value_from_datastore(self, value): 39 | """Translate datastore value to BlobInfo.""" 40 | if value is None: 41 | return None 42 | 43 | # The two lines of difference of MigratingBlobReferenceProperty: 44 | if isinstance(value, basestring): 45 | value = blobstore.BlobKey(value) 46 | 47 | return blobstore.BlobInfo(value) 48 | 49 | def validate(self, value): 50 | """Validate that assigned value is BlobInfo. 51 | 52 | Automatically converts from strings and BlobKey instances. 53 | """ 54 | if isinstance(value, (basestring)): 55 | value = blobstore.BlobInfo(blobstore.BlobKey(value)) 56 | elif isinstance(value, blobstore.BlobKey): 57 | value = blobstore.BlobInfo(value) 58 | return super(MigratingBlobReferenceProperty, self).validate(value) 59 | 60 | 61 | class UserInfo(db.Model): 62 | """Information about a particular user and their media library.""" 63 | user = db.UserProperty(auto_current_user_add=True) 64 | media_objects = db.IntegerProperty(default=0) 65 | upload_password = db.StringProperty() 66 | 67 | # non_owner is set if a helper (e.g. 
Brad's brother) is helping him 68 | # tag 69 | non_owner = False 70 | real_email = "" # real user's email 71 | 72 | class Document(db.Model): 73 | """A document with 1 or more media objects (1+ pages, 0/1 preview)""" 74 | owner = db.ReferenceProperty(UserInfo, required=True) 75 | 76 | pages = db.ListProperty(db.Key, required=True) 77 | preview = db.ListProperty(db.Key) # preview images, if pages is a PDF 78 | 79 | doc_date = db.DateTimeProperty() 80 | no_date = db.BooleanProperty(required=True, default=True) 81 | 82 | creation = db.DateTimeProperty(auto_now_add=True) 83 | 84 | title = db.StringProperty() 85 | description = db.TextProperty() 86 | 87 | tags = db.StringListProperty() 88 | no_tags = db.BooleanProperty(required=True, default=True) 89 | 90 | # To find the paper document back later: 91 | physical_location = db.StringProperty() 92 | 93 | # Things I need to get to (taxes, etc.) 94 | due_date = db.DateTimeProperty() 95 | 96 | starred = db.BooleanProperty() 97 | 98 | @property 99 | def display_url(self): 100 | return '/doc/%s' % self.key().id() 101 | 102 | @property 103 | def tag_comma_separated(self): 104 | return ", ".join(self.tags) 105 | 106 | @property 107 | def date_yyyy_mm_dd(self): 108 | """Or empty string.""" 109 | if self.doc_date: 110 | return str(self.doc_date)[0:10] 111 | return "" 112 | 113 | @property 114 | def due_yyyy_mm_dd(self): 115 | """Or empty string.""" 116 | if self.due_date: 117 | return str(self.due_date)[0:10] 118 | return "" 119 | 120 | @property 121 | def title_or_empty_string(self): 122 | """The real title, or the empty string if none.""" 123 | if not self.title: 124 | return "" 125 | return self.title 126 | 127 | @property 128 | def some_title(self): 129 | if self.title: 130 | return self.title 131 | if self.tags: 132 | return ", ".join(self.tags) 133 | return self.title 134 | 135 | 136 | class MediaObject(db.Model): 137 | """Information about media object uploaded by user. 
138 | 139 | Does not contain the actual object, which is in blobstore. Contains duplicate 140 | meta-information about blob for searching purposes. 141 | """ 142 | owner = db.ReferenceProperty(UserInfo, required=True) 143 | 144 | blob = MigratingBlobReferenceProperty() 145 | 146 | creation = db.DateTimeProperty() 147 | content_type = db.StringProperty() 148 | 149 | filename = db.StringProperty() # foo.jpg 150 | original_path = db.StringProperty() # scan/tax/2009/foo.jpg 151 | size = db.IntegerProperty() 152 | 153 | # If known: 154 | width = db.IntegerProperty() 155 | height = db.IntegerProperty() 156 | 157 | # If part of a document yet, a reference (db.Key) to a media object. 158 | document = db.ReferenceProperty(Document, required=False) 159 | lacks_document = db.BooleanProperty() 160 | 161 | @property 162 | def thumb_url(self): 163 | return '/resource/%d/%s?resize=300' % (self.key().id(), self.filename) 164 | 165 | @property 166 | def url_resize(self): 167 | return '/resource/%s/%s?resize=' % (self.key().id(), self.filename) 168 | 169 | @property 170 | def url_path(self): 171 | return '/resource/%s/%s' % (self.key().id(), self.filename) 172 | 173 | @property 174 | def guessed_type(self): 175 | """A guess for the content type of this media object. 176 | 177 | This is currently necessary because the production version of the 178 | Blobstore API does not try to detect content types of uploads. 179 | """ 180 | if self.content_type == 'application/octet-stream': 181 | # Try to guess. Useful for backward compatibility with older objects 182 | # that had not content type detection. 
183 | mime_type, unused_parameters = mimetypes.guess_type(self.filename) 184 | return mime_type or 'text/plain' 185 | else: 186 | return self.content_type or 'text/plain' 187 | 188 | @property 189 | def is_image(self): 190 | """Returns True if this media object is an image.""" 191 | image_types = frozenset([ 192 | 'image/png', 'image/jpeg', 'image/tiff', 'image/gif', 'image/bmp']) 193 | return self.guessed_type in image_types 194 | 195 | def delete(self): 196 | """Also delete associated media blob and decrement users media count.""" 197 | super(MediaObject, self).delete() 198 | self.owner.media_objects -= 1 199 | self.owner.put() 200 | self.blob.delete() 201 | -------------------------------------------------------------------------------- /appengine/static/README.txt: -------------------------------------------------------------------------------- 1 | Placeholder file for empty git directory. 2 | 3 | JavaScript libraries (Closure?) will eventually go here. 4 | -------------------------------------------------------------------------------- /appengine/static/scanner.css: -------------------------------------------------------------------------------- 1 | .doc-page-single { 2 | border: 1px solid grey; 3 | display: block; 4 | } 5 | 6 | .doc-page-row { 7 | border: 1px solid grey; 8 | } 9 | -------------------------------------------------------------------------------- /appengine/upload.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}MediaStore Upload{% endblock %} 4 | 5 | {% block main_body %} 6 |
7 | Sharing: 11 | Title:
12 | Description:
13 |
14 | Upload File:
15 | 16 |
17 | 18 |
Cancel
{% endblock %}
--------------------------------------------------------------------------------
/tools/scancab:
--------------------------------------------------------------------------------
#!/usr/bin/perl
#
# Author: Brad Fitzpatrick
#
# You'll need Debian packages curl, sane-utils, sane, scanadf, some
# perl stuff, etc.  I kinda expect you to modify this to suit your
# needs.  I wrote it for me.  This is hacky and I don't offer support.
# I do, however, accept gushing emails of full of love.
#
# ============
# Basic usage:
# ============
#
# (Load scanner full of documents)
# $ scancab --adf
# $ scancab --adf --color
# $ scancab --adf --lineart
# $ scancab --duplex      (both sides. implies --adf)
#
# Those commands above just write to the queue directory.
# Make sure you mkdir ~/scancab-queue
#
# (But run this in the background in another terminal,
# which does the actual potentially-slow uploads, making
# sure to do them in the right creation order, doing retries,
# etc....)
# $ scancab --loop
#
# (Upload a certain document, but don't delete it...)
# $ scancab --upload=foo.jpg
#
# =================
# Scanner Advice...
# =================
# I can't say enough good things about my Fujitsu ScanSnap S1500M
# (identical to S1500).  Great, fast hardware, great Linux support.
# A++.  Would buy again.

use strict;
use warnings;   # FIX: was missing; catches undef interpolation bugs below
use LWP::Simple;
use Getopt::Long;
use IPC::Run ();
use File::Spec::Functions qw(tmpdir catdir catfile);

my $URL      = "http://localhost:8080";
my $EMAIL    = "";
my $password = "";

my $uploader_pid_file = "";
my $queue_dir = "$ENV{HOME}/scancab-queue/";

# Detect when we're the helper program (--scan-script) to scanadf,
# which we run in --adf batch mode.  (This script functions as both
# the driver and the helper.)
if ($ENV{'SCAN_RES'} || $ENV{'SCAN_FORMAT_ID'}) {
    be_batch_scan_script();
    exit(0);
}

my $upload_loop = 0;
my $adf         = 0;
my $upload_file;
my $color      = 0;
my $lineart    = 0;
my $appid      = $ENV{'SCANCAB_APPID'};
my $upload_now = 0;
my $duplex     = 0;
my $dev        = 0;

die unless GetOptions(
    "dev" => \$dev,            # dev_appserver mode

    "appid=s"    => \$appid,
    "email=s"    => \$EMAIL,
    "password=s" => \$password,

    # Upload this file, don't delete it, then exit.
    "upload=s"   => \$upload_file,
    "upload_now" => \$upload_now,  # don't queue

    # Loop, looking in $queue_dir
    "loop" => \$upload_loop,

    # Use the auto-document-feeder.
    "adf"    => \$adf,
    "duplex" => \$duplex,

    # Mutually exclusive:
    "color"   => \$color,
    "lineart" => \$lineart,
);

$adf = 1 if $duplex;

die "Can't do both color and lineart.\n" if $color && $lineart;

die "appid parameter must be just an appengine appid"
    if $appid && $appid !~ /^[\w-]+$/;

my $conf_dir    = "$ENV{HOME}/.config/scanningcabinet";
my $device      = slurp("$conf_dir/device");
my $device_flag = $device ? "-d $device" : "";

if ($dev) {
    $URL       = "http://localhost:8080";
    $EMAIL     = "test\@example.com";
    $password  = "test";
    $queue_dir = "/tmp/scancab-queue";
    mkdir $queue_dir, 0755 unless -d $queue_dir;
} else {
    unless (-d $queue_dir) {
        die "Queue directory doesn't exist; please create it: $queue_dir\n";
    }
    # ADF mode doesn't need to upload (just drops stuff into a batch
    # directory) so no need to set up URL or password in that case.
    unless ($adf) {
        $appid ||= slurp("$conf_dir/appid")
            or warn "No appid configured in $conf_dir/appid; assuming $URL\n";
        $EMAIL    ||= slurp("$conf_dir/email");
        $password ||= slurp("$conf_dir/password")
            or die "No password configured in args or in $conf_dir/password\n";
        if ($appid) {
            $URL = "https://$appid.appspot.com";
        }
    }
    $EMAIL ||= "$ENV{USER}\@gmail.com";  # uh, works for me. :)
}

if ($upload_loop) {
    chdir($queue_dir) or die "Failed to chdir to queue directory $queue_dir.\n";
    while (1) {
        opendir(my $dh, ".") or die;
        my %create_time;  # filename -> unixtime
        my @files = grep { /^image-.+-unx(\d+)\.(png|jpg)$/ && ($create_time{$_} = $1) }
                    readdir($dh);
        closedir($dh);

        # Note: need the fallback on ($a cmp $b) because my Fujitsu
        # ScanSnap S1500M is so damn fast, that sometimes files all
        # have the same unixtime(!):
        @files = sort { ($create_time{$a} <=> $create_time{$b}) || ($a cmp $b) } @files;

        if (@files) {
            my $failures;
            foreach my $file (@files) {
                if (upload_file($file)) {
                    unlink($file);
                } else {
                    $failures = 1;
                    print STDERR "# Upload error. Sleeping for 5 seconds...\n";
                    last;
                }
            }
            # BUGFIX: don't announce success when an upload just failed.
            print "Uploads complete. Waiting for new files.\n" unless $failures;
        }
        sleep 5;
    }
}

if ($upload_file) {
    die "File $upload_file doesn't exist.\n" unless -e $upload_file;
    print "Uploading $upload_file ...\n";
    # BUGFIX: anchor the extension test; the old unanchored /\.pdf/i
    # treated names like "foo.pdf.jpg" as PDFs.
    if ($upload_file =~ /\.pdf\z/i) {
        my $type = $color ? 'jpg' : 'tiff';
        my @pdf_to_img = qw(convert);
        if ($type eq 'tiff') {
            push @pdf_to_img, qw(-monochrome);
            push @pdf_to_img, qw(-density 150);
            push @pdf_to_img, qw(-compress lzw);
        } elsif ($type eq 'jpg') {
            push @pdf_to_img, qw(-density 300);
        }
        my $tmp = tmpdir();
        my $cnt = pdf_page_count($upload_file);
        for (my $pg = 0; $pg < $cnt; $pg++) {
            printf "  page %04d of %04d\n", $pg + 1, $cnt;
            my $img = catfile($tmp, sprintf("page$$-%04d.$type", $pg + 1));
            system(@pdf_to_img, "$upload_file\[$pg\]", $img) == 0
                or die "Cannot convert page\n";
            if (!upload_file($img)) {
                die "Failed to upload.\n";
            }
            unlink $img;
        }
    } else {
        if (!upload_file($upload_file)) {
            die "Failed to upload.\n";
        }
    }
    exit(0);
}

my $scan = 1;  # implicit default mode, for now.

if ($scan) {
    my %seen;
    opendir(D, ".") or die;
    my @files = readdir(D);
    foreach my $f (@files) {
        $seen{$f} = 1 if $f =~ /^image-\d\d\d\d/;
    }

    # Find the first unused image-NNNN basename.
    my $n = 1;
    my $base = sprintf("image-%04d", $n);
    while ($seen{$base} ||
           $seen{"$base.tiff"} ||
           $seen{"$base.jpg"} ||
           $seen{"$base.png"}) {
        $n++;
        $base = sprintf("image-%04d", $n);
    }
    my $tiff = "$base.tiff";

    my $mode = $lineart ? "Lineart" : ($color ? "Color" : "Gray");
    if ($lineart) {
        $ENV{SCAN_LINEART} = 1;  # to pass to subprocess
    }
    if ($adf) {
        my $extra_source = $duplex ? " --source=\"ADF Duplex\"" : "";
        system("scanadf $device_flag --mode $mode --resolution 300 " .
               $extra_source .
               " --scan-script $0 " .
               " -s $n") and die "Failed to batch scan.";
    } else {
        my $cmd = "scanimage $device_flag --mode $mode --resolution 300 --format tiff > $tiff";
        system($cmd)
            and die "Failed to scan while running:\n  $cmd\n";

        my $out = $lineart ?
"$base.png" : "$base.jpg";
        print "Scanned. Converting $tiff -> $out\n";
        system("convert", "-quality", "90", $tiff, $out)
            and die "failed to convert.\n";
        unlink($tiff) or die "Failed to unlink $tiff: $!";
        if ($upload_now) {
            if (!upload_file($out)) {
                die "Failed to upload $out (keeping file)\n";
            }
            unlink($out);
        } else {
            my $qfile = "$queue_dir/$out-unx" . time() . substr($out, -4);
            print "Moving file from $out to $qfile\n";
            system("mv", $out, $qfile) and die "Failed to move file.\n";
        }
    }
    exit(0);
}

# Fetch a fresh blobstore upload URL from the server, then POST $file
# to it with curl.  Returns 1 on success, 0 on failure.
sub upload_file {
    my $file = shift;
    print "Fetching upload URL (for $file) ...\n";
    my $url_to_get_an_upload_url =
        "$URL/uploadurl?user_email=$EMAIL&password=" . eurl($password);
    print "Getting an upload URL from: $url_to_get_an_upload_url\n";
    my $upload_url = get($url_to_get_an_upload_url);
    # BUGFIX: LWP::Simple::get returns undef on failure; matching undef
    # against the regex warned and produced a confusing death message.
    die "Didn't get URL. Wrong password?\n\nGot: $upload_url ($@)\n"
        unless defined($upload_url) && $upload_url =~ /^http.+/;
    chomp $upload_url;
    print "Uploading to: $upload_url ...\n";
    my $stdin = "";
    my ($stdout, $stderr);
    # A successful upload is a zero exit from curl with no output at all.
    if (IPC::Run::run(
            ["curl",
             "-s",  # silent mode
             "-F", "file=\@$file",
             "-F", "password=$password",
             "-F", "user_email=$EMAIL",
             $upload_url],
            \$stdin,
            \$stdout,
            \$stderr) && !$stdout && !$stderr) {
        print "Upload of $file: success.\n";
        return 1;
    }
    print "Error uploading file: $file\n";
    if ($stdout) {
        print "Curl returned unexpected stdout: $stdout";
    }
    if ($stderr) {
        print "Curl returned unexpected stderr: $stderr";
    }
    print "Upload of $file failed.\n";
    return 0;
}

# Invoked (via $0) by scanadf for each scanned page.  Converts the raw
# scanner output named image-nnnn into a jpg/png in the queue directory.
sub be_batch_scan_script {
    # BUGFIX: the old die message interpolated $1 before any match had
    # run, so it was always empty/stale; report the real argument.
    die "Expected $ARGV[0] to be image-nnnn"
        unless $ARGV[0] =~ m!\bimage-\d\d\d\d$!;
    die "No SCAN_FORMAT\n" unless $ENV{SCAN_FORMAT};
    my $filebase = $&;
    print "[$$] Got format: $ENV{SCAN_FORMAT} for $filebase\n";

    my $ext = "jpg";
    if ($ENV{SCAN_LINEART}) {
        $ext = "png";
    }

    my $now = time();
    my $tmp_file  = "$queue_dir/$filebase-unx$now-TMP.$ext";
    my $dest_file = "$queue_dir/$filebase-unx$now.$ext";
    # Convert to a temp name first so the --loop uploader never sees a
    # half-written file, then rename into place.
    system("convert", "-quality", 95, $ARGV[0], $tmp_file)
        and die "Failed to convert.";
    rename($tmp_file, $dest_file)
        or die "Failed to rename $tmp_file to $dest_file: $!\n";
    unlink($ARGV[0]);
}

# Minimal URL-escaping for a query parameter value (space becomes '+').
sub eurl {
    my $a = $_[0];
    $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
    $a =~ tr/ /+/;
    return $a;
}

# Read an entire file, chomping the trailing newline; undef if unreadable.
sub slurp {
    my $file = shift;
    # BUGFIX: three-arg open; the old two-arg form let a leading '>' or
    # '|' in the filename change the open mode.
    open(my $fh, '<', $file) or return undef;
    my $contents = do { local $/; <$fh>; };
    chomp $contents;
    return $contents;
}

# Ask pdfinfo how many pages $file has (undef if pdfinfo prints no count).
sub pdf_page_count {
    my $file = shift;
    my $pages;
    open(my $fh, '-|', 'pdfinfo', $file) or die "Cannot run pdfinfo: $!\n";
    while (<$fh>) {
        $pages = $1 if /^Pages:\s*(\d+)$/i;
    }
    close $fh;
    return $pages;
}
--------------------------------------------------------------------------------