├── .gitignore ├── README.txt ├── appengine ├── app.yaml ├── base.html ├── doc.html ├── index.yaml ├── main.html ├── main.py ├── model.py ├── static │ ├── README.txt │ └── scanner.css └── upload.html └── tools └── scancab /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *.pyc 3 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | ################################################################################ 2 | # 3 | # NOTICE: unmaintained! As of 2015-05-03 I no longer use this App Engine-based 4 | # version of Scanning Cabinet. 5 | # 6 | # The migration of my App Engine instance from Master/Slave datastore to HRD 7 | # failed, so I'm accelerating plans to move this project to be Camlistore-based 8 | # instead. See camlistore.org. This will just be a Camlistore app, using its 9 | # data model. 10 | # 11 | # Old README follows. 12 | # 13 | ################################################################################ 14 | 15 | 16 | This is scanningcabinet. 17 | 18 | It's my document management system. Maybe you'll like it too. 19 | 20 | Problem statement: 21 | 22 | * I'm a packrat. Yes, I might need my T-Mobile cellphone bill from 23 | March 2001 sometime. Maybe. (shutup) 24 | 25 | * My filing cabinets are full. 26 | 27 | * It's cold in San Francisco and I want to burn stuff. 28 | 29 | * I can't find tax or insurance documents when I need to anyway, 30 | because folders suck. I want tags. e.g. I can tag that one 31 | document "audi, insurance, crash, car, state farm, royal motors" 32 | and be sure I'll find it later. Which frickin' folder would I 33 | put that in anyway? Folders sucks. Yay tags. 34 | 35 | * I have a scanner. My friend's scanner is better. Borrowed that 36 | one. It has a sheet feeder. 
37 | 38 | * App Engine now has a Blob API: http://bit.ly/8K4FxM 39 | 40 | * It should be easy to get documents online. Must minimize context 41 | switching between feeding the scanner and entering metadata. In fact, 42 | they should be *entirely separate* tasks. If I have to enter metadata 43 | while scanning, I'll probably just end up on reddit. 44 | 45 | * All document metadata entry should be done later. This includes 46 | clumping multi-page scans into their logical documents. I shouldn't 47 | have to even enter how many pages a document is when I scan it. 48 | I'll be scanning stacks in the auto-document-feeder anyway. 49 | 50 | * Usually I want to just burn/shred documents, but occasionally 51 | I'll need the physical document in the future (like for taxes or 52 | jury duty), so the metadata must include information about the 53 | document's physical location. (e.g. "Red Folder #1") Then when 54 | I need it again, I go linear scan Red Folder #1 looking for it. 55 | Also, I track the "due date" of the document, and show upcoming 56 | ones on the main page, so I see pending due taxes get closer and 57 | closer. Frickin' taxes. 58 | 59 | Anyway, I wrote some software. (parts are kinda crap because I always 60 | forget Python, but whatevs.) 61 | 62 | Some instructions: 63 | 64 | * tools/scancab is the client program. You use it to scan & upload. 65 | Read its docs & comments. You'll need to modify the email & 66 | password later. But first: 67 | 68 | * appengine/ is the AppEngine server component. Go to 69 | http://appspot.com/ to make an AppID ("bobscans"). Then get the 70 | 1.3.0 or higher App Engine SDK, tweak 71 | scanningcabinet/appengine/app.yaml file to match your AppID, then 72 | appcfg.py update 'appengine' to upload the app to your account. 73 | 74 | -- Now, go to https://<your-app-id>.appspot.com/ and login. This 75 | makes your UserInfo entity in the database. That's all.
76 | 77 | -- Now, go back to http://appspot.com/, click your App, then click 78 | "Datastore Viewer" on the left. Find your UserInfo entity, click 79 | it, and modify its "upload_password" to some password you'll use 80 | for uploading. Don't use your Google password. Choose type 81 | "string". 82 | 83 | -- Now, go put your Google account's email & that password you just 84 | made up into scanningcabinet/tools/scancab 85 | 86 | * Now start scanning stuff. 87 | 88 | * Occasionally go add metadata at your app URL. 89 | 90 | Enjoy! 91 | 92 | Brad 93 | brad@danga.com 94 | -------------------------------------------------------------------------------- /appengine/app.yaml: -------------------------------------------------------------------------------- 1 | application: scanningcabinet 2 | version: 1 3 | api_version: 1 4 | runtime: python 5 | builtins: 6 | - remote_api: on 7 | 8 | handlers: 9 | - url: /remote_api 10 | script: $PYTHON_LIB/google/appengine/ext/remote_api/handler.py 11 | login: admin 12 | 13 | - url: /static 14 | static_dir: static 15 | 16 | # Post URL must not be accessible by any users. Only by 17 | # going through Blobstore API upload URL. 18 | - url: /post 19 | script: main.py 20 | 21 | - url: /uploadurl.* 22 | script: main.py 23 | 24 | - url: /resource.* 25 | script: main.py 26 | 27 | - url: .* 28 | login: required 29 | script: main.py 30 | 31 | -------------------------------------------------------------------------------- /appengine/base.html: -------------------------------------------------------------------------------- 1 | 3 | 4 | 5 | 6 | 7 | {% block title %}Title{% endblock %} 8 | 9 | 10 | 11 |
12 | {% if user_info %} 13 | {% if user_info.non_owner %} 14 | {{user_info.real_email}}, acting as 15 | {% endif %} 16 | {{user_info.user.email}} | 17 | {% endif %} 18 | 19 | 20 | {% if user_info %} 21 | Log out 22 | {% else %} 23 | Log in 24 | {% endif %} 25 | 26 |
27 |
[Scanning Cabinet]
28 | 29 | {% block preamble %} 30 | {% endblock %} 31 | 32 | {% if error_messages %} 33 | 38 | {% endif %} 39 | 40 | {% block main_body %} 41 | Empty 42 | {% endblock %} 43 | 44 | 45 | -------------------------------------------------------------------------------- /appengine/doc.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}{{doc.title}} -- document {{doc.key.id}}{% endblock %} 4 | 5 | {% block preamble %} 6 | {% endblock %} 7 | 8 | {% block main_body %} 9 |

10 | {% if doc.title_or_empty_string %} 11 | {{doc.title_or_empty_string|escape}} 12 | {% else %} 13 | Document {{doc.key.id}} 14 | {% endif %} 15 |

16 |
17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 27 | 28 | 29 | 30 | 37 | 38 |
Title
Tags
Doc Date (yyyy-mm-dd)
Due Date (yyyy-mm-dd)
Location 26 | (of physical document)
31 | Other action: 36 |
39 |
40 | 41 | {% if show_single_list %} 42 |
43 | {% for page in pages %} 44 |
45 | {% endfor %} 46 |
47 | {% else %} 48 | {% for page in pages %} 49 | 50 | {% endfor %} 51 | {% endif %} 52 | {% endblock %} 53 | -------------------------------------------------------------------------------- /appengine/index.yaml: -------------------------------------------------------------------------------- 1 | indexes: 2 | - kind: MediaObject 3 | properties: 4 | - name: lacks_document 5 | - name: owner 6 | - name: creation 7 | 8 | # AUTOGENERATED 9 | 10 | # This index.yaml is automatically updated whenever the dev_appserver 11 | # detects that a new type of query is run. If you want to manage the 12 | # index.yaml file manually, remove the above marker line (the line 13 | # saying "# AUTOGENERATED"). If you want to manage some indexes 14 | # manually, move them above the marker line. The index.yaml file is 15 | # automatically uploaded to the admin console when you next deploy 16 | # your application using appcfg.py. 17 | 18 | - kind: Document 19 | properties: 20 | - name: owner 21 | - name: due_date 22 | -------------------------------------------------------------------------------- /appengine/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}Scanning Cabinet{% endblock %} 4 | 5 | {% block preamble %} 6 | {% if top_message %} 7 |
{{top_message}}
8 | {% endif %} 9 |

10 | {% if view_user %} 11 | {% ifequal view_user.key user_info.key %} 12 | Scanning Cabinet 13 | {% else %} 14 | {{view_user.user.email|escape}}'s media 15 | {% endifequal %} 16 | {% else %} 17 | Scanning Cabinet 18 | {% endif %} 19 |

20 | {% endblock %} 21 | 22 | {% block main_body %} 23 | 24 |

Search

25 |
26 |
Tag search: (comma-separated union)
27 |
28 | 29 | {% if media and not did_search %} 30 |

Un-annotated raw scans

31 |
32 | 33 |
34 | {% for item in media %} 35 |
36 |
37 | 38 | [larger]
39 | 40 |
41 |
42 | {% endfor %} 43 |
44 |
45 |
46 | {% endif %} 47 | 48 |

Documents

49 | 50 | {% if docs %} 51 | 59 | {% else %} 60 | 61 |

(tag cloud here?)

62 | 63 | {% endif %} 64 | 65 | 66 | {% if upcoming_due_docs %} 67 |

Upcoming Due Documents

68 | 69 | 78 | 79 | {% endif %} 80 | 81 | 82 | 83 | {% if untagged_docs %} 84 |

Untagged Documents

85 | 86 | 94 | 95 | {% endif %} 96 | 97 | 98 | {% endblock %} 99 | -------------------------------------------------------------------------------- /appengine/main.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # scanningcabinet's AppEngine server-side code. 4 | # 5 | # Copyright 2009 Brad Fitzpatrick 6 | # Copyright 2009 Google Inc. (sample app that scanningcabinet is based on) 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | import cgi 22 | import datetime 23 | import logging 24 | import os 25 | import re 26 | import time 27 | import urllib 28 | 29 | from google.appengine.api import images 30 | from google.appengine.api import users 31 | from google.appengine.ext import blobstore 32 | from google.appengine.ext import db 33 | from google.appengine.ext import webapp 34 | from google.appengine.ext.webapp import blobstore_handlers 35 | from google.appengine.ext.webapp import template 36 | 37 | 38 | import wsgiref.handlers 39 | 40 | from model import UserInfo 41 | from model import Document 42 | from model import MediaObject 43 | 44 | def parse_timestamp(stamp): 45 | """Parse timestamp to datetime object. 46 | 47 | Datetime parsing is not supported until Python 2.5 and microseconds until 48 | Python 2.5. 49 | 50 | Args: 51 | Date/time formatted as Python 2.6 format '%Y-%m-%d %H:%M:%S.%f'. 52 | 53 | Returns: 54 | datetime object. 
55 | """ 56 | no_microseconds, microseconds = stamp.split('.', 1) 57 | time_struct = time.strptime(no_microseconds, '%Y-%m-%d %H:%M:%S') 58 | params = list(time_struct)[:6] + [int(microseconds)] 59 | return datetime.datetime(*params) 60 | 61 | 62 | def get_user_info(): 63 | """Get UserInfo for currently logged in user. 64 | 65 | This will insert the new user if it does not already exist in datastore. 66 | 67 | Returns: 68 | UserInfo record for user if user is logged in, else None. 69 | """ 70 | user = users.get_current_user() 71 | if user is None: 72 | return None 73 | auth_email = user.email() 74 | effective_email = auth_email 75 | 76 | if auth_email == 'brother@example.com': 77 | effective_email = 'test@example.com' 78 | if auth_email == 'cole@fitzpat.com': 79 | effective_email = 'bradfitz@gmail.com' 80 | 81 | if auth_email == effective_email: 82 | ui = UserInfo.get_or_insert(key_name='user:%s' % auth_email) 83 | else: 84 | ui = UserInfo.get_by_key_name('user:%s' % effective_email) 85 | if not ui: 86 | logging.error("User %s failed to act as %s; %s doesn't exist", auth_email, effective_email, effective_email) 87 | return None 88 | logging.info("User %s acting as %s", auth_email, effective_email) 89 | ui.non_owner = True 90 | ui.real_email = auth_email 91 | return ui 92 | 93 | 94 | class MainHandler(webapp.RequestHandler): 95 | """Handler for main page. 96 | 97 | If the user is logged in it will by default show all their media. 98 | 99 | If the user is not logged in it will by default show nothing, but suggest a 100 | course of action to the user. 101 | 102 | This page also shows the results of a search for a provided users shared 103 | media objects. Only public objects are shown for the searched user. If the 104 | searched for user does not exist, a message is displayed to that effect. 105 | """ 106 | 107 | def get(self): 108 | # Provide login/logout URLs. 
109 | user_info = get_user_info() 110 | if user_info is None: 111 | login_url = users.create_login_url('/') 112 | else: 113 | login_url = users.create_logout_url('/') 114 | 115 | # Collect list of error messages which gets shown to the user. 116 | error_messages = self.request.params.getall('error_message') 117 | view_user = user_info # for now 118 | did_search = False 119 | 120 | # Fetch media for view user. 121 | media = MediaObject.all().filter('owner', user_info) 122 | media = media.filter('lacks_document', True) 123 | media = media.order('creation') 124 | limit = 50 125 | if self.request.get("limit"): 126 | limit = long(self.request.get("limit")) 127 | media = media.fetch(limit) 128 | 129 | docs = Document.all().filter('owner', user_info) 130 | tags = self.request.get("tags") 131 | if tags: 132 | did_search = True 133 | for tag in re.split('\s*,\s*', tags): 134 | docs = docs.filter("tags", tag) 135 | docs = docs.fetch(limit) 136 | 137 | untagged_docs = Document.all().filter('owner', user_info).filter("no_tags", True).fetch(limit) 138 | 139 | upcoming_due = Document.all().filter('owner', user_info) 140 | upcoming_due = upcoming_due.filter("due_date !=", None) 141 | upcoming_due = upcoming_due.order("due_date") 142 | upcoming_due = upcoming_due.fetch(30) 143 | 144 | top_message = "" 145 | if self.request.get("saved_doc"): 146 | docid = long(self.request.get("saved_doc")) 147 | top_message = "Saved doc %d" % (docid, docid) 148 | 149 | # Render view. 
150 | self.response.out.write(template.render('main.html', { 151 | "did_search": did_search, 152 | "media": media, 153 | "docs": docs, 154 | "untagged_docs": untagged_docs, 155 | "upcoming_due_docs": upcoming_due, 156 | "view_user": view_user, 157 | "login_url": login_url, 158 | "user_info": user_info, 159 | "top_message": top_message, 160 | }, debug=True)) 161 | 162 | 163 | class MakeDocHandler(webapp.RequestHandler): 164 | def post(self): 165 | user_info = get_user_info() 166 | if user_info is None: 167 | self.redirect('/?error_message=%s' % 'log-in required') 168 | scan_ids = self.request.get_all("media_id") 169 | scans = MediaObject.get(scan_ids) 170 | doc = Document( 171 | parent=user_info, 172 | owner=user_info, 173 | pages=[scan.key() for scan in scans], 174 | title=None, 175 | description=None) 176 | def make_doc(): 177 | db.put(doc) 178 | for scan in scans: 179 | scan.lacks_document = False 180 | scan.document = doc.key() 181 | db.put(scan) 182 | db.run_in_transaction(make_doc) 183 | self.redirect(doc.display_url + "?size=1200") 184 | 185 | 186 | class UploadFormHandler(webapp.RequestHandler): 187 | """Handler to display the media object upload page. 188 | 189 | This must be a dynamic page because the upload URL must be generated 190 | by the Blobstore API. 
191 | """ 192 | 193 | def get(self): 194 | user_info = get_user_info() 195 | if user_info is None: 196 | self.redirect( 197 | '/?error_message=%s' % 'You must be logged in to upload media') 198 | 199 | upload_url = blobstore.create_upload_url( 200 | '/post') 201 | 202 | self.response.out.write(template.render('upload.html', 203 | locals(), 204 | debug=True)) 205 | 206 | 207 | def lookup_and_authenticate_user(handler, claimed_email, claimed_password): 208 | if not claimed_email: 209 | return None 210 | claimed_user = UserInfo.get_by_key_name('user:%s' % claimed_email) 211 | if not claimed_user: 212 | return None 213 | if claimed_email == 'test@example.com' and \ 214 | handler.request.headers["Host"] == "localhost:8080": 215 | # No auth for testing. 216 | return claimed_user 217 | if claimed_user.upload_password and \ 218 | claimed_user.upload_password == claimed_password: 219 | return claimed_user 220 | return None 221 | 222 | 223 | class UploadUrlHandler(webapp.RequestHandler): 224 | """Handler to return a URL for a script to get an upload URL. 225 | 226 | This must be a dynamic page because the upload URL must be generated 227 | by the Blobstore API. 228 | """ 229 | 230 | def get(self): 231 | claimed_email = self.request.get("user_email") 232 | effective_user = lookup_and_authenticate_user(self, claimed_email, 233 | self.request.get("password")) 234 | 235 | if effective_user: 236 | self.response.headers['Content-Type'] = 'text/plain' 237 | upload_url = blobstore.create_upload_url('/post') 238 | self.response.out.write(upload_url) 239 | else: 240 | self.error(403) 241 | 242 | 243 | class UploadPostHandler(blobstore_handlers.BlobstoreUploadHandler): 244 | """Handle blobstore post, as forwarded by notification agent.""" 245 | 246 | def store_media(self, upload_files, error_messages): 247 | """Store media information. 248 | 249 | Writes a MediaObject to the datastore for the uploaded file. 
250 | 251 | Args: 252 | upload_files: List of BlobInfo records representing the uploads. 253 | error_messages: Empty list for storing error messages to report to user. 254 | """ 255 | if not upload_files: 256 | error_messages.append('Form is missing upload file field') 257 | 258 | if len(upload_files) != 1: 259 | error_messages.append('Form has more than one image.') 260 | 261 | def get_param(name, error_message=None): 262 | """Convenience function to get a parameter from request. 263 | 264 | Returns: 265 | String value of field if it exists, else ''. If the key does not exist 266 | at all, it will return None. 267 | """ 268 | try: 269 | value = self.request.params[name] 270 | if isinstance(value, cgi.FieldStorage): 271 | value = value.value 272 | return value or '' 273 | except KeyError: 274 | #error_messages.append(error_message) 275 | return None 276 | 277 | # Check that title, description and share fields provided. Do additional 278 | # constraint check on share to make sure it is valid. 279 | width = get_param('width') 280 | height = get_param('height') 281 | 282 | # title and description are only legit for single-page doc 283 | is_doc = get_param('is_doc') # is a stand-alone single-page doc? 284 | title = get_param('title') 285 | description = get_param('description') 286 | tags = get_param('tags') # comma-separated 287 | 288 | # Make sure user is logged in. 289 | user = users.get_current_user() 290 | user_email = '' 291 | if user is None: 292 | claimed_email = get_param("user_email") 293 | effective_user = lookup_and_authenticate_user(self, claimed_email, get_param('password')) 294 | if not effective_user: 295 | error_messages.append("No user or correct 'password' argument.") 296 | user_email = claimed_email 297 | else: 298 | user_email = user.email() 299 | 300 | if error_messages: 301 | return 302 | 303 | blob_info, = upload_files 304 | 305 | def store_media(): 306 | """Store media object info in datastore. 
307 | 308 | Also updates the user-info record to keep count of media objects. 309 | 310 | This function is run as a transaction. 311 | """ 312 | user_info = UserInfo.get_by_key_name('user:%s' % user_email) 313 | if user_info is None: 314 | error_messages.append('User record has been deleted. ' 315 | 'Try uploading again') 316 | return 317 | 318 | media = MediaObject( 319 | parent=user_info, 320 | owner=user_info, 321 | blob=blob_info.key(), 322 | creation=blob_info.creation, 323 | content_type=blob_info.content_type, 324 | filename=blob_info.filename, 325 | size=int(blob_info.size), 326 | lacks_document=True) 327 | 328 | user_info.media_objects += 1 329 | db.put(user_info) 330 | db.put(media) 331 | 332 | if bool(is_doc) and is_doc != "0": 333 | tag_list = [] 334 | if tags is not None: 335 | tag_list = [x for x in re.split('\s*,\s*', tags) if x] 336 | 337 | doc = Document( 338 | parent=user_info, 339 | owner=user_info, 340 | pages=[media.key()], 341 | title=title, 342 | description=description, 343 | no_tags=(len(tag_list)==0), 344 | tags=tag_list) 345 | db.put(doc) 346 | media.document = doc.key() 347 | media.lacks_document = False 348 | db.put(media) 349 | db.run_in_transaction(store_media) 350 | 351 | def post(self): 352 | """Do upload post.""" 353 | error_messages = [] 354 | 355 | upload_files = self.get_uploads('file') 356 | 357 | self.store_media(upload_files, error_messages) 358 | 359 | error_messages = tuple(urllib.quote(m) for m in error_messages) 360 | error_messages = tuple('error_message=%s' % m for m in error_messages) 361 | self.redirect('/?%s' % '&'.join(error_messages)) 362 | 363 | # Delete all blobs upon error. 
364 | if error_messages: 365 | blobstore.delete(upload_files) 366 | 367 | 368 | class ShowDocHandler(webapp.RequestHandler): 369 | def get(self, docid): 370 | user_info = get_user_info() 371 | if user_info is None: 372 | self.redirect('/?error_message=%s' % 'login required to view docs') 373 | docid = long(docid) 374 | doc = Document.get_by_id(docid, parent=user_info) 375 | if doc is None: 376 | self.response.out.write("Docid %d not found." % (docid)) 377 | return 378 | pages = MediaObject.get(doc.pages) 379 | size = self.request.get("size") 380 | if not size: 381 | size = 1200 382 | show_single_list = long(size) > 600 383 | self.response.out.write(template.render('doc.html', 384 | {"doc": doc, 385 | "pages": pages, 386 | "user_info": user_info, 387 | "size": size, 388 | "show_single_list": show_single_list}, 389 | debug=True)) 390 | 391 | 392 | def break_and_delete_doc(user, doc): 393 | """Deletes the document, marking all the images in it as un-annotated.""" 394 | def tx(): 395 | db.delete(doc) 396 | scans = MediaObject.get(doc.pages) 397 | for scan in scans: 398 | scan.lacks_document = True 399 | scan.document = None 400 | db.put(scan) 401 | db.run_in_transaction(tx) 402 | return True 403 | 404 | 405 | def delete_doc_and_images(user, doc): 406 | """Deletes the document and its images.""" 407 | scans = MediaObject.get(doc.pages) 408 | for scan in scans: 409 | blobstore.delete(scan.blob.key()) 410 | def tx(): 411 | db.delete(doc) 412 | scans = MediaObject.get(doc.pages) 413 | for scan in scans: 414 | user.media_objects -= 1 415 | db.delete(scan) 416 | db.put(user) 417 | db.run_in_transaction(tx) 418 | return True 419 | 420 | 421 | class ChangeDocHandler(webapp.RequestHandler): 422 | def post(self): 423 | user_info = get_user_info() 424 | if user_info is None: 425 | self.redirect('/?error_message=%s' % 'login required to view docs') 426 | docid = long(self.request.get("docid")) 427 | doc = Document.get_by_id(docid, parent=user_info) 428 | if doc is None: 429 | 
self.response.out.write("Docid %d not found." % (docid)) 430 | return 431 | 432 | mode = self.request.get("mode") 433 | if mode == "break": 434 | break_and_delete_doc(user_info, doc) 435 | self.response.out.write("[<< Back] Docid %d deleted and images broken out as un-annotated." % docid) 436 | return 437 | if mode == "delete": 438 | delete_doc_and_images(user_info, doc) 439 | self.response.out.write("[<< Back] Docid %d and its images deleted." % docid) 440 | return 441 | 442 | # Simple properties: 443 | doc.physical_location = self.request.get("physical_location") 444 | doc.title = self.request.get("title") 445 | 446 | # Tags 447 | doc.tags = [x for x in re.split('\s*,\s*', self.request.get("tags")) if x] 448 | doc.no_tags = (len(doc.tags) == 0) 449 | 450 | # Document Date 451 | date = self.request.get("date") 452 | if date: 453 | doc.doc_date = datetime.datetime.strptime(date, "%Y-%m-%d") 454 | doc.no_date = False 455 | else: 456 | doc.doc_date = None 457 | doc.no_date = True 458 | 459 | # Due date 460 | due_date_str = self.request.get("due_date") 461 | doc.due_date = None 462 | if due_date_str: 463 | doc.due_date = datetime.datetime.strptime(due_date_str, "%Y-%m-%d") 464 | 465 | def store(): 466 | db.put(doc) 467 | db.run_in_transaction(store) 468 | self.redirect("/?saved_doc=" + str(docid)) 469 | 470 | 471 | class ResourceHandler(blobstore_handlers.BlobstoreDownloadHandler): 472 | """For when user requests media object. Actually serves blob.""" 473 | 474 | def get(self, media_id, unused_filename): 475 | def get_param(name, error_message=None): 476 | """Convenience function to get a parameter from request. 477 | 478 | Returns: 479 | String value of field if it exists, else ''. If the key does not exist 480 | at all, it will return None. 
481 | """ 482 | try: 483 | value = self.request.params[name] 484 | if isinstance(value, cgi.FieldStorage): 485 | value = value.value 486 | return value or '' 487 | except KeyError: 488 | #error_messages.append(error_message) 489 | return None 490 | 491 | user_info = get_user_info() 492 | #if user_info is None and get_param("password") == "xx": 493 | # user_info = UserInfo.get_by_key_name('user:bradfitz@gmail.com') 494 | if user_info is None: 495 | self.redirect('/?error_message=%s' % 'log-in required') 496 | media_object = MediaObject.get_by_id(long(media_id), parent=user_info) 497 | if media_object is None: 498 | self.redirect('/?error_message=Unidentified+object') 499 | return 500 | 501 | last_modified_string = media_object.creation.strftime("%a, %d %b %Y %H:%M:%S GMT") 502 | self.response.headers['Cache-Control'] = "private" 503 | self.response.headers['Content-Type'] = str(media_object.guessed_type) 504 | self.response.headers['Last-Modified'] = last_modified_string 505 | expires = media_object.creation + datetime.timedelta(days=30) 506 | self.response.headers['Expires'] = expires.strftime("%a, %d %b %Y %H:%M:%S GMT") 507 | 508 | # Caching 509 | if self.request.headers.has_key("If-Modified-Since"): 510 | ims = self.request.headers.get("If-Modified-Since") 511 | if ims == last_modified_string: 512 | self.error(304) 513 | return 514 | modsince = datetime.datetime.strptime(ims, "%a, %d %b %Y %H:%M:%S %Z") 515 | if modsince >= media_object.creation: 516 | self.error(304) 517 | return 518 | 519 | blob_key = media_object.blob.key() 520 | 521 | resize = self.request.get('resize') 522 | if resize: 523 | image = images.Image(blob_key=str(blob_key)) 524 | image.resize(width=int(resize), height=int(resize)) 525 | self.response.out.write(image.execute_transforms()) 526 | return 527 | 528 | if 'Range' in self.request.headers: 529 | self.response.headers['Range'] = self.request.headers['Range'] 530 | 531 | self.send_blob(blob_key, str(media_object.guessed_type)) 532 | 533 | 
534 | class GarbageCollectMediaHandler1(webapp.RequestHandler): 535 | def get(self): 536 | if not users.is_current_user_admin(): 537 | self.redirect('/?error_message=%s' % 'log-in required') 538 | 539 | used = set() 540 | for d in Document.all(): 541 | used |= set(d.pages) 542 | 543 | dead = dict() 544 | for i in MediaObject.all(): 545 | if i.key() not in used: 546 | dead[i.key()] = i 547 | 548 | for k in dead: 549 | dead[k].delete() 550 | 551 | self.redirect('/') 552 | 553 | class GarbageCollectMediaHandler2(webapp.RequestHandler): 554 | def get(self): 555 | if not users.is_current_user_admin(): 556 | self.redirect('/?error_message=%s' % 'log-in required') 557 | 558 | used = set() 559 | for i in MediaObject.all(): 560 | used.add(i.blob.key()) 561 | 562 | for b in blobstore.BlobInfo.all(): 563 | if b.key() not in used: 564 | b.delete() 565 | 566 | self.redirect('/') 567 | 568 | class DumpHandler(webapp.RequestHandler): 569 | def get(self): 570 | self.response.headers['Cache-Control'] = "private" 571 | self.response.headers['Content-Type'] = "text/plain; charset=utf-8" 572 | 573 | user = UserInfo.get_by_key_name('user:bradfitz@gmail.com') 574 | 575 | docs = Document.all().filter('owner', user) 576 | docs = docs.fetch(10000) 577 | self.response.out.write("# got %d docs\n" % len(docs)) 578 | for doc in docs: 579 | self.response.out.write("%s tags[%s] date[%s] title[%s] \n" % (doc.display_url, doc.tag_comma_separated, doc.date_yyyy_mm_dd, doc.title_or_empty_string)) 580 | for page in doc.pages: 581 | self.response.out.write(" has_page: %d\n" % (page.id_or_name())) 582 | meds = MediaObject.all().filter('owner', user) 583 | meds = meds.fetch(10000) 584 | self.response.out.write("# got %d mediaobjects\n" % len(meds)) 585 | for mo in meds: 586 | self.response.out.write("%s creation[%s] size[%d]\n" % (mo.url_path, str(mo.creation), mo.size)) 587 | 588 | 589 | def main(): 590 | application = webapp.WSGIApplication( 591 | [('/', MainHandler), 592 | ('/uploadurl', 
UploadUrlHandler), # returns a new upload URL 593 | #('/upload', UploadFormHandler), # for humans 594 | ('/post', UploadPostHandler), # for machine or humans to upload 595 | ('/makedoc', MakeDocHandler), 596 | ('/doc/(\d+)', ShowDocHandler), 597 | ('/changedoc', ChangeDocHandler), 598 | ('/resource/(\d+)(/.*)?', ResourceHandler), 599 | #('/dumpxx', DumpHandler), 600 | #('/gc_media1', GarbageCollectMediaHandler1), 601 | #('/gc_media2', GarbageCollectMediaHandler2), 602 | ], 603 | debug=True) 604 | wsgiref.handlers.CGIHandler().run(application) 605 | 606 | 607 | if __name__ == '__main__': 608 | main() 609 | -------------------------------------------------------------------------------- /appengine/model.py: -------------------------------------------------------------------------------- 1 | # 2 | # scanningcabinet's AppEngine server-side code. 3 | # 4 | # Copyright 2009 Brad Fitzpatrick 5 | # Copyright 2009 Google Inc. (sample app that scanningcabinet is based on) 6 | # 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # 19 | 20 | import mimetypes 21 | 22 | from google.appengine.ext import blobstore 23 | from google.appengine.ext import db 24 | 25 | 26 | class MigratingBlobReferenceProperty(db.Property): 27 | """Migrates pre-1.3.0 blob str props to real blobkey references.""" 28 | 29 | data_type = blobstore.BlobInfo 30 | 31 | def get_value_for_datastore(self, model_instance): 32 | """Translate model property to datastore value.""" 33 | blob_info = getattr(model_instance, self.name) 34 | if blob_info is None: 35 | return None 36 | return blob_info.key() 37 | 38 | def make_value_from_datastore(self, value): 39 | """Translate datastore value to BlobInfo.""" 40 | if value is None: 41 | return None 42 | 43 | # The two lines of difference of MigratingBlobReferenceProperty: 44 | if isinstance(value, basestring): 45 | value = blobstore.BlobKey(value) 46 | 47 | return blobstore.BlobInfo(value) 48 | 49 | def validate(self, value): 50 | """Validate that assigned value is BlobInfo. 51 | 52 | Automatically converts from strings and BlobKey instances. 53 | """ 54 | if isinstance(value, (basestring)): 55 | value = blobstore.BlobInfo(blobstore.BlobKey(value)) 56 | elif isinstance(value, blobstore.BlobKey): 57 | value = blobstore.BlobInfo(value) 58 | return super(MigratingBlobReferenceProperty, self).validate(value) 59 | 60 | 61 | class UserInfo(db.Model): 62 | """Information about a particular user and their media library.""" 63 | user = db.UserProperty(auto_current_user_add=True) 64 | media_objects = db.IntegerProperty(default=0) 65 | upload_password = db.StringProperty() 66 | 67 | # non_owner is set if a helper (e.g. 
Brad's brother) is helping him 68 | # tag 69 | non_owner = False 70 | real_email = "" # real user's email 71 | 72 | class Document(db.Model): 73 | """A document with 1 or more media objects (1+ pages, 0/1 preview)""" 74 | owner = db.ReferenceProperty(UserInfo, required=True) 75 | 76 | pages = db.ListProperty(db.Key, required=True) 77 | preview = db.ListProperty(db.Key) # preview images, if pages is a PDF 78 | 79 | doc_date = db.DateTimeProperty() 80 | no_date = db.BooleanProperty(required=True, default=True) 81 | 82 | creation = db.DateTimeProperty(auto_now_add=True) 83 | 84 | title = db.StringProperty() 85 | description = db.TextProperty() 86 | 87 | tags = db.StringListProperty() 88 | no_tags = db.BooleanProperty(required=True, default=True) 89 | 90 | # To find the paper document back later: 91 | physical_location = db.StringProperty() 92 | 93 | # Things I need to get to (taxes, etc.) 94 | due_date = db.DateTimeProperty() 95 | 96 | starred = db.BooleanProperty() 97 | 98 | @property 99 | def display_url(self): 100 | return '/doc/%s' % self.key().id() 101 | 102 | @property 103 | def tag_comma_separated(self): 104 | return ", ".join(self.tags) 105 | 106 | @property 107 | def date_yyyy_mm_dd(self): 108 | """Or empty string.""" 109 | if self.doc_date: 110 | return str(self.doc_date)[0:10] 111 | return "" 112 | 113 | @property 114 | def due_yyyy_mm_dd(self): 115 | """Or empty string.""" 116 | if self.due_date: 117 | return str(self.due_date)[0:10] 118 | return "" 119 | 120 | @property 121 | def title_or_empty_string(self): 122 | """The real title, or the empty string if none.""" 123 | if not self.title: 124 | return "" 125 | return self.title 126 | 127 | @property 128 | def some_title(self): 129 | if self.title: 130 | return self.title 131 | if self.tags: 132 | return ", ".join(self.tags) 133 | return self.title 134 | 135 | 136 | class MediaObject(db.Model): 137 | """Information about media object uploaded by user. 
138 | 139 | Does not contain the actual object, which is in blobstore. Contains duplicate 140 | meta-information about blob for searching purposes. 141 | """ 142 | owner = db.ReferenceProperty(UserInfo, required=True) 143 | 144 | blob = MigratingBlobReferenceProperty() 145 | 146 | creation = db.DateTimeProperty() 147 | content_type = db.StringProperty() 148 | 149 | filename = db.StringProperty() # foo.jpg 150 | original_path = db.StringProperty() # scan/tax/2009/foo.jpg 151 | size = db.IntegerProperty() 152 | 153 | # If known: 154 | width = db.IntegerProperty() 155 | height = db.IntegerProperty() 156 | 157 | # If part of a document yet, a reference (db.Key) to a media object. 158 | document = db.ReferenceProperty(Document, required=False) 159 | lacks_document = db.BooleanProperty() 160 | 161 | @property 162 | def thumb_url(self): 163 | return '/resource/%d/%s?resize=300' % (self.key().id(), self.filename) 164 | 165 | @property 166 | def url_resize(self): 167 | return '/resource/%s/%s?resize=' % (self.key().id(), self.filename) 168 | 169 | @property 170 | def url_path(self): 171 | return '/resource/%s/%s' % (self.key().id(), self.filename) 172 | 173 | @property 174 | def guessed_type(self): 175 | """A guess for the content type of this media object. 176 | 177 | This is currently necessary because the production version of the 178 | Blobstore API does not try to detect content types of uploads. 179 | """ 180 | if self.content_type == 'application/octet-stream': 181 | # Try to guess. Useful for backward compatibility with older objects 182 | # that had not content type detection. 
183 | mime_type, unused_parameters = mimetypes.guess_type(self.filename) 184 | return mime_type or 'text/plain' 185 | else: 186 | return self.content_type or 'text/plain' 187 | 188 | @property 189 | def is_image(self): 190 | """Returns True if this media object is an image.""" 191 | image_types = frozenset([ 192 | 'image/png', 'image/jpeg', 'image/tiff', 'image/gif', 'image/bmp']) 193 | return self.guessed_type in image_types 194 | 195 | def delete(self): 196 | """Also delete associated media blob and decrement users media count.""" 197 | super(MediaObject, self).delete() 198 | self.owner.media_objects -= 1 199 | self.owner.put() 200 | self.blob.delete() 201 | -------------------------------------------------------------------------------- /appengine/static/README.txt: -------------------------------------------------------------------------------- 1 | Placeholder file for empty git directory. 2 | 3 | JavaScript libraries (Closure?) will eventually go here. 4 | -------------------------------------------------------------------------------- /appengine/static/scanner.css: -------------------------------------------------------------------------------- 1 | .doc-page-single { 2 | border: 1px solid grey; 3 | display: block; 4 | } 5 | 6 | .doc-page-row { 7 | border: 1px solid grey; 8 | } 9 | -------------------------------------------------------------------------------- /appengine/upload.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block title %}MediaStore Upload{% endblock %} 4 | 5 | {% block main_body %} 6 |
7 | Sharing: 11 | Title:
12 | Description:
13 |
14 | Upload File:
15 | 16 |
17 | 18 |
Cancel
{% endblock %}
--------------------------------------------------------------------------------
/tools/scancab:
--------------------------------------------------------------------------------
#!/usr/bin/perl
#
# Author: Brad Fitzpatrick
#
# You'll need Debian packages curl, sane-utils, sane, scanadf, some
# perl stuff, etc.  I kinda expect you to modify this to suit your
# needs.  I wrote it for me.  This is hacky and I don't offer support.
# I do, however, accept gushing emails of full of love.
#
# ============
# Basic usage:
# ============
#
# (Load scanner full of documents)
# $ scancab --adf
# $ scancab --adf --color
# $ scancab --adf --lineart
# $ scancab --duplex      (both sides. implies --adf)
#
# Those commands above just write to the queue directory.
# Make sure you mkdir ~/scancab-queue
#
# (But run this in the background in another terminal,
# which does the actual potentially-slow uploads, making
# sure to do them in the right creation order, doing retries,
# etc....)
# $ scancab --loop
#
# (Upload a certain document, but don't delete it...)
# $ scancab --upload=foo.jpg
#
# =================
# Scanner Advice...
# =================
# I can't say enough good things about my Fujitsu ScanSnap S1500M
# (identical to S1500).  Great, fast hardware, great Linux support.
# A++.  Would buy again.

use strict;
use warnings;   # FIX: was missing; catches undef interpolation bugs below
use LWP::Simple;
use Getopt::Long;
use IPC::Run ();
use File::Spec::Functions qw(tmpdir catdir catfile);

my $URL      = "http://localhost:8080";
my $EMAIL    = "";
my $password = "";

my $uploader_pid_file = "";
my $queue_dir = "$ENV{HOME}/scancab-queue/";

# Detect when we're the helper program (--scan-script) to scanadf,
# which we run in --adf batch mode.  (This script functions as both
# the driver and the helper.)
if ($ENV{'SCAN_RES'} || $ENV{'SCAN_FORMAT_ID'}) {
    be_batch_scan_script();
    exit(0);
}

my $upload_loop = 0;
my $adf         = 0;
my $upload_file;
my $color      = 0;
my $lineart    = 0;
my $appid      = $ENV{'SCANCAB_APPID'};
my $upload_now = 0;
my $duplex     = 0;
my $dev        = 0;

die unless GetOptions(
    "dev" => \$dev,            # dev_appserver mode

    "appid=s"    => \$appid,
    "email=s"    => \$EMAIL,
    "password=s" => \$password,

    # Upload this file, don't delete it, then exit.
    "upload=s"   => \$upload_file,
    "upload_now" => \$upload_now,  # don't queue

    # Loop, looking in $queue_dir
    "loop" => \$upload_loop,

    # Use the auto-document-feeder.
    "adf"    => \$adf,
    "duplex" => \$duplex,

    # Mutually exclusive:
    "color"   => \$color,
    "lineart" => \$lineart,
);

$adf = 1 if $duplex;

die "Can't do both color and lineart.\n" if $color && $lineart;

die "appid parameter must be just an appengine appid"
    if $appid && $appid !~ /^[\w-]+$/;

my $conf_dir    = "$ENV{HOME}/.config/scanningcabinet";
my $device      = slurp("$conf_dir/device");
my $device_flag = $device ? "-d $device" : "";

if ($dev) {
    $URL       = "http://localhost:8080";
    $EMAIL     = "test\@example.com";
    $password  = "test";
    $queue_dir = "/tmp/scancab-queue";
    mkdir $queue_dir, 0755 unless -d $queue_dir;
} else {
    unless (-d $queue_dir) {
        die "Queue directory doesn't exist; please create it: $queue_dir\n";
    }
    # ADF mode doesn't need to upload (just drops stuff into a batch
    # directory) so no need to set up URL or password in that case.
    unless ($adf) {
        $appid ||= slurp("$conf_dir/appid")
            or warn "No appid configured in $conf_dir/appid; assuming $URL\n";
        $EMAIL    ||= slurp("$conf_dir/email");
        $password ||= slurp("$conf_dir/password")
            or die "No password configured in args or in $conf_dir/password\n";
        if ($appid) {
            $URL = "https://$appid.appspot.com";
        }
    }
    $EMAIL ||= "$ENV{USER}\@gmail.com";  # uh, works for me. :)
}

if ($upload_loop) {
    chdir($queue_dir) or die "Failed to chdir to queue directory $queue_dir.\n";
    while (1) {
        opendir(my $dh, ".") or die;
        my %create_time;  # filename -> unixtime
        my @files = grep { /^image-.+-unx(\d+)\.(png|jpg)$/ && ($create_time{$_} = $1) }
                    readdir($dh);
        closedir($dh);

        # Note: need the fallback on ($a cmp $b) because my Fujitsu
        # ScanSnap S1500M is so damn fast, that sometimes files all
        # have the same unixtime(!):
        @files = sort { ($create_time{$a} <=> $create_time{$b}) || ($a cmp $b) } @files;

        if (@files) {
            my $failures;
            foreach my $file (@files) {
                if (upload_file($file)) {
                    unlink($file);
                } else {
                    $failures = 1;
                    print STDERR "# Upload error. Sleeping for 5 seconds...\n";
                    last;
                }
            }
            # BUGFIX: don't announce success when an upload just failed.
            print "Uploads complete. Waiting for new files.\n" unless $failures;
        }
        sleep 5;
    }
}

if ($upload_file) {
    die "File $upload_file doesn't exist.\n" unless -e $upload_file;
    print "Uploading $upload_file ...\n";
    # BUGFIX: anchor the extension test; the old unanchored /\.pdf/i
    # treated names like "foo.pdf.jpg" as PDFs.
    if ($upload_file =~ /\.pdf\z/i) {
        my $type = $color ? 'jpg' : 'tiff';
        my @pdf_to_img = qw(convert);
        if ($type eq 'tiff') {
            push @pdf_to_img, qw(-monochrome);
            push @pdf_to_img, qw(-density 150);
            push @pdf_to_img, qw(-compress lzw);
        } elsif ($type eq 'jpg') {
            push @pdf_to_img, qw(-density 300);
        }
        my $tmp = tmpdir();
        my $cnt = pdf_page_count($upload_file);
        for (my $pg = 0; $pg < $cnt; $pg++) {
            printf "  page %04d of %04d\n", $pg + 1, $cnt;
            my $img = catfile($tmp, sprintf("page$$-%04d.$type", $pg + 1));
            system(@pdf_to_img, "$upload_file\[$pg\]", $img) == 0
                or die "Cannot convert page\n";
            if (!upload_file($img)) {
                die "Failed to upload.\n";
            }
            unlink $img;
        }
    } else {
        if (!upload_file($upload_file)) {
            die "Failed to upload.\n";
        }
    }
    exit(0);
}

my $scan = 1;  # implicit default mode, for now.

if ($scan) {
    my %seen;
    opendir(D, ".") or die;
    my @files = readdir(D);
    foreach my $f (@files) {
        $seen{$f} = 1 if $f =~ /^image-\d\d\d\d/;
    }

    # Find the first unused image-NNNN basename.
    my $n = 1;
    my $base = sprintf("image-%04d", $n);
    while ($seen{$base} ||
           $seen{"$base.tiff"} ||
           $seen{"$base.jpg"} ||
           $seen{"$base.png"}) {
        $n++;
        $base = sprintf("image-%04d", $n);
    }
    my $tiff = "$base.tiff";

    my $mode = $lineart ? "Lineart" : ($color ? "Color" : "Gray");
    if ($lineart) {
        $ENV{SCAN_LINEART} = 1;  # to pass to subprocess
    }
    if ($adf) {
        my $extra_source = $duplex ? " --source=\"ADF Duplex\"" : "";
        system("scanadf $device_flag --mode $mode --resolution 300 " .
               $extra_source .
               " --scan-script $0 " .
               " -s $n") and die "Failed to batch scan.";
    } else {
        my $cmd = "scanimage $device_flag --mode $mode --resolution 300 --format tiff > $tiff";
        system($cmd)
            and die "Failed to scan while running:\n  $cmd\n";

        my $out = $lineart ?
"$base.png" : "$base.jpg";
        print "Scanned. Converting $tiff -> $out\n";
        system("convert", "-quality", "90", $tiff, $out)
            and die "failed to convert.\n";
        unlink($tiff) or die "Failed to unlink $tiff: $!";
        if ($upload_now) {
            if (!upload_file($out)) {
                die "Failed to upload $out (keeping file)\n";
            }
            unlink($out);
        } else {
            my $qfile = "$queue_dir/$out-unx" . time() . substr($out, -4);
            print "Moving file from $out to $qfile\n";
            system("mv", $out, $qfile) and die "Failed to move file.\n";
        }
    }
    exit(0);
}

# Fetch a fresh blobstore upload URL from the server, then POST $file
# to it with curl.  Returns 1 on success, 0 on failure.
sub upload_file {
    my $file = shift;
    print "Fetching upload URL (for $file) ...\n";
    my $url_to_get_an_upload_url =
        "$URL/uploadurl?user_email=$EMAIL&password=" . eurl($password);
    print "Getting an upload URL from: $url_to_get_an_upload_url\n";
    my $upload_url = get($url_to_get_an_upload_url);
    # BUGFIX: LWP::Simple::get returns undef on failure; matching undef
    # against the regex warned and produced a confusing death message.
    die "Didn't get URL. Wrong password?\n\nGot: $upload_url ($@)\n"
        unless defined($upload_url) && $upload_url =~ /^http.+/;
    chomp $upload_url;
    print "Uploading to: $upload_url ...\n";
    my $stdin = "";
    my ($stdout, $stderr);
    # A successful upload is a zero exit from curl with no output at all.
    if (IPC::Run::run(
            ["curl",
             "-s",  # silent mode
             "-F", "file=\@$file",
             "-F", "password=$password",
             "-F", "user_email=$EMAIL",
             $upload_url],
            \$stdin,
            \$stdout,
            \$stderr) && !$stdout && !$stderr) {
        print "Upload of $file: success.\n";
        return 1;
    }
    print "Error uploading file: $file\n";
    if ($stdout) {
        print "Curl returned unexpected stdout: $stdout";
    }
    if ($stderr) {
        print "Curl returned unexpected stderr: $stderr";
    }
    print "Upload of $file failed.\n";
    return 0;
}

# Invoked (via $0) by scanadf for each scanned page.  Converts the raw
# scanner output named image-nnnn into a jpg/png in the queue directory.
sub be_batch_scan_script {
    # BUGFIX: the old die message interpolated $1 before any match had
    # run, so it was always empty/stale; report the real argument.
    die "Expected $ARGV[0] to be image-nnnn"
        unless $ARGV[0] =~ m!\bimage-\d\d\d\d$!;
    die "No SCAN_FORMAT\n" unless $ENV{SCAN_FORMAT};
    my $filebase = $&;
    print "[$$] Got format: $ENV{SCAN_FORMAT} for $filebase\n";

    my $ext = "jpg";
    if ($ENV{SCAN_LINEART}) {
        $ext = "png";
    }

    my $now = time();
    my $tmp_file  = "$queue_dir/$filebase-unx$now-TMP.$ext";
    my $dest_file = "$queue_dir/$filebase-unx$now.$ext";
    # Convert to a temp name first so the --loop uploader never sees a
    # half-written file, then rename into place.
    system("convert", "-quality", 95, $ARGV[0], $tmp_file)
        and die "Failed to convert.";
    rename($tmp_file, $dest_file)
        or die "Failed to rename $tmp_file to $dest_file: $!\n";
    unlink($ARGV[0]);
}

# Minimal URL-escaping for a query parameter value (space becomes '+').
sub eurl {
    my $a = $_[0];
    $a =~ s/([^a-zA-Z0-9_\,\-.\/\\\: ])/uc sprintf("%%%02x",ord($1))/eg;
    $a =~ tr/ /+/;
    return $a;
}

# Read an entire file, chomping the trailing newline; undef if unreadable.
sub slurp {
    my $file = shift;
    # BUGFIX: three-arg open; the old two-arg form let a leading '>' or
    # '|' in the filename change the open mode.
    open(my $fh, '<', $file) or return undef;
    my $contents = do { local $/; <$fh>; };
    chomp $contents;
    return $contents;
}

# Ask pdfinfo how many pages $file has (undef if pdfinfo prints no count).
sub pdf_page_count {
    my $file = shift;
    my $pages;
    open(my $fh, '-|', 'pdfinfo', $file) or die "Cannot run pdfinfo: $!\n";
    while (<$fh>) {
        $pages = $1 if /^Pages:\s*(\d+)$/i;
    }
    close $fh;
    return $pages;
}
--------------------------------------------------------------------------------