├── .auth ├── LICENSE ├── README.md ├── auth.py └── rs.py /.auth: -------------------------------------------------------------------------------- 1 | md5----------------------------> module perm 2 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Thomas Hirsch 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | python-remotestorage 2 | ==================== 3 | 4 | An implementation of remotestorage for Python, using a git backend. 5 | 6 | Use at your own risk. 7 | 8 | The remotestorage implementation takes the form of two WSGI daemons, one for storage and one for authentication. 
The most convenient way (IMHO) to run these is to have them supervised by daemontools, runit or supervisord. An HTTP server such as nginx should forward the HTTP requests to the two daemons. 9 | 10 | auth.py 11 | ======= 12 | 13 | The auth daemon provides OAuth2 [implicit grant tokens](http://tools.ietf.org/html/rfc6749#section-4.2) and is able to verify these. 14 | Authorized clients and their permissions are currently stored in an .auth configuration file. 15 | 16 | A line in the .auth file consists of three entries: 17 | 18 | md5 module permissions 19 | 20 | * md5 is an md5 hash generated by the auth module upon the first request. The user is instructed to add this to the configuration to authorize the service. 21 | * module is a module to grant access to, or 'root'. 22 | * permissions is a permissions string such as 'r' or 'rw'. 23 | 24 | If, during a request, the corresponding md5 hash is found in the file, access is granted and a token or code is provided to the client. 25 | 26 | IMPORTANT: The current implementation does not re-verify that the user permits access after the initial accreditation, nor does it set a cookie to verify that it is still the same user doing the requests. 27 | 28 | rs.py 29 | ===== 30 | 31 | This is the git remote storage daemon. It replies to any GET, PUT, DELETE and OPTIONS request according to the standard. 32 | 33 | Storage is currently implemented by creating a git repository per module in the current directory. External git repositories are on the todo list. Any PUT or DELETE operation results in a commit by the daemon. 34 | 35 | -------------------------------------------------------------------------------- /auth.py: -------------------------------------------------------------------------------- 1 | #!/bin/env python 2 | """ 3 | remote storage web service authentication (oauth2) module. 
4 | should implement https://tools.ietf.org/id/draft-dejong-remotestorage-00.txt 5 | and [OAUTH2] 6 | 7 | To be run behind an nginx server, and supervised by daemontools.""" 8 | 9 | from flup.server.fcgi import WSGIServer 10 | import datetime, time, os, sys, re 11 | import optparse 12 | import urllib 13 | import traceback 14 | import simplejson as json 15 | 16 | from hashlib import md5 17 | 18 | __usage__ = "%prog -n " 19 | __version__ = "$Id$" 20 | __author__ = "Thomas Hirsch " 21 | 22 | FCGI_SOCKET_DIR = '/tmp' 23 | FCGI_SOCKET_UMASK = 0111 24 | 25 | SALT = "salt of the earth" #UPDATE THIS TO GENERATE UNIQUE CODES 26 | 27 | TOKEN_EXPIRY = 10 # 10 seconds 28 | ACCESS_TOKEN_EXPIRY = 300 # 5 minutes 29 | REFRESH_TOKEN_EXPIRY = 3600 # 1 hour 30 | 31 | MSG_AUTH_REQUEST = """ 32 | 33 | 34 | The following (currently unauthorized) application applied to access your storage:
35 | CLIENT: "%s" -- who they pretend to be.
36 | URI : "%s" -- make sure this is a fully qualified URI.
37 | SCOPE : "%s" -- just a suggestion.
38 | STATE : %s -- cross-site forgery possible if not provided.
39 | If you wish to grant access, please register the following code: "%s".
40 | refresh 41 |
codeTokens = {}  # FIXME: this is not multi process safe
accessTokens = {}
refreshTokens = {}

# Resolve the URL-unquote helper on either major Python version.
try:
    _unquote = urllib.unquote  # Python 2
except AttributeError:
    from urllib.parse import unquote as _unquote  # Python 3


def log(msg):
    """Append *msg* as one line to ``auth.log`` in the working directory."""
    with open("auth.log", "a") as fd:
        fd.write(msg + "\n")


def fail(start_response, responseCode, msg):
    """Start a plain-text response with *responseCode* and return *msg*."""
    start_response(responseCode, [('Content-Type', 'text/plain')])
    return msg


def _to_bytes(value):
    """Return *value* as a byte string so it can be fed to a hash object."""
    if isinstance(value, bytes):
        return value
    return value.encode("utf-8")


def _html_escape(value):
    """Return ``str(value)`` escaped for HTML text and attribute values."""
    text = str(value)
    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"),
                        ('"', "&quot;"), ("'", "&#39;")):
        text = text.replace(raw, entity)
    return text


def buildToken(now, code):
    """Return a fresh 32-character hexadecimal token.

    The token hashes SALT, *code* and *now* together with 16 bytes from
    ``os.urandom``, so it cannot be recomputed by somebody who knows the
    client code and the approximate issue time.
    """
    m = md5()
    m.update(_to_bytes(SALT))
    m.update(_to_bytes(code))
    m.update(_to_bytes(str(now)))
    m.update(os.urandom(16))
    return m.hexdigest()


def _parse_params(options):
    """Split a raw ``a=b&c=d`` string into a dict; values stay undecoded."""
    params = {}
    for pair in options.split("&"):
        if not pair:
            continue
        key, _, value = pair.partition("=")
        params[key] = value
    return params


def _client_code(client_id, decoded_uri):
    """Return the md5 hex code identifying a client and its redirect base.

    Only the part of the redirect URI before the first '?' (or, failing
    that, the first '#') takes part in the hash.
    """
    if '?' in decoded_uri:
        base = decoded_uri.split("?")[0]
    elif '#' in decoded_uri:
        base = decoded_uri.split("#")[0]
    else:
        base = decoded_uri
    m = md5()
    m.update(_to_bytes(SALT))
    m.update(_to_bytes(client_id))
    m.update(_to_bytes(base))
    return m.hexdigest()


def _load_keys(path=".auth"):
    """Read the authorization file and map each code to [module, permissions].

    Lines with fewer than three whitespace-separated fields are ignored.
    An unreadable file is logged and treated as "no client authorized".
    """
    keys = {}
    try:
        with open(path, "r") as fd:
            for line in fd:
                fields = line.split()
                if len(fields) >= 3:
                    keys[fields[0]] = fields[1:3]
    except IOError:
        log("cannot read %s; no client is authorized" % path)
    return keys


def auth(environ, start_response):
    """WSGI application serving the ``auth`` and ``vrfy`` endpoints.

    The endpoint is the last path segment of REQUEST_URI. Parameters come
    from the query string (GET) or from the request body (POST).

    * ``auth``: when the client code is listed in ``.auth`` the client is
      redirected with a code or an access token, otherwise an HTML page
      showing the code to register is returned.
    * ``vrfy``: answers with a JSON object describing the access token, or
      an empty object and a 401 status when the token is unknown or expired.

    Returns a one-element list holding the response body.
    """
    result = ""
    responseCode = "200 Ok"
    returnContentType = 'text/html'
    redirect = False

    request = environ['REQUEST_URI']
    method = environ['REQUEST_METHOD']

    log("%s %s" % (method, request))

    # Separate the query first so that slashes inside it cannot be mistaken
    # for path separators.
    path, _, query = request.partition("?")
    application = path.split("/")[-1]

    if method == "POST":
        options = environ['wsgi.input'].read()
        if not isinstance(options, str):
            options = options.decode("utf-8")
    elif method == "GET":
        options = query
    else:
        return [fail(start_response, "405 Method Not Allowed",
                     "Only GET and POST are supported")]

    p = _parse_params(options)

    if application == "auth":
        #TODO: handle refresh tokens
        decodedURI = _unquote(p.get('redirect_uri', ''))
        if any(ord(ch) < 0x20 for ch in decodedURI):
            # Control characters would end up in the Location header.
            return [fail(start_response, "400 Malformed request",
                         "redirect_uri contains control characters")]

        code = _client_code(p.get('client_id', ''), decodedURI)
        now = time.time()
        token = buildToken(now, code)
        keys = _load_keys()
        state = p.get('state', '')
        response_type = p.get('response_type', '')

        if code in keys:
            #TODO: at this point, we need a mechanism for the user to give
            # permission to proceed. Not implemented yet.
            module, scope = keys[code]
            if response_type == "code":
                #TODO: the endpoint exchanging this code for an access token
                # is not implemented yet.
                codeTokens[token] = (now, code)
                separator = "&" if "?" in decodedURI else "?"
                redirect = decodedURI + separator + "code=%s&state=%s" % (token, state)
            elif response_type == "token":
                accessTokens[token] = (now, code, scope)
                log("new token %s..." % token[:8])  # never log a full bearer token
                fragment = "#access_token=%s&token_type=bearer&expires_in=%d&scope=%s&state=%s"
                redirect = decodedURI + fragment % (
                    token, ACCESS_TOKEN_EXPIRY, "+".join(keys[code]), state)
            else:
                return [fail(start_response, "400 Malformed request",
                             "response_type must be 'code' or 'token'")]
            result = "Redirecting to %s." % _html_escape(redirect)
        else:
            # Every value is attacker-controlled: escape before embedding.
            values = (p.get('client_id', ''), decodedURI,
                      p.get('scope', 'Not specified'), 'state' in p, code,
                      p.get('client_id', ''), p.get('redirect_uri', ''),
                      state, response_type)
            result = MSG_AUTH_REQUEST % tuple(_html_escape(v) for v in values)

    elif application == "vrfy":
        returnContentType = "application/json;charset=UTF-8"
        result = json.dumps({})
        if p.get('access_type') != "token":
            responseCode = "400 Malformed request"
        else:
            token = p.get('token', '')
            entry = accessTokens.get(token)
            log("vrfy token %s... is valid: %s" % (token[:8], entry is not None))
            if entry is None:
                responseCode = "401 Unauthorized"
            else:
                timestamp, code, scope = entry
                expires = timestamp + ACCESS_TOKEN_EXPIRY
                if time.time() > expires:
                    del accessTokens[token]  # do not let dead tokens pile up
                    responseCode = "401 Expired"
                else:
                    result = json.dumps({"verified-for": code,
                                         "scope": scope,
                                         "expires": expires})
    else:
        return [fail(start_response, "404 Not Found", "Unknown endpoint")]

    # TODO: Add headers if required
    headers = [('Content-Type', returnContentType),
               ('Cache-Control', 'no-store'),
               ('Pragma', 'no-cache'),
               ]
    if redirect:
        headers.append(('Location', redirect))
        responseCode = "302 Granted"
    start_response(responseCode, headers)
    return [result]


def get_application():
    """Return the WSGI callable served by this module."""
    return auth


def get_socketpath(name, server_number):
    """Return the FastCGI socket path for instance *server_number* of *name*."""
    return os.path.join(FCGI_SOCKET_DIR, 'fcgi-%s-%s.socket' % (name, server_number))


def main(args_in, app_name="rsauth"):
    """Parse *args_in* and serve the auth application on a FastCGI socket.

    Without ``-n`` the usage text is printed and three sample requests are
    run against the application instead of starting a server.
    """
    p = optparse.OptionParser(description=__doc__, version=__version__)
    p.set_usage(__usage__)
    p.add_option("-v", action="store_true", dest="verbose", help="verbose logging")
    p.add_option("-n", type="int", dest="server_num", help="Server instance number")
    opt, args = p.parse_args(args_in)

    if opt.server_num is None:  # 0 is a valid instance number
        print("ERROR: server number not specified")
        p.print_help()

        print("Running test cases.")
        samples = (
            '//auth?client_id=test&redirect_uri=foo&response_type=code&state=elated',
            '//auth?client_id=test&redirect_uri=foo&response_type=token&state=elated',
            '//vrfy?token=test&access_type=token&state=elated',
        )
        for uri in samples:
            print(auth({'REQUEST_URI': uri, 'REQUEST_METHOD': 'GET'}, lambda x, y: None))
        return

    socketfile = get_socketpath(app_name, opt.server_num)
    app = get_application()

    try:
        WSGIServer(app,
                   bindAddress=socketfile,
                   umask=FCGI_SOCKET_UMASK,
                   multiplexed=True,
                   ).run()
    finally:
        # Clean up server socket file; it may not exist if binding failed.
        if os.path.exists(socketfile):
            os.unlink(socketfile)


if __name__ == '__main__':
    main(sys.argv[1:])
4 | should implement https://tools.ietf.org/id/draft-dejong-remotestorage-00.txt 5 | """ 6 | 7 | from flup.server.fcgi import WSGIServer 8 | import datetime, time, os, sys, re 9 | import optparse 10 | import urllib 11 | import traceback 12 | import simplejson as json 13 | 14 | from dulwich.repo import Repo 15 | from dulwich.errors import * 16 | 17 | from pyoauth2.provider import Provider # TODO 18 | 19 | __usage__ = "%prog -n " 20 | __version__ = "$Id$" 21 | __author__ = "Thomas Hirsch " 22 | 23 | FCGI_SOCKET_DIR = '/tmp' 24 | FCGI_SOCKET_UMASK = 0111 25 | 26 | HTTP_200_OK = '200 Ok' 27 | HTTP_304_GETFAILED = '304 Conditional GET request failed' #TODO 28 | HTTP_400_MALFORMED = '400 Malformed request' #TODO 29 | HTTP_401_UNAUTHORIZED = '401 Insufficient permissions' #TODO 30 | HTTP_404_NOTFOUND = '404 Node does not exist' #TODO 31 | HTTP_409_PUTFAILED = '409 Contitional PUT request failed' #TODO 32 | HTTP_420_OVERLOAD = '420 Too Many Requests' #TODO 33 | HTTP_500_INTERNAL_ERROR = '500 Internal Server Error' 34 | 35 | RE_FILENAME = '([a-ZA-Z0-9%\._-]+)' 36 | RE_PATH = '([a-zA-Z0-9%\._-]+[a-zA-Z0-9%/\._-]*)' # NOTE: allowing dots in filenames is not allowed in [UNHOSTED] RFC! 
TOKEN_VERIFY_URI = 'http://relet.net/auth/vrfy'

# Path segments that must never be reachable through the storage API:
# relative references escape the module directory, and '.git' holds the
# repository internals (hooks, config).
_FORBIDDEN_SEGMENTS = ('.', '..', '.git')


def log(msg):
    """Append *msg* as one line to ``rs.log`` in the working directory."""
    with open("rs.log", "a") as fd:
        fd.write(msg + "\n")


def verify_path(path):
    """Return True when *path* is safe to use as a storage path.

    The whole string must match RE_PATH and no '/'-separated segment may be
    '.', '..' or '.git'. Anything that is not a string yields False.
    """
    try:
        match = re.match(RE_PATH, path)
    except TypeError:
        return False
    if match is None or match.group(1) != path:
        return False
    return not any(segment in _FORBIDDEN_SEGMENTS for segment in path.split("/"))


def verify_repository(name, create):
    """Open the git repository in directory *name*.

    Returns the repository, a freshly initialised one when it does not exist
    and *create* is true, or None when it does not exist and *create* is
    false.
    """
    try:
        return Repo(name)
    except NotGitRepository:
        if not create:
            return None
        if not os.path.isdir(name):
            os.mkdir(name)
        return Repo.init(name)


def fail(start_response, responseCode, msg):
    """Log the failure, start a plain-text response and return *msg*."""
    log("%s %s" % (responseCode, msg))
    start_response(responseCode, [('Content-Type', 'text/plain')])
    return msg


def verify(authorization):
    """Ask the auth daemon about an access token.

    Returns the ``scope`` value reported for the token, or False when the
    token is not verified, the daemon cannot be reached, or its answer is
    not valid JSON.
    """
    # Quote the token so it cannot smuggle extra query parameters.
    request = TOKEN_VERIFY_URI + "?access_type=token&token=%s" % urllib.quote(authorization, safe='')
    try:
        response = urllib.urlopen(request)
        try:
            vrfy = json.loads(response.read())
        finally:
            response.close()
    except (IOError, ValueError):  # unauthorized or garbled answer
        return False
    if isinstance(vrfy, dict) and 'verified-for' in vrfy:
        return vrfy.get('scope', False)
    return False


def rs(environ, start_response):
    """WSGI application implementing GET, PUT, DELETE and OPTIONS.

    The request URI is split on '/': the third element names the module
    (a git repository directory), the remainder is the path inside it. A
    path ending in '/' addresses a folder. Every request needs an Origin
    header; everything except OPTIONS needs a verified Bearer token whose
    scope contains 'r' (GET) or 'w' (PUT, DELETE).

    Returns a one-element list holding the response body.
    """
    result = ""
    responseCode = HTTP_200_OK
    returnContentType = 'text/json'
    version = 0
    origin = environ.get('HTTP_ORIGIN', '')

    log("%s %s" % (environ['REQUEST_METHOD'], environ['REQUEST_URI']))

    def reject(status, msg):
        # WSGI expects an iterable of strings, not a bare string.
        return [fail(start_response, status, msg)]

    try:
        method = environ['REQUEST_METHOD']

        if 'HTTP_ORIGIN' not in environ:
            return reject(HTTP_400_MALFORMED, "ORIGIN header field is mandatory")  # FIXME for all requests?!

        if method == "OPTIONS":
            headers = [
                ('Content-Type', returnContentType),
                ('Access-Control-Allow-ORIGIN', origin),
                ('Access-Control-Allow-Methods', 'GET, PUT, DELETE'),
                ('Access-Control-Allow-Headers',
                 'origin, authorization, content-type, if-modified-since, if-unmodified-since'),
            ]
            start_response(responseCode, headers)
            return [str(headers)]

        version = environ.get('HTTP_ETAG', 0)

        authorization = environ.get('HTTP_AUTHORIZATION', None)
        if not authorization:
            return reject(HTTP_401_UNAUTHORIZED, "Not a public storage")

        credentials = authorization.split()
        if len(credentials) != 2 or credentials[0] != "Bearer":
            return reject(HTTP_401_UNAUTHORIZED, "Only Bearer tokens supported")
        scope = verify(credentials[1])
        if not scope:
            return reject(HTTP_401_UNAUTHORIZED, "Invalid access token")

        # NOTE(review): the verify response carries only a permission string,
        # so access cannot be restricted to the granted module here.
        required = {'GET': 'r', 'PUT': 'w', 'DELETE': 'w'}.get(method)
        if required is None:
            return reject(HTTP_400_MALFORMED, "Unsupported method %s" % method)
        if required not in scope:
            return reject(HTTP_401_UNAUTHORIZED, "Token does not grant '%s' access" % required)

        path = urllib.unquote(environ['REQUEST_URI'])

        parms = path.split("/")
        if len(parms) < 3:
            return reject(HTTP_400_MALFORMED, "URI does not name a module")
        filepath = "/".join(parms[3:])
        repository = parms[2]

        # WSGI turns hyphens in header names into underscores.
        try:
            ifUnmodifiedSince = int(environ.get('HTTP_IF_UNMODIFIED_SINCE', '0'))
            ifModifiedSince = int(environ.get('HTTP_IF_MODIFIED_SINCE', '0'))
        except ValueError:
            return reject(HTTP_400_MALFORMED, "Conditional headers must be integer timestamps")

        if (not verify_path(repository)) or (not verify_path("repo/" + filepath)):
            log("repository = %s -> %s" % (repository, verify_path(repository)))
            log("filepath = %s -> %s" % (filepath, verify_path("repo/" + filepath)))
            return reject(HTTP_400_MALFORMED, "URI contains illegal characters")

        repo = verify_repository(repository, create=(method == "PUT"))
        if repo is None:
            return reject(HTTP_404_NOTFOUND, "Module not found.")
        index = repo.open_index()

        if method == "GET":
            if parms[-1] == '':  # the path ends in '/': list a folder
                folder = {}
                version = 0
                for entry in index:
                    if not entry.startswith(filepath):
                        continue
                    # A file is listed under its own name, a sub-folder under
                    # its first segment; either way the newest modification
                    # time below that name is its version.
                    name = entry[len(filepath):].split("/")[0]
                    nodeversion = index[entry][1][0]
                    folder[name] = max(folder.get(name, 0), nodeversion)
                    version = max(version, nodeversion)

                if not folder:
                    return reject(HTTP_404_NOTFOUND, "Folder not found.")
                if ifModifiedSince >= version:
                    return reject(HTTP_304_GETFAILED, "Last folder version is %s." % version)

                returnContentType = 'application/json'
                result = json.dumps(folder)

            else:  # the request is for a file
                try:
                    entry = index[filepath]
                except KeyError:
                    return reject(HTTP_404_NOTFOUND, "Node not found.")

                mtime, sha1 = entry[1], entry[8]
                version = mtime[0]

                if ifModifiedSince >= version:
                    return reject(HTTP_304_GETFAILED, "Last node version is %s." % version)

                result = repo.get_object(sha1).as_raw_string()

                # PUT records the content type after the first space of the
                # commit message; the type itself may contain spaces.
                latest = next(iter(repo.get_walker(paths=[filepath])), None)
                if latest is None:
                    returnContentType = 'application/octet-stream'
                else:
                    returnContentType = latest.commit.message.split(" ", 1)[-1].strip()

        elif method == "PUT":
            if parms[-1] == '':
                return reject(HTTP_400_MALFORMED, "Cannot PUT an empty folder. PUT the contents directly.")

            try:
                entry = index[filepath]
            except KeyError:
                entry = None  # ok, the node does not exist, so we create it.
            # The precondition only fails when the node changed after the
            # timestamp the client supplied.
            if entry is not None and 0 < ifUnmodifiedSince < entry[1][0]:
                return reject(HTTP_409_PUTFAILED, "Last node version is %s." % entry[1][0])

            if 'CONTENT_TYPE' not in environ:
                return reject(HTTP_400_MALFORMED, "Content-Type header field is mandatory for PUT requests")
            contentType = environ['CONTENT_TYPE']

            content = environ['wsgi.input'].read()

            target = repository + "/" + filepath
            nudir = os.path.dirname(target)
            if not os.path.isdir(nudir):
                os.makedirs(nudir)
                log("made directory %s" % nudir)
            log("writing %s" % target)
            with open(target, "wb") as f:
                f.write(content)
            repo.stage([filepath])

            sha1 = repo.do_commit('+' + filepath + " " + contentType, committer='python-remotestorage')
            version = repo.object_store[sha1].commit_time

            result = "Stored with sha1 %s and timestamp %s." % (sha1, version)

        elif method == "DELETE":
            if parms[-1] == '':
                return reject(HTTP_400_MALFORMED, "Cannot DELETE a folder. DELETE the contents directly.")

            try:
                entry = index[filepath]
            except KeyError:
                return reject(HTTP_404_NOTFOUND, "Node not found.")
            if 0 < ifUnmodifiedSince < entry[1][0]:
                return reject(HTTP_409_PUTFAILED, "Last node version is %s." % entry[1][0])

            os.remove(repository + "/" + filepath)
            #TODO: remove empty directories

            repo.stage([filepath])

            sha1 = repo.do_commit('-' + filepath, committer='python-remotestorage')
            version = repo.object_store[sha1].commit_time

            result = "Removed with sha1 %s and timestamp %s." % (sha1, version)

    except Exception as ex:
        responseCode = HTTP_500_INTERNAL_ERROR
        traceback.print_exc()
        log(traceback.format_exc())
        returnContentType = 'application/json'
        result = json.dumps({'exception': str(ex)})

    headers = [('ETag', str(version)),  # header values must be strings
               ('Content-Type', returnContentType),
               ('Access-Control-Allow-ORIGIN', origin),
               ]
    start_response(responseCode, headers)
    return [result]


def get_application():
    """Return the WSGI callable served by this module."""
    return rs


def get_socketpath(name, server_number):
    """Return the FastCGI socket path for instance *server_number* of *name*."""
    return os.path.join(FCGI_SOCKET_DIR, 'fcgi-%s-%s.socket' % (name, server_number))


def main(args_in, app_name="rs"):
    """Parse *args_in* and serve the storage application on a FastCGI socket.

    Without ``-n`` the usage text is printed and a sample request plus two
    path checks are run instead of starting a server.
    """
    p = optparse.OptionParser(description=__doc__, version=__version__)
    p.set_usage(__usage__)
    p.add_option("-v", action="store_true", dest="verbose", help="verbose logging")
    p.add_option("-n", type="int", dest="server_num", help="Server instance number")
    opt, args = p.parse_args(args_in)

    if opt.server_num is None:  # 0 is a valid instance number
        print("ERROR: server number not specified")
        p.print_help()

        print(rs({'REQUEST_URI': '//public/documents', 'REQUEST_METHOD': 'GET',
                  'HTTP_ORIGIN': 'http://litewrite.net',
                  'HTTP_AUTHORIZATION': 'Bearer 6c517dbce2ae68497bd3fe4ce1cc65eb'},
                 lambda x, y: None))

        print(verify_path("repo/locations/collections/"))
        print(verify_path("repo/pictures/Camera/D7C5FF07-5711-46BF-AD83-9EF05C6D6780.jpg"))
        return

    socketfile = get_socketpath(app_name, opt.server_num)
    app = get_application()

    try:
        WSGIServer(app,
                   bindAddress=socketfile,
                   umask=FCGI_SOCKET_UMASK,
                   multiplexed=True,
                   ).run()
    finally:
        # Clean up server socket file; it may not exist if binding failed.
        if os.path.exists(socketfile):
            os.unlink(socketfile)


if __name__ == '__main__':
    main(sys.argv[1:])