├── .gitignore ├── README.md ├── requirements.txt ├── s3plz ├── __init__.py └── utils.py ├── setup.py └── tests └── test_s3plz.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.DS_Store 2 | *.pyc 3 | build/ 4 | dist/ 5 | *.egg-info/ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # s3plz 2 | 3 | A polite, minimal interface for sending python objects 4 | to and from Amazon S3. 5 | 6 | ## Installation 7 | 8 | ``` 9 | pip install s3plz 10 | ``` 11 | 12 | ## Tests 13 | 14 | To run tests, you must first set these 15 | environmental variabels: 16 | 17 | ``` 18 | export AWS_ACCESS_KEY_ID='fdsaf' 19 | export AWS_SECRET_ACCESS_KEY='fdsaf' 20 | export S3PLZ_TEST_BUCKET='s3://my-cool-bucket' 21 | 22 | ``` 23 | and then run: 24 | ``` 25 | nosetests 26 | ``` 27 | 28 | ## Bacic Usage 29 | 30 | ```python 31 | 32 | import s3plz 33 | 34 | # Return an `s3plz.S3` object with 35 | # methods for sending objects 36 | # to and from Amazon S3. 37 | 38 | plz = s3plz.connect('s3://asteroid', 39 | key='navigate', 40 | secret='shield', 41 | serializer="json.gz", 42 | public = False 43 | ) 44 | 45 | # You can also set `AWS_ACCESS_KEY_ID` and 46 | # `AWS_SECRET_ACCESS_KEY` as environmental variables 47 | # instead of passing `key` and `secret` to `s3plz.connect` 48 | 49 | 50 | 51 | # Serialize an object, format its 52 | # filepath, put it on s3, and return 53 | # the formatted filepath (with an absolute s3path) 54 | # for your records 55 | 56 | obj1 = {"key": "value"} 57 | filepath = 'test/{key}.json.gz' 58 | 59 | fp = plz.put(obj1, filepath, **obj1) 60 | print fp 61 | 62 | # >>> 's3://asteroid/test/value.json.gz' 63 | # you can now fetch this object with its filepath 64 | 65 | obj2 = plz.get(fp) 66 | assert(obj1 == obj2) 67 | 68 | ``` 69 | 70 | ## Customization. 
71 | 72 | ## Filepaths 73 | 74 | `s3plz` will attempt to format your filepath 75 | for you given arbitrary `**kwargs` passed to 76 | any method. You also have access to UTC 77 | time via the "@" operator. 78 | 79 | These include: 80 | 81 | - **@second**: `56` 82 | - **@minute**: `54` 83 | - **@hour**: `23` 84 | - **@day**: `29` 85 | - **@month**: `01` 86 | - **@year**: `2014` 87 | - **@timestamp**: `1234567` 88 | - **@date_path**: `2014/01/14` 89 | - **@date_slug**: `2014-01-14` 90 | - **@datetime_slug**: `2013-12-12-06-08-52` 91 | - **@uid**: `dasfas-23r32-sad-3sadf-sdf` 92 | 93 | For instance, 94 | 95 | ``` python 96 | import s3plz 97 | 98 | obj = {"key": "value"} 99 | filepath = 'test/{key}/{@date_path}/{@uid}.json.gz' 100 | 101 | plz = s3plz.connect('s3://my-bucket', serializer="json.gz") 102 | fp = plz.put(obj, filepath, **obj) 103 | print fp 104 | # >>> 's3://my-bucket/test/value/2014/08/25/3225-sdsa-35235-asdfas-235.json.gz' 105 | 106 | ``` 107 | 108 | ### Serialization 109 | 110 | By default, `s3plz` will send strings to/from S3. You can also serialize / deserialize objects to / from `json.gz`, `json`, `gz`, `zip`, or `pickle` (set with `serializer` via `s3plz.connect`). These can also be changed on-the-fly by passing `serializer` as a `kwarg` into `get`, `put`, `stream` or `upsert`. 
111 | 112 | For example, 113 | 114 | ```python 115 | import s3plz 116 | 117 | s3 = s3plz.connect('s3://my-bucket') 118 | obj1 = {"foo":"bar"} 119 | fp = s3.put(obj1, "test/{foo}.json.gz", serializer="json.gz", **obj1) 120 | obj2 = s3.get(fp, serializer="json.gz") 121 | assert(obj1 == obj2) 122 | 123 | string1 = "hello world" 124 | fp = s3.put(string1, "test/string.zip", serializer="zip") 125 | string2 = s3.get(fp, serializer="zip") 126 | assert(string1 == string2) 127 | ``` 128 | 129 | However, you can also inherit from the core `s3plz.S3` class and overwrite the `serialize` and `deserialize` methods: 130 | 131 | ```python 132 | 133 | from s3plz import S3 134 | 135 | class SqlAlchemyToS3(S3): 136 | 137 | def serialize(self, obj): 138 | return "Do something here." 139 | 140 | def deserialize(self, string): 141 | return "Undo it." 142 | 143 | s3 = SqlAlchemyToS3('s3://bucket') 144 | print s3.get('s3://bucket/file.mycoolformat') 145 | # >>> `A SqLAlchemy Model` 146 | ``` 147 | 148 | ## API 149 | Full API Methods 150 | 151 | - `S3.put(data, filepath, headers, serializer, **kw)` 152 | * desciption: Upload a file if it doesnt already exist, 153 | otherwise return False 154 | * params: 155 | - `data`: Object to upload 156 | - `filepath`: filepath format string 157 | - `headers`: http headers to set on the object. 158 | - `serializer`: serializer to use for the object. 159 | - `**kw`: arbitrary kwargs to pass to the `filepath` format string. 160 | * returns: s3uri / False 161 | 162 | - `S3.upsert(data, filepath, headers, serializer, **kw)` 163 | * desciption: Upload a file if it doesnt already exist, otherwise return False 164 | * params: 165 | - `data`: Object to upload 166 | - `filepath`: filepath format string 167 | - `headers`: http headers to set on the object. 168 | - `serializer`: serializer to use for the object. 169 | - `**kw`: arbitrary kwargs to pass to the `filepath` format string. 
170 |   * returns: s3uri / False
171 | 
172 | - `S3.get(filepath, headers, serializer, **kw)`
173 |   * description: Download a file from s3. If it doesn't exist return None.
174 |   * params:
175 |     - `filepath`: filepath format string
176 |     - `headers`: http headers to set on the object.
177 |     - `serializer`: serializer to use for the object.
178 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
179 |   * returns: deserialized contents
180 | 
181 | - `S3.get_meta(filepath,**kw)`
182 |   * description: Get a dictionary of metadata fields for a filepath. If it doesn't exist, return None.
183 |   * params:
184 |     - `filepath`: filepath format string
185 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
186 |   * returns: dict of metadata.
187 | 
188 | - `S3.get_age(filepath, **kw)`
189 |   * description: Get the age of a filepath. If it doesn't exist, return None.
190 |   * params:
191 |     - `filepath`: filepath format string
192 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
193 |   * returns: datetime.timedelta
194 | 
195 | - `S3.exists(filepath, **kw)`
196 |   * description: Check if a file exists on s3.
197 |   * params:
198 |     - `filepath`: filepath format string
199 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
200 |   * returns: s3_uri / False
201 | 
202 | - `S3.ls(filepath, **kw)`
203 |   * description: Return a generator of filepaths under a directory.
204 |   * params:
205 |     - `filepath`: filepath format string
206 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
207 |   * returns: generator of s3_uri's
208 | 
209 | - `S3.stream(filepath, headers, serializer, **kw)`
210 |   * description: Return a generator of tuples of (s3_uri, contents) under a directory.
211 |   * params:
212 |     - `filepath`: filepath format string
213 |     - `headers`: http headers to set on the object.
214 |     - `serializer`: serializer to use for the object.
215 |     - `**kw`: arbitrary kwargs to pass to the `filepath` format string.
216 | * returns: generator of tuples of (s3_uri, contents) from s3. 217 | 218 | - `S3.delete(filepath, **kw)` 219 | * desciption: Delete a filepath from s3. 220 | * params: 221 | - `filepath`: filepath format string 222 | - `**kw`: arbitrary kwargs to pass to the `filepath` format string. 223 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | boto 2 | 3 | -------------------------------------------------------------------------------- /s3plz/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import boto 4 | import boto.s3 5 | import os 6 | import utils 7 | import warnings 8 | 9 | from boto.s3.key import Key 10 | from dateutil import parser 11 | 12 | 13 | def connect(uri, **kw): 14 | """ 15 | A wrapper for the core class 16 | for a more elegant API: 17 | 18 | import s3plz 19 | 20 | s3 = s3plz.connect('s3://enigma-euclid') 21 | s3.get('test/file.txt') 22 | 23 | """ 24 | return S3(uri, **kw) 25 | 26 | 27 | class S3AuthError(Exception): 28 | """ 29 | If Auth values are not set, this error will be thrown. 30 | """ 31 | 32 | 33 | class S3: 34 | 35 | """ 36 | A class for connecting to a s3 bucket and 37 | uploading/downloading files. 38 | 39 | Includes support for automatically formatting 40 | filepaths from time / contextual variables / uids 41 | as well as serializing and deserializing objects 42 | to and from s3. 43 | """ 44 | 45 | serializers = { 46 | 'json': utils.to_json, 47 | 'gz': utils.to_gz, 48 | 'zip': utils.to_zip, 49 | 'pickle': utils.to_pickle, 50 | } 51 | 52 | deserializers = { 53 | 'json': utils.from_json, 54 | 'gz': utils.from_gz, 55 | 'zip': utils.from_zip, 56 | 'pickle': utils.from_pickle, 57 | } 58 | 59 | def __init__(self, uri, **kw): 60 | 61 | # get bucket name / abs root. 
62 | self.bucket_name = utils.parse_s3_bucket(uri) 63 | self.s3root = "s3://{}".format(self.bucket_name) 64 | 65 | # connect to bucket 66 | self.bucket = self._connect_to_bucket(**kw) 67 | 68 | # set public/private 69 | self.acl_str = self._set_acl_str(kw.get('public', False)) 70 | 71 | # set a default serializer for this connection. 72 | self._serializer = kw.get('serializer', None) 73 | 74 | def put(self, data, filepath, **kw): 75 | """ 76 | Upload a file to s3 with serialization. 77 | """ 78 | return self._put(data, filepath, **kw) 79 | 80 | def create(self, data, filepath, **kw): 81 | """ 82 | Upload a file if it doesnt already exist, 83 | otherwise return False 84 | """ 85 | k = self._gen_key_from_fp(filepath, **kw) 86 | if not k.exists(): 87 | return self._put(data, filepath, **kw) 88 | else: 89 | return False 90 | 91 | def upsert(self, data, filepath, **kw): 92 | """ 93 | Synonym for create, kept for backwards 94 | compatibility, can/will eventually be 95 | deprecated in a major version release 96 | """ 97 | warnings.warn("""upsert is a deprecated method kept for backwards compatibility. 98 | please move to `S3.create` for the same functionality""") 99 | k = self._gen_key_from_fp(filepath, **kw) 100 | if not k.exists(): 101 | return self._put(data, filepath, **kw) 102 | else: 103 | return False 104 | 105 | def get(self, filepath, **kw): 106 | """ 107 | Download a file from s3. If it doesn't exist return None. 108 | """ 109 | return self._get(filepath, **kw) 110 | 111 | def get_meta(self, filepath, **kw): 112 | """ 113 | Get a dictionary of metadata fields for a filepath. 
114 | """ 115 | k = self._gen_key_from_fp(filepath, **kw) 116 | k.name = k.key 117 | k = self.bucket.get_key(k.name) 118 | if k: 119 | return { 120 | "content_type": k.content_type, 121 | "last_modified": parser.parse(k.last_modified), 122 | "content_language": k.content_language, 123 | "content_encoding": k.content_encoding, 124 | "content_length": k.content_length 125 | } 126 | else: 127 | return None 128 | 129 | def get_age(self, filepath, **kw): 130 | """ 131 | Get the age of a filepath. Returns a datetime.timedelta object. 132 | """ 133 | meta = self.get_meta(filepath, **kw) 134 | if meta: 135 | if not meta['last_modified']: 136 | return None 137 | return utils.now(ts=False) - meta['last_modified'] 138 | else: 139 | return None 140 | 141 | def exists(self, filepath, **kw): 142 | """ 143 | Check if a file exists on s3. 144 | """ 145 | k = self._gen_key_from_fp(filepath, **kw) 146 | if k.exists(): 147 | return self._make_abs(str(k.key)) 148 | else: 149 | return False 150 | 151 | def ls(self, directory='', **kw): 152 | """ 153 | Return a generator of filepaths under a directory. 154 | """ 155 | directory = self._format_filepath(directory, **kw) 156 | 157 | # s3 requires directories end with '/' 158 | if not directory.endswith('/'): 159 | directory += "/" 160 | 161 | for k in self.bucket.list(directory): 162 | yield self._make_abs(str(k.key)) 163 | 164 | def stream(self, directory='', **kw): 165 | """ 166 | Return a generator which contains a 167 | tuple of (filepath, filecontents) from s3. 168 | """ 169 | directory = self._format_filepath(directory, **kw) 170 | 171 | # s3 requires directories end with '/' 172 | if not directory.endswith('/'): 173 | directory += "/" 174 | 175 | for k in self.bucket.list(directory): 176 | fp = self._make_abs(str(k.key)) 177 | obj = self._get(fp, **kw) 178 | yield fp, obj 179 | 180 | def delete(self, filepath, **kw): 181 | """ 182 | Delete a file from s3. 
183 | """ 184 | return self._delete(filepath, **kw) 185 | 186 | def serialize(self, obj, **kw): 187 | """ 188 | Function for serializing object => string. 189 | This can be overwritten for custom 190 | uses. 191 | 192 | The default is to do nothing ('serializer'=None) 193 | If the connection is intialized with 'serializer' set to 194 | 'json.gz', 'json', 'gz', or 'zip', we'll do the 195 | transformations. 196 | 197 | Any number of serializers can be specified in dot delimited 198 | format, and will be applied left to right. 199 | """ 200 | serializer = kw.get('serializer', self._serializer) 201 | 202 | # Default is do nothing 203 | if serializer is None: 204 | return obj 205 | 206 | result = obj 207 | for name in serializer.split('.'): 208 | # Apply dot seperated serializers left to right 209 | try: 210 | result = self.serializers[name](result) 211 | except KeyError: 212 | raise NotImplementedError( 213 | '{} is not a supported serializer. Try one of: {}'.format( 214 | name, 215 | ','.join(self.serializers.keys()) 216 | ) 217 | ) 218 | 219 | return result 220 | 221 | def deserialize(self, string, **kw): 222 | """ 223 | Function for serializing object => string. 224 | This can be overwritten for custom 225 | uses. 226 | 227 | The default is to do nothing ('serializer'=None) 228 | If the connection is intialized with 'serializer' set to 229 | 'json.gz', 'json', 'gz', or 'zip', we'll do the 230 | transformations. 231 | 232 | Any number of serializers can be specified in dot delimited 233 | format, and will be applied right to left. 
234 | """ 235 | 236 | serializer = kw.get('serializer', self._serializer) 237 | 238 | # Default is do nothing 239 | if serializer is None: 240 | return string 241 | 242 | result = string 243 | for name in reversed(serializer.split('.')): 244 | # Apply dot seperated serializers left to right 245 | try: 246 | result = self.deserializers[name](result) 247 | except KeyError: 248 | raise NotImplementedError( 249 | '{} is not a supported deserializer. Try one of: {}'.format( 250 | name, 251 | ','.join(self.deserializers.keys()) 252 | ) 253 | ) 254 | 255 | return result 256 | 257 | def _set_acl_str(self, public): 258 | """ 259 | Simplified lookup for acl string settings. 260 | """ 261 | 262 | return {True: 'public-read', False: 'private'}.get(public) 263 | 264 | def _connect_to_bucket(self, **kw): 265 | """ 266 | Connect to a pre-existing s3 code. via 267 | kwargs or OS 268 | """ 269 | 270 | # get keys from kwargs / environment 271 | key = kw.get('key', \ 272 | os.getenv('AWS_ACCESS_KEY_ID')) 273 | secret = kw.get('secret', \ 274 | os.getenv('AWS_SECRET_ACCESS_KEY')) 275 | 276 | # check for valid key / secret 277 | if not key or not secret: 278 | raise S3AuthError, \ 279 | 'You must pass in a "key" and "secret" to s3plz.connect() or set ' \ 280 | '"AWS_ACCESS_KEY_ID" and "AWS_SECRET_ACCESS_KEY" as environment variables.' 281 | 282 | try: 283 | conn = boto.connect_s3(key, secret) 284 | except Exception as e: 285 | raise S3AuthError, \ 286 | "Your supplied credentials were invalid!" 287 | 288 | # lookup bucket 289 | return conn.get_bucket(self.bucket_name) 290 | 291 | # bucket doesn't exist. 292 | raise ValueError( 293 | 'Bucket {} Does Not Exist!'\ 294 | .format(self.bucket_name)) 295 | 296 | def _gen_key_from_fp(self, filepath, **kw): 297 | """ 298 | Take in a filepath and create a `boto.Key` for 299 | interacting with the file. Optionally reset serializer too! 
300 | 301 | """ 302 | k = Key(self.bucket) 303 | fp = self._format_filepath(filepath, **kw) 304 | k.key = fp 305 | k.name = fp 306 | return k 307 | 308 | def _format_filepath(self, filepath, **kw): 309 | """ 310 | Allow for inclusion of absolute filepaths / format strings. 311 | """ 312 | if filepath.startswith('s3://'): 313 | # boto doesn't accept absolute s3paths 314 | filepath = filepath.replace(self.s3root, '') 315 | 316 | if filepath.startswith('/'): 317 | # these can be left straggling 318 | # by the above conditional 319 | filepath = filepath[1:] 320 | 321 | return utils.format_filepath(filepath, **kw) 322 | 323 | def _make_abs(self, filepath): 324 | """ 325 | Output only absolute filepaths. Fight me. 326 | """ 327 | if not filepath.startswith('s3://'): 328 | filepath = '{}/{}'.format(self.s3root, filepath) 329 | return filepath 330 | 331 | def _put(self, data, filepath, **kw): 332 | """ 333 | Wrapper for serialization => s3 334 | """ 335 | headers = kw.pop('headers', {}) 336 | k = self._gen_key_from_fp(filepath, **kw) 337 | string = self.serialize(data, **kw) 338 | k.set_contents_from_string(string, headers=headers) 339 | k.set_acl(self.acl_str) 340 | return self._make_abs(str(k.key)) 341 | 342 | def _get(self, filepath, **kw): 343 | """ 344 | Wrapper for s3 => deserialization 345 | """ 346 | headers = kw.pop('headers', {}) 347 | k = self._gen_key_from_fp(filepath, **kw) 348 | if k.exists(): 349 | string = k.get_contents_as_string(headers=headers) 350 | return self.deserialize(string, **kw) 351 | else: 352 | return None 353 | 354 | def _delete(self, filepath, **kw): 355 | """ 356 | Wrapper for delete. Unnecessary but 357 | Is nice to have for expanding on 358 | the core class without writing `boto` code. 
359 | """ 360 | k = self._gen_key_from_fp(filepath, **kw) 361 | self.bucket.delete_key(k) 362 | return self._make_abs(str(k.key)) 363 | -------------------------------------------------------------------------------- /s3plz/utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from urlparse import urlparse 4 | from datetime import datetime 5 | import json 6 | import gzip 7 | import zipfile 8 | import uuid 9 | import pickle 10 | import cStringIO 11 | import pytz 12 | 13 | 14 | def now(ts=True): 15 | dt = datetime.now(pytz.utc) 16 | if ts: 17 | return int(dt.strftime('%s')) 18 | return dt 19 | 20 | 21 | def is_s3_uri(uri): 22 | """ 23 | Return True if *uri* can be parsed into an S3 URI, False otherwise. 24 | """ 25 | try: 26 | 27 | parse_s3_bucket(uri) 28 | return True 29 | 30 | except ValueError: 31 | 32 | return False 33 | 34 | 35 | def parse_s3_bucket(uri, _return_path=False): 36 | """Parse an S3 URI into (bucket, key) 37 | 38 | >>> parse_s3_uri('s3://walrus/tmp/') 39 | ('walrus', 'tmp/') 40 | 41 | If ``uri`` is not an S3 URI, raise a ValueError 42 | """ 43 | if not uri.endswith('/'): 44 | uri += '/' 45 | 46 | components = urlparse(uri) 47 | 48 | if (components.scheme not in ('s3', 's3n') 49 | or '/' not in components.path): 50 | 51 | raise ValueError('Invalid S3 URI: {}'.format(uri)) 52 | 53 | if _return_path: 54 | return components.netloc, components.path 55 | 56 | else: 57 | return components.netloc 58 | 59 | 60 | def filepath_opts(): 61 | """ 62 | Get a dictionary of timestrings 63 | to pass as default options 64 | for ``s3pyo.utils.format_filepath`` 65 | 66 | These can be accessed with the '@' key. 
67 | 68 | """ 69 | dt = now(ts=False) 70 | return { 71 | '@second': "%02d" % int(dt.second), 72 | '@minute': "%02d" % int(dt.minute), 73 | '@hour': "%02d" % int(dt.hour), 74 | '@day': "%02d" % int(dt.day), 75 | '@month': "%02d" % int(dt.month), 76 | '@year': dt.year, 77 | '@timestamp': dt.strftime('%s'), 78 | '@date_path': dt.strftime('%Y/%m/%d'), 79 | '@date_slug': dt.date().isoformat(), 80 | '@datetime_slug': dt.strftime('%Y-%m-%d-%H-%M-%S'), 81 | '@uid': uuid.uuid1() 82 | } 83 | 84 | 85 | def s3_to_url(s3uri): 86 | # get the bucket & path, this is a hack for 87 | # internal purposes, soorry. 88 | bucket, path = parse_s3_bucket(s3uri, _return_path=True) 89 | return "http://{}.s3.amazonaws.com/{}".format(bucket, path) 90 | 91 | 92 | def url_to_s3(url): 93 | nohttp = url.split('http://')[1] 94 | bucket, path = nohttp.split('.s3.amazonaws.com/') 95 | return "s3://{}/{}".forat(bucket, path) 96 | 97 | 98 | def format_filepath(fp, **kw): 99 | """ 100 | Given a format string, 101 | fill in fields with defaults / data. 102 | 103 | Since .format() is idempotent, it wont 104 | affect non-format strings. 
Thanks @jak 105 | """ 106 | kw.update(filepath_opts()) 107 | return fp.format(**kw) 108 | 109 | 110 | def to_gz(s): 111 | """ 112 | string > gzip 113 | """ 114 | assert(isinstance(s, basestring)) 115 | out = cStringIO.StringIO() 116 | with gzip.GzipFile(fileobj=out, mode="w") as f: 117 | f.write(s) 118 | return out.getvalue() 119 | 120 | 121 | def from_gz(s): 122 | """ 123 | gzip > string 124 | """ 125 | fileobj = cStringIO.StringIO(s) 126 | with gzip.GzipFile(fileobj=fileobj, mode="r") as f: 127 | return f.read() 128 | 129 | 130 | def from_json(s): 131 | """ 132 | jsonstring > obj 133 | """ 134 | return json.loads(s) 135 | 136 | 137 | def to_json(obj): 138 | """ 139 | obj > jsonstring 140 | """ 141 | return json.dumps(obj) 142 | 143 | 144 | def to_zip(s): 145 | """ 146 | string > zip 147 | """ 148 | fileobj = cStringIO.StringIO() 149 | with zipfile.ZipFile(fileobj, 'w') as f: 150 | f.writestr('s3plz.txt', s) 151 | return fileobj.getvalue() 152 | 153 | 154 | def from_zip(s): 155 | """ 156 | zip > string 157 | """ 158 | zpd = cStringIO.StringIO(s) 159 | zpf = zipfile.ZipFile(zpd, "r") 160 | return zpf.read(zpf.namelist()[0]) 161 | 162 | 163 | def to_pickle(obj): 164 | """ 165 | obj > picklestring 166 | """ 167 | return pickle.dumps(obj) 168 | 169 | 170 | def from_pickle(s): 171 | """ 172 | picklestring > object 173 | """ 174 | return pickle.loads(s) 175 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | def build(): 4 | setup( 5 | name = "s3plz", 6 | version = "0.1.7", 7 | author = "Brian Abelson", 8 | author_email = "brian@enigma.io", 9 | description = "A polite interface for sending python objects to and from Amazon S3.", 10 | license = "MIT", 11 | keywords = "s3, aws", 12 | url = "https://github.com/enigma-io/s3plz", 13 | packages = ['s3plz'], 14 | install_requires = [ 15 | "boto", 16 | 
"python-dateutil", 17 | "pytz" 18 | ], 19 | classifiers=[ 20 | "Development Status :: 3 - Alpha", 21 | "Topic :: Communications :: Email", 22 | "License :: OSI Approved :: MIT License", 23 | ] 24 | ) 25 | 26 | if __name__ == '__main__': 27 | build() -------------------------------------------------------------------------------- /tests/test_s3plz.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import unittest 3 | import s3plz 4 | import os 5 | from datetime import timedelta 6 | import logging 7 | logging.getLogger('boto').setLevel(logging.CRITICAL) 8 | 9 | MY_TEST_BUCKET = os.getenv('S3PLZ_TEST_BUCKET', None) 10 | if not MY_TEST_BUCKET: 11 | raise ValueError(""" 12 | To run tests, you must set these 13 | environmental variabels: 14 | 15 | export AWS_ACCESS_KEY_ID='fdsaf' 16 | export AWS_ACCESS_KEY_SECRET='fdsaf' 17 | export S3PLZ_TEST_BUCKET='s3://my-cool-bucket' 18 | """ 19 | ) 20 | 21 | class TestS3plz(unittest.TestCase): 22 | 23 | def test_s3plz(self): 24 | """ 25 | Simple workflow which addresses 26 | all methods and use cases. 27 | """ 28 | # try: 29 | # connect to s3 30 | plz = s3plz.connect(MY_TEST_BUCKET, 31 | public = False 32 | ) 33 | 34 | # create an object and formatstring 35 | obj1 = {"key": "value"} 36 | formatstring = 's3plztest-0.1.2/{@date_path}/{key}/{@uid}.json.gz' 37 | 38 | # put the object 39 | fp1 = plz.put(obj1, formatstring, serializer = "json.gz", **obj1) 40 | 41 | # check exists / put method (did the object make it there?) 42 | assert(plz.exists(fp1) is not False) 43 | 44 | # check get method 45 | obj2 = plz.get(fp1, serializer = "json.gz",) 46 | 47 | # check whether serialization / deserialization works 48 | assert(obj1 == obj2) 49 | 50 | # check ls / filepath formatting. 
51 | for fp in plz.ls('s3plztest-0.1.2/'): 52 | print fp 53 | assert('value' in fp) 54 | 55 | # check streaming method / deserialization 56 | for fp, obj in plz.stream('s3plztest-0.1.2/', serializer="json.gz"): 57 | print obj 58 | assert("key" in obj) 59 | assert(isinstance(obj, dict)) 60 | assert("value" in fp) 61 | 62 | # check upsert method / whether 63 | # updates to contextual time variables 64 | # are reflected in formatted filepaths. 65 | fp2 = plz.upsert(obj1, formatstring, serializer="json.gz", **obj1) 66 | assert(fp2 is not False) 67 | assert(fp1 != fp2) 68 | 69 | # check on-the-fly serialization 70 | obj1 = {"foo":"value"} 71 | fp = plz.put(obj1, "s3plztest-0.1.2/{foo}.json.gz", serializer="json.gz", **obj1) 72 | obj2 = plz.get(fp, serializer="json.gz") 73 | assert(obj1 == obj2) 74 | 75 | string1 = "hello world" 76 | fp = plz.put(string1, "s3plztest-0.1.2/string.zip", serializer="zip") 77 | string2 = plz.get(fp, serializer="zip") 78 | assert(string1 == string2) 79 | 80 | # check metadata 81 | meta = plz.get_meta(fp) 82 | assert(meta['last_modified'] is not None) 83 | 84 | # check age 85 | age = plz.get_age(fp) 86 | assert(isinstance(age, timedelta)) 87 | 88 | # check whether seri 89 | 90 | # check whether delete method works 91 | for fp in plz.ls('s3plztest-0.1.2/'): 92 | plz.delete(fp) 93 | assert(len(list(plz.ls('s3plztest-0.1.2/'))) == 0) 94 | 95 | 96 | def test_json(self): 97 | obj1 = {"key": "value"} 98 | jsonstring = s3plz.utils.to_json(obj1) 99 | obj2 = s3plz.utils.from_json(jsonstring) 100 | assert(obj1 == obj2) 101 | 102 | def test_gz(self): 103 | string1 = "uqbar" 104 | gzstring = s3plz.utils.to_gz(string1) 105 | string2 = s3plz.utils.from_gz(gzstring) 106 | assert(string1 == string2) 107 | 108 | def test_zip(self): 109 | string1 = "uqbar" 110 | gzstring = s3plz.utils.to_zip(string1) 111 | string2 = s3plz.utils.from_zip(gzstring) 112 | assert(string1 == string2) 113 | 114 | def test_pickle(self): 115 | string1 = "uqbar" 116 | gzstring = 
s3plz.utils.to_pickle(string1) 117 | string2 = s3plz.utils.from_pickle(gzstring) 118 | assert(string1 == string2) 119 | 120 | def test_auth_error(self): 121 | print "bad s3plz connections should throw an S3AuthError" 122 | try: 123 | s3plz.connect(MY_TEST_BUCKET, key=None, secret=None) 124 | except s3plz.S3AuthError: 125 | assert True 126 | else: 127 | assert False 128 | 129 | 130 | if __name__ == '__main__': 131 | unittest.main() 132 | --------------------------------------------------------------------------------