├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── Vagrantfile ├── provision.sh ├── s3nb ├── __init__.py ├── ipy2.py └── ipy3.py └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | .vagrant/ 3 | ipython/ 4 | credentials 5 | s3nb.log 6 | .checkpoints/ 7 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | SSH=vagrant ssh 2 | 3 | CONFIG_FILE=config/jupyter_notebook_config.py 4 | 5 | AWS_USER=s3nb 6 | 7 | .PHONY=clean configure creds kill restart run 8 | 9 | clean: 10 | rm -rf clean/ credentials 11 | 12 | configure: 13 | mv ${CONFIG_FILE} ${CONFIG_FILE}.orig 14 | echo "c = get_config()" >> ${CONFIG_FILE} 15 | echo "c.NotebookApp.log_level = 'DEBUG'" >> ${CONFIG_FILE} 16 | echo "c.NotebookApp.contents_manager_class = 's3nb.S3ContentsManager'" >> ${CONFIG_FILE} 17 | echo "c.S3ContentsManager.s3_base_uri = '${S3_BASE_URI}'" >> ${CONFIG_FILE} 18 | echo "c.S3ContentsManager.checkpoints_kwargs = {'root_dir': '/vagrant/.checkpoints'}" 19 | 20 | creds: 21 | grep -A2 ${AWS_USER} ~/.aws/credentials | sed 's/${AWS_USER}/default/g' > credentials 22 | ${SSH} -c "mkdir -p ~/.aws && ln -sf /vagrant/credentials ~/.aws/credentials" 23 | 24 | kill: 25 | ${SSH} -c "tmux kill-session -t server || true" 26 | 27 | restart: kill run; 28 | 29 | run: 30 | ${SSH} -c "tmux new-session -d -n run -s server 'PYTHONPATH=/vagrant jupyter notebook --config=/vagrant/${CONFIG_FILE} --ip=0.0.0.0 --no-browser > /vagrant/s3nb.log 2>&1'" 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # S3-backed notebook manager for IPython 2 | 3 | ## Setup 4 | 5 | 1. Install: 6 | 7 | from pypi: 8 | ```bash 9 | pip install s3nb 10 | ``` 11 | 12 | from source with pip: 13 | ```bash 14 | pip install git+https://github.com/monetate/s3nb 15 | ``` 16 | 17 | or from source the old fashioned way: 18 | ```bash 19 | git clone git@github.com:monetate/s3nb.git 20 | cd s3nb 21 | python ./setup.py install 22 | ``` 23 | 24 | 2. Configure 25 | 26 | ``` bash 27 | # set this - notebooks will be stored relative to this uri 28 | S3_NOTEBOOK_URI=s3://path/to/notebooks/ 29 | 30 | # and this 31 | IPYTHON_MAJOR_VERSION=4 32 | 33 | # optionally set this - checkpoints will be stored locally, relative to this path (for IPython 3) 34 | CHECKPOINT_ROOT_DIR=~/.checkpoints 35 | 36 | # optionally set this 37 | PROFILE=s3nbserver 38 | 39 | # shouldn't need to edit beyond this point 40 | 41 | ## IPython 2.x 42 | IPYNB_MANAGER=S3NotebookManager 43 | IPYNB_MANAGER_CFG=notebook_manager_class 44 | 45 | ## IPython 3.x or 4.x 46 | if [ $IPYTHON_MAJOR_VERSION == 3 ] || [ $IPYTHON_MAJOR_VERSION == 4 ]; then 47 | IPYNB_MANAGER=S3ContentsManager 48 | IPYNB_MANAGER_CFG=contents_manager_class 49 | fi 50 | 51 | IPYTHONDIR=${IPYTHONDIR:-$HOME/.ipython} 52 | PROFILE_DIR=${IPYTHONDIR}/profile_${PROFILE} 53 | 54 | if [ ! -d $PROFILE_DIR ]; then 55 | ipython profile create $PROFILE 56 | IPYNB_CONFIG=${PROFILE_DIR}/ipython_notebook_config.py 57 | mv $IPYNB_CONFIG $IPYNB_CONFIG.orig 58 | cat > $IPYNB_CONFIG <> ${IPYNB_CONFIG} 68 | fi 69 | ``` 70 | 71 | 3. If you haven't already, configure AWS variables for boto. [Follow these instructions](http://blogs.aws.amazon.com/security/post/Tx3D6U6WSFGOK2H/A-New-and-Standardized-Way-to-Manage-Credentials-in-the-AWS-SDKs). 72 | 73 | 4. Run 74 | ``` bash 75 | jupyter notebook --config=~/.ipython/s3nbserver/ipython_notebook_config.py 76 | ``` 77 | 78 | ## Development 79 | 80 | 1. Provision a virtual machine with `vagrant up` 81 | 2. Create an IPython profile with `make configure -e S3_BASE_URI=YOUR_BUCKET` 82 | 4. Share you AWS credentials with the virtual machine with `make creds -e AWS_USER=YOUR_USER` 83 | 4. Run the notebook server with `make run` 84 | -------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | Vagrant.configure(2) do |config| 2 | config.vm.provider "virtualbox" do |v| 3 | v.memory = 1024 4 | end 5 | 6 | config.vm.box = "ubuntu/trusty64" 7 | config.vm.network :forwarded_port, host: 8888, guest: 8888 8 | config.vm.provision :shell, path: "provision.sh" 9 | end 10 | -------------------------------------------------------------------------------- /provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | apt-get update 4 | 5 | # Install IPython 6 | apt-get install -y python-pip python2.7-dev libzmq-dev 7 | 8 | pip install boto 9 | pip install "ipython[notebook]" 10 | -------------------------------------------------------------------------------- /s3nb/__init__.py: -------------------------------------------------------------------------------- 1 | imported = False 2 | 3 | try: 4 | from .ipy2 import S3NotebookManager 5 | imported = True 6 | except ImportError: 7 | pass 8 | 9 | try: 10 | from .ipy3 import S3ContentsManager 11 | imported = True 12 | except ImportError: 13 | pass 14 | 15 | if not imported: 16 | raise ImportError("failed to import any s3nb managers") 17 | -------------------------------------------------------------------------------- /s3nb/ipy2.py: -------------------------------------------------------------------------------- 1 | """ 2 | An ipython 2.x notebook manager that uses s3 for storage. 3 | 4 | # Configuration file for ipython-notebook. 5 | c = get_config() 6 | 7 | c.NotebookApp.notebook_manager_class = 's3nb.S3NotebookManager' 8 | c.NotebookApp.log_level = 'DEBUG' 9 | c.S3NotebookManager.s3_base_uri = 's3://bucket/notebook/prefix/' 10 | """ 11 | import datetime 12 | import tempfile 13 | from os.path import join, splitext 14 | 15 | import boto 16 | from tornado import web 17 | 18 | from IPython.html.services.notebooks.nbmanager import NotebookManager 19 | from IPython.nbformat import current 20 | from IPython.utils.traitlets import Unicode 21 | from IPython.utils import tz 22 | 23 | 24 | # s3 return different time formats in different situations apparently 25 | S3_TIMEFORMAT_GET_KEY = '%a, %d %b %Y %H:%M:%S GMT' 26 | S3_TIMEFORMAT_BUCKET_LIST = '%Y-%m-%dT%H:%M:%S.000Z' 27 | 28 | 29 | class S3NotebookManager(NotebookManager): 30 | s3_bucket = Unicode(u"", config=True) 31 | s3_prefix = Unicode(u"", config=True) 32 | notebook_dir = Unicode(u"", config=True) # not used 33 | 34 | @staticmethod 35 | def _parse_s3_uri(uri, delimiter='/'): 36 | if not uri.startswith("s3://"): 37 | raise Exception("Unexpected s3 uri scheme in '{}', expected s3://".format(uri)) 38 | return uri[5:].split(delimiter, 1) 39 | 40 | def _s3_key_dir_to_model(self, key): 41 | self.log.debug("_s3_key_dir_to_model: {}: {}".format(key, key.name)) 42 | model = { 43 | 'name': key.name.rsplit(self.s3_key_delimiter, 2)[-2], 44 | 'path': key.name, 45 | 'last_modified': None, # key.last_modified, 46 | 'created': None, # key.last_modified, 47 | 'type': 'directory', 48 | } 49 | self.log.debug("_s3_key_dir_to_model: {}: {}".format(key.name, model)) 50 | return model 51 | 52 | def _s3_key_notebook_to_model(self, key, timeformat): 53 | self.log.debug("_s3_key_notebook_to_model: {}: {}".format(key, key.name)) 54 | model = { 55 | 'name': key.name.rsplit(self.s3_key_delimiter, 1)[-1], 56 | 'path': key.name, 57 | 'last_modified': datetime.datetime.strptime( 58 | key.last_modified, timeformat).replace(tzinfo=tz.UTC), 59 | 'created': None, 60 | 'type': 'notebook', 61 | } 62 | self.log.debug("_s3_key_notebook_to_model: {}: {}".format(key.name, model)) 63 | return model 64 | 65 | def _notebook_s3_key_string(self, path, name): 66 | key = self.s3_prefix + path.strip(self.s3_key_delimiter) 67 | # append delimiter if path is non-empty to avoid s3://bucket// 68 | if path != '': 69 | key += self.s3_key_delimiter 70 | key += name 71 | return key 72 | 73 | def _notebook_s3_key(self, path, name): 74 | key = self._notebook_s3_key_string(path, name) 75 | self.log.debug('_notebook_s3_key: looking in bucket:{} for:{}'.format(self.bucket.name, key)) 76 | return self.bucket.get_key(key) 77 | 78 | def __init__(self, **kwargs): 79 | super(S3NotebookManager, self).__init__(**kwargs) 80 | config = kwargs['parent'].config[self.__class__.__name__] # this can't be right 81 | self.s3_base_uri = config['s3_base_uri'] 82 | self.s3_key_delimiter = config.get('s3_key_delimiter', '/') 83 | self.s3_bucket, self.s3_prefix = self._parse_s3_uri(self.s3_base_uri, self.s3_key_delimiter) 84 | # ensure prefix ends with the delimiter 85 | if not self.s3_prefix.endswith(self.s3_key_delimiter): 86 | self.s3_prefix += self.s3_key_delimiter 87 | self.s3_connection = boto.connect_s3() 88 | self.bucket = self.s3_connection.get_bucket(self.s3_bucket) 89 | 90 | def info_string(self): 91 | return "Serving notebooks from {}".format(self.s3_base_uri) 92 | 93 | def path_exists(self, path): 94 | self.log.debug('path_exists: {}'.format(locals())) 95 | return True 96 | 97 | def is_hidden(self, path): 98 | self.log.debug('is_hidden: {}'.format(locals())) 99 | return False 100 | 101 | def list_dirs(self, path): 102 | self.log.debug('list_dirs: {}'.format(locals())) 103 | key = self.s3_prefix + path.strip(self.s3_key_delimiter) 104 | # append delimiter if path is non-empty to avoid s3://bucket// 105 | if path != '': 106 | key += self.s3_key_delimiter 107 | self.log.debug('list_dirs: looking in bucket:{} under:{}'.format(self.bucket.name, key)) 108 | notebooks = [] 109 | for k in self.bucket.list(key, self.s3_key_delimiter): 110 | if k.name.endswith(self.s3_key_delimiter): 111 | notebooks.append(self._s3_key_dir_to_model(k)) 112 | self.log.debug('list_dirs: found {}'.format(k.name)) 113 | return notebooks 114 | 115 | def list_notebooks(self, path=''): 116 | self.log.debug('list_notebooks: {}'.format(locals())) 117 | key = self.s3_prefix + path.strip(self.s3_key_delimiter) 118 | # append delimiter if path is non-empty to avoid s3://bucket// 119 | if path != '': 120 | key += self.s3_key_delimiter 121 | self.log.debug('list_notebooks: looking in bucket:{} under:{}'.format(self.bucket.name, key)) 122 | notebooks = [] 123 | for k in self.bucket.list(key, self.s3_key_delimiter): 124 | if k.name.endswith(self.filename_ext): 125 | notebooks.append(self._s3_key_notebook_to_model(k, timeformat=S3_TIMEFORMAT_BUCKET_LIST)) 126 | self.log.debug('list_notebooks: found {}'.format(k.name)) 127 | return notebooks 128 | 129 | def notebook_exists(self, name, path=''): 130 | self.log.debug('notebook_exists: {}'.format(locals())) 131 | k = self._notebook_s3_key(path, name) 132 | return k is not None and not k.name.endswith(self.s3_key_delimiter) 133 | 134 | def get_notebook(self, name, path='', content=True): 135 | self.log.debug('get_notebook: {}'.format(locals())) 136 | k = self._notebook_s3_key(path, name) 137 | model = self._s3_key_notebook_to_model(k, timeformat=S3_TIMEFORMAT_GET_KEY) 138 | if content: 139 | try: 140 | with tempfile.NamedTemporaryFile() as f: 141 | k.get_file(f) 142 | f.seek(0) 143 | nb = current.read(f, u'json') 144 | except Exception as e: 145 | raise web.HTTPError(400, u"Unreadable Notebook: %s %s" % (os_path, e)) 146 | self.mark_trusted_cells(nb, name, path) 147 | model['content'] = nb 148 | return model 149 | 150 | def save_notebook(self, model, name, path=''): 151 | self.log.debug('save_notebook: {}'.format(locals())) 152 | if 'content' not in model: 153 | raise web.HTTPError(400, u'No notebook JSON data provided') 154 | 155 | k = boto.s3.key.Key(self.bucket) 156 | k.key = self._notebook_s3_key_string(path, name) 157 | 158 | nb = current.to_notebook_json(model['content']) 159 | self.check_and_sign(nb, name, path) 160 | 161 | try: 162 | with tempfile.NamedTemporaryFile() as f: 163 | current.write(nb, f, u'json') 164 | f.seek(0) 165 | k.set_contents_from_file(f) 166 | except Exception as e: 167 | raise web.HTTPError(400, u"Unexpected Error Writing Notebook: %s %s %s" % (path, name, e)) 168 | 169 | return self.get_notebook(name, path, content=False) 170 | 171 | def update_notebook(self, model, name, path=''): 172 | self.log.debug('update_notebook: {}'.format(locals())) 173 | 174 | # support updating just name or path even though there doesn't seem to be a way to do this via the UI 175 | new_name = model.get('name', name) 176 | new_path = model.get('path', path) 177 | if path != new_path or name != new_name: 178 | src_key = self._notebook_s3_key_string(path, name) 179 | dst_key = self._notebook_s3_key_string(new_path, new_name) 180 | self.log.debug('copying notebook in bucket: {} from {} to {}'.format(self.bucket.name, src_key, dst_key)) 181 | if self.bucket.get_key(dst_key): 182 | raise web.HTTPError(409, u'Notebook with name already exists: %s' % src_key) 183 | self.bucket.copy_key(dst_key, self.bucket.name, src_key) 184 | self.log.debug('removing notebook in bucket: {} : {}'.format(self.bucket.name, src_key)) 185 | self.bucket.delete_key(src_key) 186 | 187 | return self.get_notebook(new_name, new_path, content=False) 188 | 189 | def delete_notebook(self, name, path=''): 190 | self.log.debug('delete_notebook: {}'.format(locals())) 191 | 192 | key = self._notebook_s3_key_string(path, name) 193 | self.log.debug('removing notebook in bucket: {} : {}'.format(self.bucket.name, key)) 194 | self.bucket.delete_key(key) 195 | 196 | def copy_notebook(self, from_name, to_name=None, path=''): 197 | """ 198 | Copy an existing notebook and return its new model. 199 | 200 | If to_name not specified, increment from_name-Copy#.ipynb. 201 | """ 202 | self.log.debug('copy_notebook: {}'.format(locals())) 203 | if to_name is None: 204 | from_name_root, _ = splitext(from_name) 205 | to_name = self.increment_filename(from_name_root + '-Copy', path) 206 | 207 | model = self.get_notebook(from_name, path) 208 | model['name'] = to_name 209 | 210 | self.log.debug('copying notebook from {} to {} with path {}'.format(from_name, to_name, path)) 211 | self.create_notebook(model, path) 212 | 213 | return model 214 | 215 | # Checkpoint methods 216 | checkpoint_dir = Unicode(u'ipynb_checkpoints', config=True) 217 | 218 | def get_checkpoint_name(self, checkpoint_id, name): 219 | basename, _ = splitext(name) 220 | checkpoint_name = '{name}--{checkpoint_id}{ext}'.format( 221 | name=basename, 222 | checkpoint_id=checkpoint_id, 223 | ext=self.filename_ext 224 | ) 225 | 226 | return checkpoint_name 227 | 228 | def get_checkpoint_path(self, path=''): 229 | return join(path, self.checkpoint_dir) 230 | 231 | def get_checkpoint_model(self, checkpoint_id, name, path=''): 232 | checkpoint_id = u'checkpoint' 233 | checkpoint_path = self.get_checkpoint_path(path) 234 | checkpoint_name = self.get_checkpoint_name(checkpoint_id, name) 235 | 236 | key = self._notebook_s3_key(checkpoint_path, checkpoint_name) 237 | checkpoint_notebook_model = self._s3_key_notebook_to_model( 238 | key, 239 | timeformat=S3_TIMEFORMAT_GET_KEY 240 | ) 241 | checkpoint_model = { 242 | 'id': checkpoint_id, 243 | 'last_modified': checkpoint_notebook_model['last_modified'] 244 | } 245 | 246 | return checkpoint_model 247 | 248 | def create_checkpoint(self, name, path=''): 249 | checkpoint_id = u'checkpoint' 250 | checkpoint_name = self.get_checkpoint_name(checkpoint_id, name) 251 | checkpoint_path = self.get_checkpoint_path(path) 252 | 253 | self.log.debug('creating checkpoint for notebook {}'.format(name)) 254 | model = self.get_notebook(name, path) 255 | model['name'] = checkpoint_name 256 | self.create_notebook(model, checkpoint_path) 257 | 258 | return self.get_checkpoint_model(checkpoint_id, name, path) 259 | 260 | def restore_checkpoint(self, checkpoint_id, name, path=''): 261 | checkpoint_name = self.get_checkpoint_name(checkpoint_id, name) 262 | checkpoint_path = self.get_checkpoint_path(path) 263 | 264 | self.log.info('Restoring {} from checkpoint {}'.format(name, checkpoint_name)) 265 | model = self.get_notebook(checkpoint_name, checkpoint_path) 266 | model['name'] = name 267 | self.create_notebook(model, path) 268 | 269 | def list_checkpoints(self, name, path=''): 270 | checkpoint_id = u'checkpoint' 271 | checkpoint_name = self.get_checkpoint_name(checkpoint_id, name) 272 | checkpoint_path = self.get_checkpoint_path(path) 273 | 274 | key = self._notebook_s3_key(checkpoint_path, checkpoint_name) 275 | if key is None: 276 | return [] 277 | else: 278 | return [self.get_checkpoint_model(checkpoint_id, name, path)] 279 | -------------------------------------------------------------------------------- /s3nb/ipy3.py: -------------------------------------------------------------------------------- 1 | import codecs 2 | from collections import namedtuple 3 | import datetime 4 | import tempfile 5 | 6 | import boto 7 | 8 | from tornado import web 9 | 10 | from IPython import nbformat 11 | from IPython.html.services.contents.filecheckpoints import GenericFileCheckpoints 12 | from IPython.html.services.contents.manager import ContentsManager 13 | from IPython.utils import tz 14 | 15 | 16 | # s3 return different time formats in different situations apparently 17 | S3_TIMEFORMAT_GET_KEY = '%a, %d %b %Y %H:%M:%S GMT' 18 | S3_TIMEFORMAT_BUCKET_LIST = '%Y-%m-%dT%H:%M:%S.000Z' 19 | 20 | fakekey = namedtuple('fakekey', 'name') 21 | 22 | 23 | class S3ContentsManager(ContentsManager): 24 | @staticmethod 25 | def _parse_s3_uri(uri, delimiter='/'): 26 | if not uri.startswith("s3://"): 27 | raise Exception("Unexpected s3 uri scheme in '{}', expected s3://".format(uri)) 28 | return uri[5:].split(delimiter, 1) 29 | 30 | def _path_to_s3_key(self, path): 31 | return self.s3_prefix + path.strip(self.s3_key_delimiter) 32 | 33 | def _path_to_s3_key_dir(self, path): 34 | key = self._path_to_s3_key(path) 35 | # append delimiter if path is non-empty to avoid s3://bucket// 36 | if path != '': 37 | key += self.s3_key_delimiter 38 | return key 39 | 40 | def _get_key_dir_name(self, name): 41 | try: 42 | return name.rsplit(self.s3_key_delimiter, 2)[-2] 43 | except IndexError: 44 | return '' 45 | 46 | def _s3_key_dir_to_model(self, key): 47 | self.log.debug("_s3_key_dir_to_model: %s: %s", key, key.name) 48 | model = { 49 | 'name': self._get_key_dir_name(key.name), 50 | 'path': key.name.replace(self.s3_prefix, ''), 51 | 'last_modified': datetime.datetime.utcnow(), # key.last_modified, will be used in an HTTP header 52 | 'created': None, # key.last_modified, 53 | 'type': 'directory', 54 | 'content': None, 55 | 'mimetype': None, 56 | 'writable': True, 57 | 'format': None, 58 | } 59 | self.log.debug("_s3_key_dir_to_model: %s: %s", key.name, model) 60 | return model 61 | 62 | def _s3_key_file_to_model(self, key, timeformat): 63 | self.log.debug("_s3_key_file_to_model: %s: %s", key, key.name) 64 | model = { 65 | 'content': None, 66 | 'name': key.name.rsplit(self.s3_key_delimiter, 1)[-1], 67 | 'path': key.name.replace(self.s3_prefix, ''), 68 | 'last_modified': datetime.datetime.strptime( 69 | key.last_modified, timeformat).replace(tzinfo=tz.UTC), 70 | 'created': None, 71 | 'type': 'file', 72 | 'mimetype': None, 73 | 'writable': True, 74 | 'format': None, 75 | } 76 | self.log.debug("_s3_key_file_to_model: %s: %s", key.name, model) 77 | return model 78 | 79 | def _s3_key_notebook_to_model(self, key, timeformat): 80 | self.log.debug("_s3_key_notebook_to_model: %s: %s", key, key.name) 81 | model = { 82 | 'content': None, 83 | 'name': key.name.rsplit(self.s3_key_delimiter, 1)[-1], 84 | 'path': key.name.replace(self.s3_prefix, ''), 85 | 'last_modified': datetime.datetime.strptime( 86 | key.last_modified, timeformat).replace(tzinfo=tz.UTC), 87 | 'created': None, 88 | 'type': 'notebook', 89 | 'mimetype': None, 90 | 'writable': True, 91 | 'format': None, 92 | } 93 | self.log.debug("_s3_key_notebook_to_model: %s: %s", key.name, model) 94 | return model 95 | 96 | def __init__(self, **kwargs): 97 | super(S3ContentsManager, self).__init__(**kwargs) 98 | config = self.config[self.__class__.__name__] # this still can't be right 99 | self.s3_base_uri = config['s3_base_uri'] 100 | self.s3_key_delimiter = config.get('s3_key_delimiter', '/') 101 | self.s3_bucket, self.s3_prefix = self._parse_s3_uri(self.s3_base_uri, self.s3_key_delimiter) 102 | # ensure prefix ends with the delimiter 103 | if not self.s3_prefix.endswith(self.s3_key_delimiter) and self.s3_prefix != '': 104 | self.s3_prefix += self.s3_key_delimiter 105 | self.s3_connection = boto.connect_s3() 106 | self.bucket = self.s3_connection.get_bucket(self.s3_bucket) 107 | self.log.debug("initialized base_uri: %s bucket: %s prefix: %s", 108 | self.s3_base_uri, self.s3_bucket, self.s3_prefix) 109 | 110 | def _checkpoints_class_default(self): 111 | return GenericFileCheckpoints 112 | 113 | def list_dirs(self, path): 114 | self.log.debug('list_dirs: %s', locals()) 115 | key = self._path_to_s3_key_dir(path) 116 | self.log.debug('list_dirs: looking in bucket:%s under:%s', self.bucket.name, key) 117 | dirs = [] 118 | for k in self.bucket.list(key, self.s3_key_delimiter): 119 | if k.name.endswith(self.s3_key_delimiter) and k.name != key: 120 | dirs.append(self._s3_key_dir_to_model(k)) 121 | self.log.debug('list_dirs: found %s', k.name) 122 | return dirs 123 | 124 | def list_files(self, path): 125 | self.log.debug('list_files: %s', locals()) 126 | key = self._path_to_s3_key_dir(path) 127 | self.log.debug('list_files: looking in bucket:%s under:%s', self.bucket.name, key) 128 | files = [] 129 | for k in self.bucket.list(key, self.s3_key_delimiter): 130 | if not k.name.endswith(self.s3_key_delimiter) and not k.name.endswith('.ipynb') and k.name != key: 131 | files.append(self._s3_key_file_to_model(k, timeformat=S3_TIMEFORMAT_BUCKET_LIST)) 132 | self.log.debug('list_files: found %s', k.name) 133 | return files 134 | 135 | def list_notebooks(self, path=''): 136 | self.log.debug('list_notebooks: %s', locals()) 137 | key = self._path_to_s3_key_dir(path) 138 | self.log.debug('list_notebooks: looking in bucket:%s under:%s', self.bucket.name, key) 139 | notebooks = [] 140 | for k in self.bucket.list(key, self.s3_key_delimiter): 141 | if k.name.endswith('.ipynb'): 142 | notebooks.append(self._s3_key_notebook_to_model(k, timeformat=S3_TIMEFORMAT_BUCKET_LIST)) 143 | self.log.debug('list_notebooks: found %s', k.name) 144 | return notebooks 145 | 146 | def delete(self, path): 147 | self.log.debug('delete: %s', locals()) 148 | key = self._path_to_s3_key(path) 149 | self.log.debug('removing notebook in bucket: %s : %s', self.bucket.name, key) 150 | self.bucket.delete_key(key) 151 | 152 | def get(self, path, content=True, type=None, format=None): 153 | self.log.debug('get: %s', locals()) 154 | # get: {'content': 1, 'path': '', 'self': , 'type': u'directory', 'format': None} 155 | # get: {'content': False, 'path': u'graphaelli/notebooks/2015-01 Hack.ipynb', 'self': , 'type': None, 'format': None} 156 | 157 | if type == 'directory': 158 | key = self._path_to_s3_key_dir(path) 159 | model = self._s3_key_dir_to_model(fakekey(key)) 160 | if content: 161 | model['content'] = self.list_dirs(path) + self.list_notebooks(path) + self.list_files(path) 162 | model['format'] = 'json' 163 | return model 164 | elif type == 'notebook' or (type is None and path.endswith('.ipynb')): 165 | key = self._path_to_s3_key(path) 166 | k = self.bucket.get_key(key) 167 | if not k: 168 | raise web.HTTPError(400, "{} not found".format(key)) 169 | model = self._s3_key_notebook_to_model(k, timeformat=S3_TIMEFORMAT_GET_KEY) 170 | if content: 171 | try: 172 | with tempfile.NamedTemporaryFile() as t: 173 | # download bytes 174 | k.get_file(t) 175 | t.seek(0) 176 | # read with utf-8 encoding 177 | with codecs.open(t.name, mode='r', encoding='utf-8') as f: 178 | nb = nbformat.read(f, as_version=4) 179 | except Exception as e: 180 | raise web.HTTPError(400, u"Unreadable Notebook: %s %s" % (path, e)) 181 | self.mark_trusted_cells(nb, path) 182 | model['content'] = nb 183 | model['format'] = 'json' 184 | self.validate_notebook_model(model) 185 | return model 186 | else: # assume that it is file 187 | key = self._path_to_s3_key(path) 188 | k = self.bucket.get_key(key) 189 | 190 | model = self._s3_key_file_to_model(k, timeformat=S3_TIMEFORMAT_GET_KEY) 191 | 192 | if content: 193 | try: 194 | model['content'] = k.get_contents_as_string() 195 | except Exception as e: 196 | raise web.HTTPError(400, u"Unreadable file: %s %s" % (path, e)) 197 | 198 | model['mimetype'] = 'text/plain' 199 | model['format'] = 'text' 200 | 201 | return model 202 | 203 | def dir_exists(self, path): 204 | self.log.debug('dir_exists: %s', locals()) 205 | key = self._path_to_s3_key(path) 206 | if path == '': 207 | return True 208 | else: 209 | try: 210 | next(iter(self.bucket.list(key, self.s3_key_delimiter))) 211 | return True 212 | except StopIteration: 213 | return False 214 | 215 | def is_hidden(self, path): 216 | self.log.debug('is_hidden %s', locals()) 217 | return False 218 | 219 | def file_exists(self, path): 220 | self.log.debug('file_exists: %s', locals()) 221 | if path == '': 222 | return False 223 | key = self._path_to_s3_key(path) 224 | k = self.bucket.get_key(key) 225 | return k is not None and not k.name.endswith(self.s3_key_delimiter) 226 | 227 | exists = file_exists 228 | 229 | def new_untitled(self, path='', type='', ext=''): 230 | self.log.debug('new_untitled: %s', locals()) 231 | model = { 232 | 'mimetype': None, 233 | 'created': datetime.datetime.utcnow(), 234 | 'last_modified': datetime.datetime.utcnow(), 235 | 'writable': True, 236 | } 237 | 238 | if type: 239 | model['type'] = type 240 | 241 | if ext == '.ipynb': 242 | model.setdefault('type', 'notebook') 243 | else: 244 | model.setdefault('type', 'file') 245 | 246 | insert = '' 247 | if model['type'] == 'directory': 248 | untitled = self.untitled_directory 249 | insert = ' ' 250 | elif model['type'] == 'notebook': 251 | untitled = self.untitled_notebook 252 | ext = '.ipynb' 253 | elif model['type'] == 'file': 254 | untitled = self.untitled_file 255 | else: 256 | raise web.HTTPError(400, "Unexpected model type: %r" % model['type']) 257 | 258 | name = self.increment_filename(untitled + ext, self.s3_prefix + path, insert=insert) 259 | path = u'{0}/{1}'.format(path, name) 260 | model.update({ 261 | 'name': name, 262 | 'path': path, 263 | }) 264 | return self.new(model, path) 265 | 266 | def _save_file(self, path, content, format): 267 | if format != 'text': 268 | raise web.HTTPError(400, u'Only text files are supported') 269 | 270 | try: 271 | bcontent = content.encode('utf8') 272 | except Exception as e: 273 | raise web.HTTPError(400, u'Encoding error saving {}: {}'.format(content, e)) 274 | 275 | k = boto.s3.key.Key(self.bucket) 276 | k.key = self._path_to_s3_key(path) 277 | 278 | with tempfile.NamedTemporaryFile() as f: 279 | f.write(content) 280 | f.seek(0) 281 | k.set_contents_from_file(f) 282 | 283 | def _save_notebook(self, path, nb): 284 | self.log.debug('_save_notebook: %s', locals()) 285 | 286 | k = boto.s3.key.Key(self.bucket) 287 | k.key = self._path_to_s3_key(path) 288 | 289 | try: 290 | with tempfile.NamedTemporaryFile() as t, codecs.open(t.name, mode='w', encoding='utf-8') as f: 291 | # write tempfile with utf-8 encoding 292 | nbformat.write(nb, f, version=nbformat.NO_CONVERT) 293 | # upload as bytes (t's fp didn't advance) 294 | k.set_contents_from_file(t) 295 | except Exception as e: 296 | raise web.HTTPError(400, u"Unexpected Error Writing Notebook: %s %s" % (path, e)) 297 | 298 | def rename(self, old_path, new_path): 299 | self.log.debug('rename: %s', locals()) 300 | if new_path == old_path: 301 | return 302 | 303 | src_key = self._path_to_s3_key(old_path) 304 | dst_key = self._path_to_s3_key(new_path) 305 | self.log.debug('copying notebook in bucket: %s from %s to %s', self.bucket.name, src_key, dst_key) 306 | if self.bucket.get_key(dst_key): 307 | raise web.HTTPError(409, u'Notebook with name already exists: %s' % dst_key) 308 | self.bucket.copy_key(dst_key, self.bucket.name, src_key) 309 | self.log.debug('removing notebook in bucket: %s : %s', self.bucket.name, src_key) 310 | self.bucket.delete_key(src_key) 311 | 312 | def save(self, model, path): 313 | """ very similar to filemanager.save """ 314 | self.log.debug('save: %s', locals()) 315 | 316 | if 'type' not in model: 317 | raise web.HTTPError(400, u'No file type provided') 318 | if 'content' not in model and model['type'] != 'directory': 319 | raise web.HTTPError(400, u'No file content provided') 320 | 321 | # self.run_pre_save_hook(model=model, path=path) 322 | 323 | if model['type'] == 'notebook': 324 | nb = nbformat.from_dict(model['content']) 325 | self.check_and_sign(nb, path) 326 | self._save_notebook(path, nb) 327 | elif model['type'] == 'file': 328 | self._save_file(path, model['content'], model.get('format')) 329 | elif model['type'] == 'directory': 330 | pass # keep symmetry with filemanager.save 331 | else: 332 | raise web.HTTPError(400, "Unhandled contents type: %s" % model['type']) 333 | 334 | validation_message = None 335 | if model['type'] == 'notebook': 336 | self.validate_notebook_model(model) 337 | validation_message = model.get('message', None) 338 | 339 | model = self.get(path, content=False, type=model['type']) 340 | if validation_message: 341 | model['message'] = validation_message 342 | 343 | # self.run_post_save_hook(model=model, os_path=path) 344 | 345 | model['content'] = None 346 | 347 | return model 348 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | setup( 7 | name = 's3nb', 8 | version = '0.0.5', 9 | author = "Monetate Inc.", 10 | author_email = "graphaelli@monetate.com", 11 | description = "s3 backed notebook manager for ipython 2.0+", 12 | install_requires = ['ipython[notebook]>=2.0', 'boto'], 13 | keywords = "ipython", 14 | license = "Python", 15 | long_description = """This package enables storage of ipynb files in s3""", 16 | platforms = 'any', 17 | packages = ['s3nb'], 18 | url = "https://github.com/monetate/s3nb", 19 | classifiers = [ 20 | "Development Status :: 4 - Beta", 21 | "Intended Audience :: Developers", 22 | "Operating System :: OS Independent", 23 | "Programming Language :: Python", 24 | ] 25 | ) 26 | --------------------------------------------------------------------------------