├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── DCO ├── LICENSE.md ├── MAINTAINERS ├── README.md ├── docs └── keyfile.png ├── jgscm ├── __init__.py └── tests │ ├── __init__.py │ ├── credentials.json.enc │ └── test.py ├── requirements.txt └── setup.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | env/ 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *,cover 47 | .hypothesis/ 48 | 49 | # Translations 50 | *.mo 51 | *.pot 52 | 53 | # Django stuff: 54 | *.log 55 | local_settings.py 56 | 57 | # Flask stuff: 58 | instance/ 59 | .webassets-cache 60 | 61 | # Scrapy stuff: 62 | .scrapy 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # IPython Notebook 71 | .ipynb_checkpoints 72 | 73 | # pyenv 74 | .python-version 75 | 76 | # celery beat schedule file 77 | celerybeat-schedule 78 | 79 | # dotenv 80 | .env 81 | 82 | # virtualenv 83 | venv/ 84 | ENV/ 85 | 86 | # Spyder project settings 87 | .spyderproject 88 | 89 | # Rope project settings 90 | .ropeproject 91 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: false 3 | dist: trusty 4 | cache: 5 | directories: 6 | - "$HOME/.cache/pip" 7 | python: 8 | - 2.7 9 | - 3.4 10 | - 3.5 11 | - 3.6 12 | env: 13 | - GOOGLE_APPLICATION_CREDENTIALS=credentials.json 14 | before_install: 15 | - if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then openssl aes-256-cbc -K $encrypted_e3a216ea47b7_key -iv $encrypted_e3a216ea47b7_iv 16 | -in jgscm/tests/credentials.json.enc -out credentials.json -d; fi 17 | - pip install --upgrade pip 18 | - pip install pycodestyle codecov 19 | install: 20 | - pip install -e . 21 | script: 22 | - pycodestyle --max-line-length=100 . 23 | - python -c "import jgscm" 24 | - if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then coverage run --concurrency=multiprocessing -m unittest discover && coverage combine; fi 25 | after_success: 26 | - if [ "$TRAVIS_PULL_REQUEST" = "false" ]; then codecov; fi 27 | notifications: 28 | email: false 29 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at conduct@sourced.tech. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing Guidelines 2 | 3 | JGSCM project is [MIT licensed](LICENSE.md) and accepts 4 | contributions via GitHub pull requests. This document outlines some of the 5 | conventions on development workflow, commit message formatting, contact points, 6 | and other resources to make it easier to get your contribution accepted. 7 | 8 | ## Certificate of Origin 9 | 10 | By contributing to this project you agree to the [Developer Certificate of 11 | Origin (DCO)](DCO). This document was created by the Linux Kernel community and is a 12 | simple statement that you, as a contributor, have the legal right to make the 13 | contribution. 14 | 15 | In order to show your agreement with the DCO you should include at the end of commit message, 16 | the following line: `Signed-off-by: John Doe `, using your real name. 17 | 18 | This can be done easily using the [`-s`](https://github.com/git/git/blob/b2c150d3aa82f6583b9aadfecc5f8fa1c74aca09/Documentation/git-commit.txt#L154-L161) flag on the `git commit`. 19 | 20 | 21 | ## Support Channels 22 | 23 | The official support channels, for both users and contributors, are: 24 | 25 | - GitHub [issues](https://github.com/src-d/jgscm/issues)* 26 | - Slack: #general room in the [source{d} Slack](https://join.slack.com/t/sourced-community/shared_invite/enQtMjc4Njk5MzEyNzM2LTFjNzY4NjEwZGEwMzRiNTM4MzRlMzQ4MmIzZjkwZmZlM2NjODUxZmJjNDI1OTcxNDAyMmZlNmFjODZlNTg0YWM) 27 | 28 | *Before opening a new issue or submitting a new pull request, it's helpful to 29 | search the project - it's likely that another user has already reported the 30 | issue you're facing, or it's a known issue that we're already aware of. 31 | 32 | 33 | ## How to Contribute 34 | 35 | Pull Requests (PRs) are the main and exclusive way to contribute to the official JGSCM project. 36 | In order for a PR to be accepted it needs to pass a list of requirements: 37 | 38 | - Code Coverage does not decrease. 39 | - All the tests pass. 40 | - The code is formatted according to [![PEP8](https://img.shields.io/badge/code%20style-pep8-orange.svg)](https://www.python.org/dev/peps/pep-0008/). 41 | - If the PR is a bug fix, it has to include a new unit test that fails before the patch is merged. 42 | - If the PR is a new feature, it has to come with a suite of unit tests, that tests the new functionality. 43 | - In any case, all the PRs have to pass the personal evaluation of at least one of the [maintainers](MAINTAINERS.md). 44 | 45 | 46 | ### Format of the commit message 47 | 48 | The commit summary must start with a capital letter and with a verb in present tense. No dot in the end. 49 | 50 | ``` 51 | Add a feature 52 | Remove unused code 53 | Fix a bug 54 | ``` 55 | 56 | Every commit details should describe what was changed, under which context and, if applicable, the GitHub issue it relates to. -------------------------------------------------------------------------------- /DCO: -------------------------------------------------------------------------------- 1 | Developer Certificate of Origin 2 | Version 1.1 3 | 4 | Copyright (C) 2004, 2006 The Linux Foundation and its contributors. 5 | 1 Letterman Drive 6 | Suite D4700 7 | San Francisco, CA, 94129 8 | 9 | Everyone is permitted to copy and distribute verbatim copies of this 10 | license document, but changing it is not allowed. 11 | 12 | 13 | Developer's Certificate of Origin 1.1 14 | 15 | By making a contribution to this project, I certify that: 16 | 17 | (a) The contribution was created in whole or in part by me and I 18 | have the right to submit it under the open source license 19 | indicated in the file; or 20 | 21 | (b) The contribution is based upon previous work that, to the best 22 | of my knowledge, is covered under an appropriate open source 23 | license and I have the right under that license to submit that 24 | work with modifications, whether created in whole or in part 25 | by me, under the same open source license (unless I am 26 | permitted to submit under a different license), as indicated 27 | in the file; or 28 | 29 | (c) The contribution was provided directly to me by some other 30 | person who certified (a), (b) or (c) and I have not modified 31 | it. 32 | 33 | (d) I understand and agree that this project and the contribution 34 | are public and that a record of the contribution (including all 35 | personal information I submit with it, including my sign-off) is 36 | maintained indefinitely and may be redistributed consistent with 37 | this project or the open source license(s) involved. -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | ===================== 3 | 4 | Copyright © `2016` `source{d}` 5 | 6 | Permission is hereby granted, free of charge, to any person obtaining a copy 7 | of this software and associated documentation files (the "Software"), to deal 8 | in the Software without restriction, including without limitation the rights 9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 10 | copies of the Software, and to permit persons to whom the Software is 11 | furnished to do so, subject to the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be included in all 14 | copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 22 | SOFTWARE. 23 | -------------------------------------------------------------------------------- /MAINTAINERS: -------------------------------------------------------------------------------- 1 | Vadim Markovtsev (@vmarkovtsev) -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Jupyter FS in Google Cloud Storage [![PyPI](https://img.shields.io/pypi/v/jgscm.svg)](https://pypi.python.org/pypi/jgscm) [![Build Status](https://travis-ci.org/src-d/jgscm.svg?branch=master)](https://travis-ci.org/src-d/jgscm) [![codecov](https://codecov.io/github/src-d/jgscm/coverage.svg)](https://codecov.io/gh/src-d/jgscm) 2 | ======================================== 3 | 4 | Jupyter Google Storage Contents Manager allows working with Jupyter notebooks directly in Google Cloud Storage. 5 | It aims to be a complete drop-in replacement for the stock filesystem 6 | [ContentsManager](http://jupyter-notebook.readthedocs.io/en/latest/extending/contents.html). 7 | Thus JGSCM is only compatible with a relatively modern IPython/Jupyter stack (version 4 and above). 8 | 9 | The root level of the virtual file system is the list of buckets, which 10 | are presented as directories. In turn, each bucket is presented as an 11 | ordinary folder where users can create files, subdirectories and notebooks. 12 | Besides, snapshots are completely supported too. 13 | 14 | Installation 15 | ------------ 16 | ``` 17 | pip install jgscm 18 | ``` 19 | You must point to jgscm in [Jupyter settings](http://jupyter-notebook.readthedocs.io/en/latest/config.html). 20 | Usually you'd need to edit `~/.jupyter/jupyter_notebook_config.py` and 21 | insert the following: 22 | ```python 23 | c.NotebookApp.contents_manager_class = 'jgscm.GoogleStorageContentManager' 24 | # c.GoogleStorageContentManager.project = '' 25 | # c.GoogleStorageContentManager.keyfile = '' 26 | ``` 27 | `project` and `keyfile` must be set if [gcloud](https://github.com/GoogleCloudPlatform/gcloud-python) 28 | cannot determine the defaults. Read more about it in one of the next sections. 29 | 30 | Contributions 31 | ------------- 32 | ...are welcome! See [CONTRIBUTING](CONTRIBUTING.md) and [code of conduct](CODE_OF_CONDUCT.md). 33 | 34 | License 35 | ------- 36 | MIT, see [LICENSE](LICENSE.md). 37 | 38 | Usage 39 | ----- 40 | Just launch and use Jupyter as usual. 41 | 42 | On the root level, you may not create files, only directories and the latter 43 | are considered as new buckets. On deeper levels, there should be no difference. 44 | You can change the Google Cloud project in order to work with 45 | corresponding buckets in Jupyter configuration (see [Projects and keyfiles](#projects-and-keyfiles)). 46 | 47 | GCS treats blobs with names ending with "/" as folders and so does this 48 | project. Since there is no special handling of directories in GCS, 49 | when you delete some blob which is the only one in it's parent directory, 50 | and the parent directory was not created explicitly, it will disappear as well. 51 | This behavior is similar to some old-style source control systems. 52 | 53 | GCS API invocations can take some time. While JGSCM does it's best to reduce 54 | the number of calls, they still can introduce substantial delays in 55 | Jupyter UI. Please, be patient. 56 | 57 | There is an ability to specify the starting path instead of the buckets listing: 58 | ```python 59 | c.GoogleStorageContentManager.default_path = 'path/without/starting/slash' 60 | ``` 61 | (`--notebook-dir` does not seem to work). 62 | 63 | Checkpoints 64 | ----------- 65 | Checkpoints are stored in .ipynb_checkpoints directory as usual. That 66 | name can be changed via `c.GoogleStorageCheckpoints.checkpoint_dir` in 67 | Jupyter configuration. You can set the dedicated bucket for snapshots via 68 | `c.GoogleStorageCheckpoints.checkpoint_bucket`. 69 | 70 | The name of each checkpoint is \-[UUID4](https://en.wikipedia.org/wiki/Universally_unique_identifier).ipynb. 71 | 72 | Hidden files and directories 73 | ---------------------------- 74 | As with any UNIX filesystem, files and directories with names starting 75 | with dot "`.`" are considered hidden by default. You can change this by 76 | setting `c.GoogleStorageContentManager.hide_dotted_blobs` to `False`. 77 | 78 | Projects and keyfiles 79 | --------------------- 80 | Usually, if you launch Jupyter in Google Cloud, the default project is picked 81 | up and no special keys and tokens must be supplied. The same is true 82 | if you launch Jupyter in the environment which was configured for 83 | Google Cloud (`gcloud init`). If this is not the case, you can explicitly 84 | set the Google Cloud project and authentication credentials. 85 | 86 | Open Jupyter configuration and set 87 | ```python 88 | c.GoogleStorageContentManager.project = '...' 89 | c.GoogleStorageContentManager.keyfile = '...' 90 | ``` 91 | 92 | Regarding keyfiles, please read the [official documentation](https://cloud.google.com/storage/docs/authentication). 93 | Go to API Manager / Credentials in cloud console to generate it: 94 | 95 | ![API Manager / Credentials](docs/keyfile.png) 96 | 97 | Testing 98 | ------- 99 | ``` 100 | PYTHONPATH=`pwd` python3 -W ignore::DeprecationWarning jgscm/tests/test.py 101 | ``` 102 | JGSCM writes logs at DEBUG verbosity level (`c.Application.log_level = "DEBUG"`). 103 | -------------------------------------------------------------------------------- /docs/keyfile.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/src-d/jgscm/65ee2fe74d2db05b0873ee9e39af42925fc0ea83/docs/keyfile.png -------------------------------------------------------------------------------- /jgscm/__init__.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import errno 3 | from itertools import islice 4 | import os 5 | import sys 6 | import uuid 7 | 8 | from google.cloud.exceptions import NotFound, Forbidden, BadRequest 9 | from google.cloud.storage import Client as GSClient, Blob 10 | import nbformat 11 | from notebook.services.contents.checkpoints import Checkpoints, \ 12 | GenericCheckpointsMixin 13 | try: 14 | import notebook.transutils 15 | # https://github.com/jupyter/notebook/issues/3056 16 | except ImportError: 17 | pass 18 | from notebook.services.contents.manager import ContentsManager 19 | from tornado import web 20 | from tornado.escape import url_unescape 21 | from traitlets import Any, Bool, Int, Unicode, default 22 | 23 | 24 | if sys.version_info[0] == 2: 25 | import socket 26 | BrokenPipeError = socket.error 27 | base64.encodebytes = base64.encodestring 28 | base64.decodebytes = base64.decodestring 29 | else: 30 | unicode = str 31 | 32 | 33 | class GoogleStorageCheckpoints(GenericCheckpointsMixin, Checkpoints): 34 | checkpoint_dir = Unicode( 35 | ".ipynb_checkpoints", 36 | config=True, 37 | help="""The directory name in which to keep file checkpoints 38 | 39 | This is a path relative to the file"s own directory. 40 | 41 | By default, it is .ipynb_checkpoints 42 | """, 43 | ) 44 | checkpoint_bucket = Unicode( 45 | "", config=True, help="The bucket name where to keep file checkpoints." 46 | " If empty, the current bucket is used." 47 | ) 48 | 49 | def create_file_checkpoint(self, content, format, path): 50 | """Create a checkpoint of the current state of a file 51 | 52 | Returns a checkpoint model for the new checkpoint. 53 | """ 54 | checkpoint_id = str(uuid.uuid4()) 55 | cp = self._get_checkpoint_path(checkpoint_id, path) 56 | self.log.debug("creating checkpoint %s for %s as %s", 57 | checkpoint_id, path, cp) 58 | blob = self.parent._save_file(cp, content, format) 59 | return { 60 | "id": checkpoint_id, 61 | "last_modified": blob.updated, 62 | } 63 | 64 | def create_notebook_checkpoint(self, nb, path): 65 | """Create a checkpoint of the current state of a file 66 | 67 | Returns a checkpoint model for the new checkpoint. 68 | """ 69 | checkpoint_id = str(uuid.uuid4()) 70 | cp = self._get_checkpoint_path(checkpoint_id, path) 71 | self.log.debug("creating checkpoint %s for %s as %s", 72 | checkpoint_id, path, cp) 73 | blob = self.parent._save_notebook(cp, nb) 74 | return { 75 | "id": checkpoint_id, 76 | "last_modified": blob.updated, 77 | } 78 | 79 | def get_file_checkpoint(self, checkpoint_id, path): 80 | """Get the content of a checkpoint for a non-notebook file. 81 | 82 | Returns a dict of the form: 83 | { 84 | "type": "file", 85 | "content": , 86 | "format": {"text","base64"}, 87 | } 88 | """ 89 | self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) 90 | cp = self._get_checkpoint_path(checkpoint_id, path) 91 | exists, blob = self.parent._fetch(cp) 92 | if not exists: 93 | raise web.HTTPError(404, u"No such checkpoint: %s for %s" % ( 94 | checkpoint_id, path)) 95 | content, fmt = self.parent._read_file(blob, None) 96 | return { 97 | "type": "file", 98 | "content": content, 99 | "format": fmt 100 | } 101 | 102 | def get_notebook_checkpoint(self, checkpoint_id, path): 103 | """Get the content of a checkpoint for a notebook. 104 | 105 | Returns a dict of the form: 106 | { 107 | "type": "notebook", 108 | "content": , 109 | } 110 | """ 111 | self.log.info("restoring %s from checkpoint %s", path, checkpoint_id) 112 | cp = self._get_checkpoint_path(checkpoint_id, path) 113 | exists, blob = self.parent._fetch(cp) 114 | if not exists: 115 | raise web.HTTPError(404, u"No such checkpoint: %s for %s" % ( 116 | checkpoint_id, path)) 117 | nb = self.parent._read_notebook(blob) 118 | return { 119 | "type": "notebook", 120 | "content": nb 121 | } 122 | 123 | def rename_checkpoint(self, checkpoint_id, old_path, new_path): 124 | """Rename a single checkpoint from old_path to new_path.""" 125 | old_cp = self._get_checkpoint_path(checkpoint_id, old_path) 126 | new_cp = self._get_checkpoint_path(checkpoint_id, new_path) 127 | self.parent.rename_file(old_cp, new_cp) 128 | 129 | def delete_checkpoint(self, checkpoint_id, path): 130 | """delete a checkpoint for a file""" 131 | cp = self._get_checkpoint_path(checkpoint_id, path) 132 | self.parent.delete_file(cp) 133 | 134 | def list_checkpoints(self, path): 135 | """Return a list of checkpoints for a given file""" 136 | cp = self._get_checkpoint_path(None, path) 137 | bucket_name, bucket_path = self.parent._parse_path(cp) 138 | try: 139 | bucket = self.parent._get_bucket(bucket_name) 140 | it = bucket.list_blobs(prefix=bucket_path, delimiter="/", 141 | max_results=self.parent.max_list_size) 142 | checkpoints = [{ 143 | "id": os.path.splitext(file.path)[0][-36:], 144 | "last_modified": file.updated, 145 | } for file in islice(it, self.parent.max_list_size)] 146 | except NotFound: 147 | return [] 148 | checkpoints.sort(key=lambda c: c["last_modified"], reverse=True) 149 | self.log.debug("list_checkpoints: %s: %s", path, checkpoints) 150 | return checkpoints 151 | 152 | def _get_checkpoint_path(self, checkpoint_id, path): 153 | if path.startswith("/"): 154 | path = path[1:] 155 | bucket_name, bucket_path = self.parent._parse_path(path) 156 | if self.checkpoint_bucket: 157 | bucket_name = self.checkpoint_bucket 158 | slash = bucket_path.rfind("/") + 1 159 | name, ext = os.path.splitext(bucket_path[slash:]) 160 | if checkpoint_id is not None: 161 | return "%s/%s%s/%s-%s%s" % ( 162 | bucket_name, bucket_path[:slash], self.checkpoint_dir, name, 163 | checkpoint_id, ext) 164 | return "%s/%s%s/%s" % (bucket_name, bucket_path[:slash], 165 | self.checkpoint_dir, name) 166 | 167 | 168 | class GoogleStorageContentManager(ContentsManager): 169 | project = Unicode( 170 | "", config=True, 171 | help="The name of the project in Google Cloud to use. If you do not " 172 | "set this parameter, google.cloud will pick the default project " 173 | "from the execution context if it exists." 174 | ) 175 | keyfile = Unicode( 176 | "", config=True, 177 | help="The path to the Google Cloud API JSON keyfile which is needed " 178 | "for authorization. If you do not set this parameter, " 179 | "google.cloud will be OK if the default project exists." 180 | ) 181 | max_list_size = Int(128, config=True, help="list_blobs() limit") 182 | cache_buckets = Bool(True, config=True, 183 | help="Value indicating whether to cache the bucket " 184 | "objects for faster operations.") 185 | hide_dotted_blobs = Bool(True, config=True, 186 | help="Consider blobs which names start with dot " 187 | "as hidden.") 188 | # redefine untitled_directory to change the default value 189 | untitled_directory = Unicode( 190 | "untitled-folder", config=True, 191 | help="The base name used when creating untitled directories.") 192 | default_path = Unicode( 193 | "", config=True, help="The default path to open.") 194 | post_save_hook = Any(None, config=True, 195 | help="""Python callable or importstring thereof 196 | 197 | to be called on the path of a file just saved. 198 | 199 | This can be used to process the file on disk, 200 | such as converting the notebook to a script or HTML via nbconvert. 201 | 202 | It will be called as (all arguments passed by keyword):: 203 | 204 | hook(os_path=path, model=model, contents_manager=instance) 205 | 206 | - path: the GCS path to the file just written 207 | - model: the model representing the file 208 | - contents_manager: this ContentsManager instance 209 | """ 210 | ) 211 | 212 | def __init__(self, *args, **kwargs): 213 | # Stub for the GSClient instance (set lazily by the client property). 214 | self._client = None 215 | super(GoogleStorageContentManager, self).__init__(*args, **kwargs) 216 | 217 | def debug_args(fn): 218 | def wrapped_fn(self, *args, **kwargs): 219 | self.log.debug("call %s(%s%s%s)", fn.__name__, 220 | ", ".join(repr(a) for a in args), 221 | ", " if args and kwargs else "", 222 | ", ".join("%s=%r" % p for p in kwargs.items())) 223 | result = fn(self, *args, **kwargs) 224 | self.log.debug("result %s %s", fn.__name__, result) 225 | return result 226 | 227 | return wrapped_fn 228 | 229 | @debug_args 230 | def is_hidden(self, path): 231 | if path == "": 232 | return False 233 | if path.startswith("/"): 234 | path = path[1:] 235 | bucket_name, bucket_path = self._parse_path(path) 236 | try: 237 | bucket = self._get_bucket(bucket_name) 238 | except Forbidden: 239 | return True 240 | if bucket is None: 241 | return True 242 | if self.hide_dotted_blobs and \ 243 | self._get_blob_name(bucket_path).startswith("."): 244 | return True 245 | return False 246 | 247 | @debug_args 248 | def file_exists(self, path=""): 249 | if path == "" or path.endswith("/"): 250 | return False 251 | if path.startswith("/"): 252 | path = path[1:] 253 | bucket_name, bucket_path = self._parse_path(path) 254 | if not bucket_path: 255 | return False 256 | bucket = self._get_bucket(bucket_name) 257 | if bucket is None or bucket_path == "": 258 | return False 259 | return bucket.blob(bucket_path).exists() 260 | 261 | @debug_args 262 | def dir_exists(self, path): 263 | if path.startswith("/"): 264 | path = path[1:] 265 | if path == "": 266 | return True 267 | if not path.endswith("/"): 268 | path += "/" 269 | bucket_name, blob_prefix_name = self._parse_path(path) 270 | # Get the bucket, fail if the bucket cannot be found. 271 | bucket = self._get_bucket(bucket_name) 272 | if not bucket: 273 | return False 274 | # Only check that bucket exists. 275 | if not blob_prefix_name: 276 | return True 277 | # Check that some blobs exist with the prefix as a path. 278 | if list(bucket.list_blobs(prefix=blob_prefix_name, max_results=1)): 279 | return True 280 | return False 281 | 282 | @debug_args 283 | def get(self, path, content=True, type=None, format=None): 284 | if isinstance(path, Blob): 285 | obj = path 286 | path = self._get_blob_path(obj) 287 | elif path.startswith("/"): 288 | path = path[1:] 289 | if not path: 290 | path = self.default_path 291 | 292 | type = self._resolve_storagetype(path, type) 293 | if type == "directory": 294 | if path and not path.endswith("/"): 295 | path += "/" 296 | exists, members = self._fetch(path, content=content) 297 | if not exists: 298 | raise web.HTTPError(404, u"No such directory: %s" % path) 299 | model = self._dir_model(path, members, content=content) 300 | else: 301 | exists, blob = self._fetch(path) 302 | if not exists: 303 | raise web.HTTPError(404, u"No such file: %s" % path) 304 | if type == "notebook" or (type is None and path.endswith(".ipynb")): 305 | model = self._notebook_model(blob, content=content) 306 | else: 307 | model = self._file_model(blob, content=content, format=format) 308 | return model 309 | 310 | @debug_args 311 | def save(self, model, path): 312 | if path.startswith("/"): 313 | path = path[1:] 314 | if "type" not in model: 315 | raise web.HTTPError(400, u"No file type provided") 316 | if "content" not in model and model["type"] != "directory": 317 | raise web.HTTPError(400, u"No file content provided") 318 | if "/" not in path and self.default_path: 319 | path = "%s/%s" % (self.default_path, path) 320 | bucket_name, bucket_path = self._parse_path(path) 321 | if bucket_path == "" and model["type"] != "directory": 322 | raise web.HTTPError(403, u"You may only create directories " 323 | u"(buckets) at the root level.") 324 | if bucket_path != "" and model["type"] == "directory" and \ 325 | bucket_path[-1] != "/": 326 | path += "/" 327 | self.log.debug("Saving %s", path) 328 | 329 | self.run_pre_save_hook(model=model, path=path) 330 | 331 | try: 332 | if model["type"] == "notebook": 333 | nb = nbformat.from_dict(model["content"]) 334 | self.check_and_sign(nb, path) 335 | self._save_notebook(path, nb) 336 | # One checkpoint should always exist for notebooks. 337 | if not self.checkpoints.list_checkpoints(path): 338 | self.create_checkpoint(path) 339 | elif model["type"] == "file": 340 | # Missing format will be handled internally by _save_file. 341 | self._save_file(path, model["content"], model.get("format")) 342 | elif model["type"] == "directory": 343 | self._save_directory(path, model) 344 | else: 345 | raise web.HTTPError( 346 | 00, u"Unhandled contents type: %s" % model["type"]) 347 | except web.HTTPError: 348 | raise 349 | except Exception as e: 350 | self.log.error(u"Error while saving file: %s %s", path, e, 351 | exc_info=True) 352 | raise web.HTTPError( 353 | 500, u"Unexpected error while saving file: %s %s" % (path, e)) 354 | 355 | validation_message = None 356 | if model["type"] == "notebook": 357 | self.validate_notebook_model(model) 358 | validation_message = model.get("message", None) 359 | 360 | model = self.get(path, content=False) 361 | if validation_message: 362 | model["message"] = validation_message 363 | 364 | self.run_post_save_hook(model=model, os_path=path) 365 | 366 | return model 367 | 368 | @debug_args 369 | def delete_file(self, path): 370 | if path.startswith("/"): 371 | path = path[1:] 372 | bucket_name, bucket_path = self._parse_path(path) 373 | bucket = self._get_bucket(bucket_name, throw=True) 374 | if bucket_path == "": 375 | bucket.delete() 376 | del self._bucket_cache[bucket_name] 377 | return 378 | it = bucket.list_blobs(prefix=bucket_path, delimiter="/", 379 | max_results=self.max_list_size) 380 | files = list(islice(it, self.max_list_size)) 381 | folders = it.prefixes 382 | bucket.delete_blobs(files) 383 | for folder in folders: 384 | self.delete_file(bucket_name + "/" + folder) 385 | 386 | @debug_args 387 | def rename_file(self, old_path, new_path): 388 | if old_path.startswith("/"): 389 | old_path = old_path[1:] 390 | if new_path.startswith("/"): 391 | new_path = new_path[1:] 392 | old_bucket_name, old_bucket_path = self._parse_path(old_path) 393 | old_bucket = self._get_bucket(old_bucket_name, throw=True) 394 | new_bucket_name, new_bucket_path = self._parse_path(new_path) 395 | new_bucket = self._get_bucket(new_bucket_name, throw=True) 396 | old_blob = old_bucket.get_blob(old_bucket_path) 397 | if old_bucket_name == new_bucket_name: 398 | if old_blob is not None: 399 | old_bucket.rename_blob(old_blob, new_bucket_path) 400 | return 401 | if not old_bucket_path.endswith("/"): 402 | old_bucket_path += "/" 403 | if not new_bucket_path.endswith("/"): 404 | new_bucket_path += "/" 405 | it = old_bucket.list_blobs(prefix=old_bucket_path, delimiter="/", 406 | max_results=self.max_list_size) 407 | old_blobs = list(islice(it, self.max_list_size)) 408 | folders = it.prefixes 409 | for ob in old_blobs: 410 | old_bucket.rename_blob( 411 | ob, new_bucket_path + self._get_blob_name(ob)) 412 | for f in folders: 413 | self.rename_file( 414 | old_bucket_name + "/" + f, 415 | new_bucket_name + "/" + 416 | f.replace(old_bucket_path, new_bucket_path, 1)) 417 | return 418 | if old_blob is not None: 419 | old_bucket.copy_blob(old_blob, new_bucket, new_bucket_path) 420 | old_bucket.delete_blob(old_blob) 421 | return 422 | if not old_bucket_path.endswith("/"): 423 | old_bucket_path += "/" 424 | if not new_bucket_path.endswith("/"): 425 | new_bucket_path += "/" 426 | it = old_bucket.list_blobs(prefix=old_bucket_path, delimiter="/", 427 | max_results=self.max_list_size) 428 | old_blobs = list(islice(it, self.max_list_size)) 429 | folders = it.prefixes 430 | for ob in old_blobs: 431 | old_bucket.copy_blob(ob, new_bucket, new_bucket_path + 432 | self._get_blob_name(ob)) 433 | ob.delete() 434 | for f in folders: 435 | self.rename_file( 436 | old_bucket_name + "/" + f, 437 | new_bucket_name + "/" + 438 | f.replace(old_bucket_path, new_bucket_path, 1)) 439 | 440 | @property 441 | def client(self): 442 | """ 443 | :return: used instance of :class:`google.cloud.storage.Client`. 444 | """ 445 | if self._client is not None: 446 | return self._client 447 | if not self.project: 448 | self._client = GSClient() 449 | else: 450 | self._client = GSClient.from_service_account_json( 451 | self.keyfile, project=self.project) 452 | return self._client 453 | 454 | def run_post_save_hook(self, model, os_path): 455 | """Run the post-save hook if defined, and log errors""" 456 | if self.post_save_hook: 457 | try: 458 | self.log.debug("Running post-save hook on %s", os_path) 459 | self.post_save_hook(os_path=os_path, 460 | model=model, 461 | contents_manager=self) 462 | except Exception: 463 | self.log.error("Post-save hook failed on %s", os_path, exc_info=True) 464 | 465 | @default("checkpoints_class") 466 | def _checkpoints_class_default(self): 467 | return GoogleStorageCheckpoints 468 | 469 | def _resolve_storagetype(self, path, storagetype): 470 | """Based on the arguments and status of GCS, return a valid type.""" 471 | if "/" not in path or path.endswith("/") or path == "": 472 | if storagetype not in (None, "directory"): 473 | raise web.HTTPError( 474 | 400, u"%s is not a directory" % path, reason="bad type") 475 | return "directory" 476 | if storagetype is None and path.endswith(".ipynb"): 477 | return "notebook" 478 | if storagetype is not None: 479 | return storagetype 480 | # If type cannot be inferred from the argument set, use 481 | # the storage API to see if a blob or a prefix exists. 482 | if self.file_exists(path): 483 | return "file" 484 | if self.dir_exists(path): 485 | return "directory" 486 | raise web.HTTPError( 487 | 404, u"%s does not exist" % path, reason="bad type") 488 | 489 | def _get_bucket(self, name, throw=False): 490 | """ 491 | Get the bucket by it's name. Uses cache by default. 492 | :param name: bucket name. 493 | :param throw: If True raises NotFound exception, otherwise, returns 494 | None. 495 | :return: instance of :class:`google.cloud.storage.Bucket` or None. 496 | """ 497 | if not self.cache_buckets: 498 | try: 499 | return self.client.get_bucket(name) 500 | except NotFound: 501 | if throw: 502 | raise 503 | return None 504 | try: 505 | cache = self._bucket_cache 506 | except AttributeError: 507 | self._bucket_cache = cache = {} 508 | try: 509 | return cache[name] 510 | except KeyError: 511 | try: 512 | bucket = self.client.get_bucket(name) 513 | except BrokenPipeError as e: 514 | if e.errno in (None, errno.EPIPE): 515 | return self._get_bucket(name, throw) 516 | else: 517 | raise 518 | except (BadRequest, NotFound): 519 | if throw: 520 | raise 521 | return None 522 | cache[name] = bucket 523 | return bucket 524 | 525 | @staticmethod 526 | def _parse_path(path): 527 | """ 528 | Splits the path into bucket name and path inside the bucket. 529 | :param path: string to split. 530 | :return: tuple(bucket name, bucket path). 531 | """ 532 | bucket, _, blobname = path.partition("/") 533 | return bucket, blobname 534 | 535 | @staticmethod 536 | def _get_blob_path(blob): 537 | """ 538 | Gets blob path. 539 | :param blob: instance of :class:`google.cloud.storage.Blob`. 540 | :return: path string. 541 | """ 542 | return blob.bucket.name + "/" + blob.name 543 | 544 | @staticmethod 545 | def _get_blob_name(blob): 546 | """ 547 | Gets blob name (last part of the path). 548 | :param blob: instance of :class:`google.cloud.storage.Blob`. 549 | :return: name string. 550 | """ 551 | if isinstance(blob, Blob): 552 | return os.path.basename(blob.name) 553 | assert isinstance(blob, (unicode, str)) 554 | if blob.endswith("/"): 555 | blob = blob[:-1] 556 | return os.path.basename(blob) 557 | 558 | @staticmethod 559 | def _get_dir_name(path): 560 | """ 561 | Extracts directory name like os.path.dirname. 562 | :param path: GCS path string. 563 | :return: directory name string. 564 | """ 565 | if path.endswith("/"): 566 | path = path[:-1] 567 | return path.rsplit("/", 1)[-1] 568 | 569 | @debug_args 570 | def _fetch(self, path, content=True): 571 | """ 572 | Retrieves the blob by it's path. 573 | :param path: blob path or directory name. 574 | :param content: If False, just check if path exists. 575 | :return: tuple(exists Bool, :class:`google.cloud.storage.Blob` or 576 | tuple(file [Blob], folders list)). 577 | """ 578 | if path == "": 579 | try: 580 | buckets = self.client.list_buckets() 581 | return True, ([], [b.name + "/" for b in buckets]) 582 | except BrokenPipeError as e: 583 | if e.errno in (None, errno.EPIPE): 584 | return self._fetch(path, content) 585 | else: 586 | raise 587 | try: 588 | bucket_name, bucket_path = self._parse_path(path) 589 | except ValueError: 590 | return False, None 591 | try: 592 | bucket = self._get_bucket(bucket_name) 593 | except Forbidden: 594 | return True, None 595 | if bucket is None: 596 | return False, None 597 | if bucket_path == "" and not content: 598 | return True, None 599 | if bucket_path == "" or bucket_path.endswith("/"): 600 | if bucket_path != "": 601 | try: 602 | exists = bucket.blob(bucket_path).exists() 603 | except BrokenPipeError as e: 604 | if e.errno in (None, errno.EPIPE): 605 | return self._fetch(path, content) 606 | else: 607 | raise 608 | if exists and not content: 609 | return True, None 610 | # blob may not exist but at the same time be a part of a path 611 | max_list_size = self.max_list_size if content else 1 612 | try: 613 | it = bucket.list_blobs(prefix=bucket_path, delimiter="/", 614 | max_results=max_list_size) 615 | try: 616 | files = list(islice(it, max_list_size)) 617 | except BrokenPipeError as e: 618 | if e.errno in (None, errno.EPIPE): 619 | return self._fetch(path, content) 620 | else: 621 | raise 622 | except NotFound: 623 | del self._bucket_cache[bucket_name] 624 | return False, None 625 | folders = it.prefixes 626 | return (bool(files or folders or bucket_path == ""), 627 | (files, folders) if content else None) 628 | if not content: 629 | return bucket.blob(bucket_path).exists(), None 630 | try: 631 | blob = bucket.get_blob(bucket_path) 632 | except BrokenPipeError as e: 633 | if e.errno in (None, errno.EPIPE): 634 | return self._fetch(path, content) 635 | else: 636 | raise 637 | return blob is not None, blob 638 | 639 | def _base_model(self, blob): 640 | """Builds the common base of a contents model""" 641 | last_modified = blob.updated 642 | created = last_modified 643 | model = { 644 | "name": self._get_blob_name(blob), 645 | "path": self._get_blob_path(blob), 646 | "last_modified": last_modified, 647 | "created": created, 648 | "content": None, 649 | "format": None, 650 | "mimetype": blob.content_type, 651 | "writable": True 652 | } 653 | return model 654 | 655 | def _read_file(self, blob, format): 656 | """Reads a non-notebook file. 657 | 658 | blob: instance of :class:`google.cloud.storage.Blob`. 659 | format: 660 | If "text", the contents will be decoded as UTF-8. 661 | If "base64", the raw bytes contents will be encoded as base64. 662 | If not specified, try to decode as UTF-8, and fall back to base64 663 | """ 664 | bcontent = blob.download_as_string() 665 | 666 | if format is None or format == "text": 667 | # Try to interpret as unicode if format is unknown or if unicode 668 | # was explicitly requested. 669 | try: 670 | return bcontent.decode("utf8"), "text" 671 | except UnicodeError: 672 | if format == "text": 673 | raise web.HTTPError( 674 | 400, "%s is not UTF-8 encoded" % 675 | self._get_blob_path(blob), 676 | reason="bad format", 677 | ) 678 | return base64.encodebytes(bcontent).decode("ascii"), "base64" 679 | 680 | def _file_model(self, blob, content=True, format=None): 681 | """Builds a model for a file 682 | 683 | if content is requested, include the file contents. 684 | 685 | format: 686 | If "text", the contents will be decoded as UTF-8. 687 | If "base64", the raw bytes contents will be encoded as base64. 688 | If not specified, try to decode as UTF-8, and fall back to base64 689 | """ 690 | model = self._base_model(blob) 691 | model["type"] = "file" 692 | 693 | if content: 694 | content, format = self._read_file(blob, format) 695 | if model["mimetype"] == "text/plain": 696 | default_mime = { 697 | "text": "text/plain", 698 | "base64": "application/octet-stream" 699 | }[format] 700 | model["mimetype"] = default_mime 701 | 702 | model.update( 703 | content=content, 704 | format=format, 705 | ) 706 | 707 | return model 708 | 709 | def _read_notebook(self, blob): 710 | """ 711 | Reads a notebook file from GCS blob. 712 | :param blob: :class:`google.cloud.storage.Blob` instance. 713 | :return: :class:`nbformat.notebooknode.NotebookNode` instance. 714 | """ 715 | data = blob.download_as_string().decode("utf-8") 716 | nb = nbformat.reads(data, as_version=4) 717 | self.mark_trusted_cells(nb, self._get_blob_path(blob)) 718 | return nb 719 | 720 | def _notebook_model(self, blob, content=True): 721 | """Builds a notebook model. 722 | 723 | if content is requested, the notebook content will be populated 724 | as a JSON structure (not double-serialized) 725 | """ 726 | model = self._base_model(blob) 727 | model["type"] = "notebook" 728 | if content: 729 | nb = self._read_notebook(blob) 730 | model["content"] = nb 731 | model["mimetype"] = "application/x-ipynb+json" 732 | model["format"] = "json" 733 | self.validate_notebook_model(model) 734 | return model 735 | 736 | def _dir_model(self, path, members, content=True): 737 | """Builds a model for a directory 738 | 739 | if content is requested, will include a listing of the directory 740 | """ 741 | model = { 742 | "type": "directory", 743 | "name": self._get_dir_name(path), 744 | "path": path.rstrip('/'), 745 | "last_modified": "", 746 | "created": "", 747 | "content": None, 748 | "format": None, 749 | "mimetype": "application/x-directory", 750 | "writable": (members is not None or not self.is_hidden(path)) 751 | } 752 | if content: 753 | blobs, folders = members 754 | model["content"] = contents = [] 755 | for blob in blobs: 756 | if self._get_blob_path(blob) != path and \ 757 | self.should_list(self._get_blob_name(blob)): 758 | contents.append(self.get( 759 | path=blob, 760 | content=False) 761 | ) 762 | if path != "": 763 | tmpl = "%s/%%s" % self._parse_path(path)[0] 764 | else: 765 | tmpl = "%s" 766 | _, this = self._parse_path(path) 767 | for folder in folders: 768 | if self.should_list(folder) and folder != this: 769 | contents.append(self.get( 770 | path=tmpl % folder, 771 | content=False) 772 | ) 773 | model["format"] = "json" 774 | 775 | return model 776 | 777 | def _save_notebook(self, path, nb): 778 | """ 779 | Uploads notebook to GCS. 780 | :param path: blob path. 781 | :param nb: :class:`nbformat.notebooknode.NotebookNode` instance. 782 | :return: created :class:`google.cloud.storage.Blob`. 783 | """ 784 | bucket_name, bucket_path = self._parse_path(path) 785 | bucket = self._get_bucket(bucket_name, throw=True) 786 | data = nbformat.writes(nb, version=nbformat.NO_CONVERT) 787 | blob = bucket.blob(bucket_path) 788 | blob.upload_from_string(data, "application/x-ipynb+json") 789 | return blob 790 | 791 | def _save_file(self, path, content, format): 792 | """Uploads content of a generic file to GCS. 793 | :param: path blob path. 794 | :param: content file contents string. 795 | :param: format the description of the input format, can be either 796 | "text" or "base64". 797 | :return: created :class:`google.cloud.storage.Blob`. 798 | """ 799 | bucket_name, bucket_path = self._parse_path(path) 800 | bucket = self._get_bucket(bucket_name, throw=True) 801 | 802 | if format not in {"text", "base64"}: 803 | raise web.HTTPError( 804 | 400, 805 | u"Must specify format of file contents as \"text\" or " 806 | u"\"base64\"", 807 | ) 808 | try: 809 | if format == "text": 810 | bcontent = content.encode("utf8") 811 | else: 812 | b64_bytes = content.encode("ascii") 813 | bcontent = base64.decodebytes(b64_bytes) 814 | except Exception as e: 815 | raise web.HTTPError( 816 | 400, u"Encoding error saving %s: %s" % (path, e) 817 | ) 818 | blob = bucket.blob(bucket_path) 819 | blob.upload_from_string(bcontent) 820 | return blob 821 | 822 | def _save_directory(self, path, model): 823 | """Creates a directory in GCS.""" 824 | exists, obj = self._fetch(path) 825 | if exists: 826 | if isinstance(obj, Blob): 827 | raise web.HTTPError(400, u"Not a directory: %s" % path) 828 | else: 829 | self.log.debug("Directory %r already exists", path) 830 | return 831 | bucket_name, bucket_path = self._parse_path(path) 832 | if bucket_path == "": 833 | self.client.create_bucket(bucket_name) 834 | else: 835 | bucket = self._get_bucket(bucket_name, throw=True) 836 | bucket.blob(bucket_path).upload_from_string( 837 | b"", content_type="application/x-directory") 838 | 839 | debug_args = staticmethod(debug_args) 840 | -------------------------------------------------------------------------------- /jgscm/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/src-d/jgscm/65ee2fe74d2db05b0873ee9e39af42925fc0ea83/jgscm/tests/__init__.py -------------------------------------------------------------------------------- /jgscm/tests/credentials.json.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/src-d/jgscm/65ee2fe74d2db05b0873ee9e39af42925fc0ea83/jgscm/tests/credentials.json.enc -------------------------------------------------------------------------------- /jgscm/tests/test.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from datetime import datetime 3 | import pickle 4 | from unittest import main, TestCase 5 | import uuid 6 | import sys 7 | 8 | import nbformat 9 | from tornado import web 10 | 11 | from jgscm import GoogleStorageContentManager 12 | 13 | if sys.version_info[0] == 2: 14 | import socket 15 | BrokenPipeError = socket.error 16 | base64.encodebytes = base64.encodestring 17 | base64.decodebytes = base64.decodestring 18 | else: 19 | unicode = str 20 | 21 | 22 | class TestGoogleStorageContentManager(TestCase): 23 | BUCKET = "%s-%s" % ("jgcsm", uuid.uuid4()) 24 | NOTEBOOK = """{ 25 | "cells": [ 26 | { 27 | "cell_type": "code", 28 | "execution_count": 1, 29 | "metadata": { 30 | "collapsed": false 31 | }, 32 | "outputs": [ 33 | { 34 | "name": "stdout", 35 | "output_type": "stream", 36 | "text": [ 37 | "Populating the interactive namespace from numpy and matplotlib\\n" 38 | ] 39 | } 40 | ], 41 | "source": [ 42 | "%pylab inline" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": { 49 | "collapsed": true 50 | }, 51 | "outputs": [], 52 | "source": [] 53 | } 54 | ], 55 | "metadata": { 56 | "kernelspec": { 57 | "display_name": "Python 3", 58 | "language": "python", 59 | "name": "python3" 60 | }, 61 | "language_info": { 62 | "codemirror_mode": { 63 | "name": "ipython", 64 | "version": 3 65 | }, 66 | "file_extension": ".py", 67 | "mimetype": "text/x-python", 68 | "name": "python", 69 | "nbconvert_exporter": "python", 70 | "pygments_lexer": "ipython3", 71 | "version": "3.5.1+" 72 | } 73 | }, 74 | "nbformat": 4, 75 | "nbformat_minor": 0 76 | }""" 77 | 78 | @classmethod 79 | def setUpClass(cls): 80 | GoogleStorageContentManager().client.bucket(cls.BUCKET).create() 81 | 82 | @classmethod 83 | def tearDownClass(cls): 84 | GoogleStorageContentManager().client.bucket(cls.BUCKET).delete( 85 | force=True) 86 | 87 | def setUp(self): 88 | super(TestGoogleStorageContentManager, self).setUp() 89 | self.contents_manager = GoogleStorageContentManager() 90 | 91 | @property 92 | def bucket(self): 93 | return self.contents_manager._get_bucket(self.BUCKET) 94 | 95 | def path(self, sub): 96 | return "/" + self.BUCKET + "/" + sub 97 | 98 | def test_file_exists(self): 99 | self.assertFalse(self.contents_manager.file_exists("")) 100 | self.assertFalse(self.contents_manager.file_exists("/")) 101 | self.assertFalse(self.contents_manager.file_exists(self.BUCKET)) 102 | self.assertFalse(self.contents_manager.file_exists(self.BUCKET)) 103 | bucket = self.bucket 104 | blob = bucket.blob("test") 105 | blob.upload_from_string(b"test") 106 | try: 107 | self.assertTrue(self.contents_manager.file_exists(self.path( 108 | "test"))) 109 | self.assertFalse(self.contents_manager.file_exists(self.path( 110 | "test_"))) 111 | finally: 112 | blob.delete() 113 | blob = bucket.blob("test/other") 114 | blob.upload_from_string(b"test") 115 | try: 116 | self.assertFalse(self.contents_manager.file_exists(self.path( 117 | "test"))) 118 | self.assertFalse(self.contents_manager.file_exists(self.path( 119 | "test/"))) 120 | self.assertTrue(self.contents_manager.file_exists(self.path( 121 | "test/other"))) 122 | self.assertFalse(self.contents_manager.file_exists(self.path( 123 | "test/other/"))) 124 | finally: 125 | blob.delete() 126 | 127 | def test_dir_exists(self): 128 | self.assertTrue(self.contents_manager.dir_exists("")) 129 | self.assertTrue(self.contents_manager.dir_exists("/")) 130 | self.assertTrue(self.contents_manager.dir_exists("/" + self.BUCKET)) 131 | self.assertTrue(self.contents_manager.dir_exists(self.BUCKET)) 132 | self.assertTrue(self.contents_manager.dir_exists( 133 | "/" + self.BUCKET + "/")) 134 | self.assertTrue(self.contents_manager.dir_exists(self.BUCKET + "/")) 135 | self.assertFalse(self.contents_manager.dir_exists( 136 | self.BUCKET + "/" + "test")) 137 | self.assertFalse(self.contents_manager.dir_exists( 138 | "/" + self.BUCKET + "/" + "test")) 139 | self.assertFalse(self.contents_manager.dir_exists( 140 | self.BUCKET + "blahblah")) 141 | self.assertFalse(self.contents_manager.dir_exists( 142 | "/" + self.BUCKET + "blahblah")) 143 | bucket = self.bucket 144 | blob = bucket.blob("test") 145 | blob.upload_from_string(b"test") 146 | try: 147 | self.assertFalse(self.contents_manager.dir_exists(self.path( 148 | "wtf"))) 149 | self.assertFalse(self.contents_manager.dir_exists(self.path( 150 | "test"))) 151 | self.assertFalse(self.contents_manager.dir_exists(self.path( 152 | "test/"))) 153 | finally: 154 | blob.delete() 155 | blob = bucket.blob("test/") 156 | blob.upload_from_string(b"") 157 | try: 158 | self.assertTrue(self.contents_manager.dir_exists(self.path( 159 | "test"))) 160 | self.assertTrue(self.contents_manager.dir_exists(self.path( 161 | "test/"))) 162 | finally: 163 | blob.delete() 164 | blob = bucket.blob("test/other/") 165 | blob.upload_from_string(b"") 166 | try: 167 | self.assertTrue(self.contents_manager.dir_exists(self.path( 168 | "test"))) 169 | self.assertTrue(self.contents_manager.dir_exists(self.path( 170 | "test/"))) 171 | self.assertTrue(self.contents_manager.dir_exists(self.path( 172 | "test/other"))) 173 | self.assertTrue(self.contents_manager.dir_exists(self.path( 174 | "test/other/"))) 175 | finally: 176 | blob.delete() 177 | blob = bucket.blob("test/other") 178 | blob.upload_from_string(b"data") 179 | try: 180 | self.assertTrue(self.contents_manager.dir_exists(self.path( 181 | "test"))) 182 | self.assertTrue(self.contents_manager.dir_exists(self.path( 183 | "test/"))) 184 | self.assertFalse(self.contents_manager.dir_exists(self.path( 185 | "test/other"))) 186 | self.assertFalse(self.contents_manager.dir_exists(self.path( 187 | "test/other/"))) 188 | finally: 189 | blob.delete() 190 | 191 | def test_is_hidden(self): 192 | self.assertFalse(self.contents_manager.is_hidden(self.BUCKET)) 193 | self.assertFalse(self.contents_manager.is_hidden("/" + self.BUCKET)) 194 | self.assertFalse(self.contents_manager.is_hidden(self.BUCKET + "/")) 195 | self.assertFalse(self.contents_manager.is_hidden( 196 | "/" + self.BUCKET + "/")) 197 | self.assertFalse(self.contents_manager.is_hidden(self.path( 198 | "something"))) 199 | self.assertTrue(self.contents_manager.is_hidden( 200 | self.BUCKET + "blahblah")) 201 | self.assertTrue(self.contents_manager.is_hidden( 202 | self.BUCKET + "blahblah/test")) 203 | 204 | self.contents_manager.hide_dotted_blobs = True 205 | self.assertTrue(self.contents_manager.is_hidden(self.path( 206 | ".test"))) 207 | self.assertTrue(self.contents_manager.is_hidden(self.path( 208 | ".test/"))) 209 | self.contents_manager.hide_dotted_blobs = False 210 | self.assertFalse(self.contents_manager.is_hidden(self.path( 211 | ".test"))) 212 | self.assertFalse(self.contents_manager.is_hidden(self.path( 213 | ".test/"))) 214 | self.contents_manager.hide_dotted_blobs = True 215 | self.assertFalse(self.contents_manager.is_hidden(self.path( 216 | ".test/other"))) 217 | self.contents_manager.hide_dotted_blobs = False 218 | self.assertFalse(self.contents_manager.is_hidden(self.path( 219 | ".test/other"))) 220 | self.contents_manager.hide_dotted_blobs = True 221 | 222 | def test_get(self): 223 | model = self.contents_manager.get("/") 224 | self.assertEqual(model["type"], "directory") 225 | self.assertEqual(model["mimetype"], "application/x-directory") 226 | self.assertEqual(model["name"], "") 227 | self.assertEqual(model["path"], "") 228 | self.assertEqual(model["last_modified"], "") 229 | self.assertEqual(model["created"], "") 230 | self.assertEqual(model["format"], "json") 231 | self.assertEqual(model["writable"], True) 232 | self.assertIsInstance(model["content"], list) 233 | self.assertGreaterEqual(len(model["content"]), 1) 234 | for m in model["content"]: 235 | self.assertEqual(m["type"], "directory") 236 | self.assertEqual(m["mimetype"], "application/x-directory") 237 | if m["name"] == self.BUCKET: 238 | self.assertEqual(m["path"], self.BUCKET) 239 | self.assertEqual(m["last_modified"], "") 240 | self.assertEqual(m["created"], "") 241 | self.assertIsNone(m["format"]) 242 | self.assertIsNone(m["content"]) 243 | self.assertEqual(m["writable"], True) 244 | 245 | model = self.contents_manager.get(self.path("")) 246 | self.assertEqual(model["type"], "directory") 247 | self.assertEqual(model["mimetype"], "application/x-directory") 248 | self.assertEqual(model["name"], self.BUCKET) 249 | self.assertEqual(model["path"], self.BUCKET) 250 | self.assertEqual(model["last_modified"], "") 251 | self.assertEqual(model["created"], "") 252 | self.assertEqual(model["format"], "json") 253 | self.assertEqual(model["content"], []) 254 | self.assertEqual(model["writable"], True) 255 | 256 | model2 = self.contents_manager.get(self.path(""), type="directory") 257 | self.assertEqual(model, model2) 258 | with self.assertRaises(web.HTTPError): 259 | self.contents_manager.get(self.path(""), type="file") 260 | with self.assertRaises(web.HTTPError): 261 | self.contents_manager.get(self.path(""), type="notebook") 262 | 263 | bucket = self.bucket 264 | blob = bucket.blob("test/other.txt") 265 | blob.upload_from_string(b"contents") 266 | try: 267 | model = self.contents_manager.get(self.path("test/other.txt")) 268 | self.assertIsInstance(model, dict) 269 | self.assertEqual(model["type"], "file") 270 | self.assertEqual(model["mimetype"], "text/plain") 271 | self.assertEqual(model["name"], "other.txt") 272 | self.assertEqual(model["path"], self.path("test/other.txt")[1:]) 273 | self.assertEqual(model["last_modified"], blob.updated) 274 | self.assertIsInstance(model["last_modified"], datetime) 275 | self.assertEqual(model["created"], blob.updated) 276 | self.assertIsInstance(model["created"], datetime) 277 | self.assertEqual(model["content"], u"contents") 278 | self.assertEqual(model["format"], "text") 279 | self.assertEqual(model["writable"], True) 280 | 281 | model2 = self.contents_manager.get(self.path("test/other.txt"), 282 | type="file") 283 | self.assertEqual(model, model2) 284 | with self.assertRaises(web.HTTPError): 285 | self.contents_manager.get(self.path("test/other.txt"), 286 | type="directory") 287 | with self.assertRaises(nbformat.reader.NotJSONError): 288 | self.contents_manager.get(self.path("test/other.txt"), 289 | type="notebook") 290 | except: # nopep8 291 | blob.delete() 292 | raise 293 | 294 | model = self.contents_manager.get(self.path("")) 295 | self.assertEqual(model["type"], "directory") 296 | self.assertEqual(model["mimetype"], "application/x-directory") 297 | self.assertEqual(model["name"], self.BUCKET) 298 | self.assertEqual(model["path"], self.BUCKET) 299 | self.assertEqual(model["last_modified"], "") 300 | self.assertEqual(model["created"], "") 301 | self.assertEqual(model["format"], "json") 302 | self.assertEqual(model["writable"], True) 303 | self.assertIsInstance(model["content"], list) 304 | self.assertEqual(len(model["content"]), 1) 305 | model = model["content"][0] 306 | self.assertEqual(model["type"], "directory") 307 | self.assertEqual(model["mimetype"], "application/x-directory") 308 | self.assertEqual(model["name"], "test") 309 | self.assertEqual(model["path"], self.path("test")[1:]) 310 | self.assertEqual(model["last_modified"], "") 311 | self.assertEqual(model["created"], "") 312 | self.assertIsNone(model["content"]) 313 | self.assertIsNone(model["format"]) 314 | self.assertEqual(model["writable"], True) 315 | 316 | blob2 = bucket.blob("test/fold/another.txt") 317 | blob2.upload_from_string(b"contents") 318 | try: 319 | model = self.contents_manager.get(self.path("test/")) 320 | self.assertIsInstance(model, dict) 321 | self.assertEqual(model["type"], "directory") 322 | self.assertEqual(model["mimetype"], "application/x-directory") 323 | self.assertEqual(model["name"], "test") 324 | self.assertEqual(model["path"], self.path("test")[1:]) 325 | self.assertEqual(model["last_modified"], "") 326 | self.assertEqual(model["created"], "") 327 | self.assertEqual(model["format"], "json") 328 | self.assertEqual(model["writable"], True) 329 | self.assertIsInstance(model["content"], list) 330 | self.assertEqual(len(model["content"]), 2) 331 | fc, dc = model["content"] 332 | finally: 333 | blob.delete() 334 | blob2.delete() 335 | self.assertIsInstance(fc, dict) 336 | self.assertEqual(fc["type"], "file") 337 | self.assertEqual(fc["mimetype"], "text/plain") 338 | self.assertEqual(fc["name"], "other.txt") 339 | self.assertEqual(fc["path"], self.path("test/other.txt")[1:]) 340 | self.assertIsNone(fc["content"]) 341 | self.assertIsNone(fc["format"]) 342 | self.assertEqual(fc["last_modified"], blob.updated) 343 | self.assertIsInstance(fc["last_modified"], datetime) 344 | self.assertEqual(fc["created"], blob.updated) 345 | self.assertIsInstance(fc["created"], datetime) 346 | 347 | self.assertIsInstance(dc, dict) 348 | self.assertEqual(dc["type"], "directory") 349 | self.assertEqual(dc["mimetype"], "application/x-directory") 350 | self.assertEqual(dc["name"], "fold") 351 | self.assertEqual(dc["path"], self.path("test/fold")[1:]) 352 | self.assertIsNone(dc["content"]) 353 | self.assertIsNone(dc["format"]) 354 | self.assertEqual(dc["last_modified"], "") 355 | self.assertEqual(dc["created"], "") 356 | 357 | def test_get_base64(self): 358 | bucket = self.bucket 359 | blob = bucket.blob("test.pickle") 360 | obj = {"one": 1, "two": [2, 3]} 361 | blob.upload_from_string(pickle.dumps(obj)) 362 | model = self.contents_manager.get( 363 | self.path("test.pickle"), format="base64") 364 | self.assertEqual(model["type"], "file") 365 | self.assertEqual(model["mimetype"], "application/octet-stream") 366 | self.assertEqual(model["format"], "base64") 367 | content = model["content"] 368 | self.assertIsInstance(content, unicode) 369 | bd = base64.decodebytes(content.encode()) 370 | self.assertEqual(obj, pickle.loads(bd)) 371 | 372 | def test_get_notebook(self): 373 | bucket = self.bucket 374 | blob = bucket.blob("test.ipynb") 375 | blob.upload_from_string(self.NOTEBOOK.encode()) 376 | try: 377 | model = self.contents_manager.get( 378 | self.path("test.ipynb"), type="notebook") 379 | self.assertEqual(model["type"], "notebook") 380 | self.assertEqual(model["mimetype"], "application/x-ipynb+json") 381 | self.assertEqual(model["format"], "json") 382 | self.assertIsInstance(model["content"], 383 | nbformat.notebooknode.NotebookNode) 384 | finally: 385 | blob.delete() 386 | 387 | def test_delete_file(self): 388 | bucket = self.bucket 389 | blob = bucket.blob("test/other.txt") 390 | blob.upload_from_string(b"contents") 391 | try: 392 | self.contents_manager.delete_file(self.path("test/other.txt")) 393 | self.assertFalse(blob.exists()) 394 | except: # nopep8 395 | blob.delete() 396 | raise 397 | self.contents_manager.delete_file(self.BUCKET) 398 | self.assertFalse(bucket.exists()) 399 | bucket.create() 400 | blob = bucket.blob("test/other/") 401 | blob.upload_from_string(b"contents") 402 | try: 403 | self.contents_manager.delete_file(self.path("test/other/")) 404 | self.assertFalse(blob.exists()) 405 | self.assertFalse(bucket.blob("test/").exists()) 406 | except: # nopep8 407 | blob.delete() 408 | raise 409 | blob1 = bucket.blob("test/other.txt") 410 | blob1.upload_from_string(b"contents") 411 | blob2 = bucket.blob("test/next/another.txt") 412 | blob2.upload_from_string(b"contents") 413 | try: 414 | self.contents_manager.delete_file(self.path("test/")) 415 | self.assertFalse(blob1.exists()) 416 | self.assertFalse(blob2.exists()) 417 | self.assertFalse(bucket.blob("test/").exists()) 418 | except: # nopep8 419 | try: 420 | blob1.delete() 421 | finally: 422 | blob2.delete() 423 | raise 424 | 425 | def test_rename_file(self): 426 | bucket = self.bucket 427 | blob = bucket.blob("test/other.txt") 428 | blob.upload_from_string(b"contents") 429 | try: 430 | self.contents_manager.rename_file(self.path("test/other.txt"), 431 | self.path("test1/other1.txt")) 432 | self.assertFalse(blob.exists()) 433 | except: # nopep8 434 | blob.delete() 435 | raise 436 | blob = bucket.blob("test1/other1.txt") 437 | self.assertTrue(blob.exists()) 438 | blob.delete() 439 | 440 | blob1 = bucket.blob("test/other.txt") 441 | blob1.upload_from_string(b"contents") 442 | blob2 = bucket.blob("test/other1.txt") 443 | blob2.upload_from_string(b"contents") 444 | try: 445 | self.contents_manager.rename_file(self.path("test/"), 446 | self.path("test1/")) 447 | self.assertFalse(blob1.exists()) 448 | self.assertFalse(blob2.exists()) 449 | except: # nopep8 450 | try: 451 | blob1.delete() 452 | finally: 453 | blob2.delete() 454 | raise 455 | blob = bucket.blob("test1/other.txt") 456 | self.assertTrue(blob.exists()) 457 | blob.delete() 458 | blob = bucket.blob("test1/other1.txt") 459 | self.assertTrue(blob.exists()) 460 | blob.delete() 461 | 462 | blob = bucket.blob("test/dir/other.txt") 463 | blob.upload_from_string(b"contents") 464 | try: 465 | self.contents_manager.rename_file(self.path("test/"), 466 | self.path("test1/")) 467 | self.assertFalse(blob.exists()) 468 | except: # nopep8 469 | blob.delete() 470 | raise 471 | blob = bucket.blob("test1/dir/other.txt") 472 | self.assertTrue(blob.exists()) 473 | 474 | new_bucket = self.contents_manager.client.bucket( 475 | "jgscm-%s-new" % uuid.uuid4()) 476 | new_bucket.create() 477 | try: 478 | self.contents_manager.rename_file( 479 | self.path("test1/"), new_bucket.name + "/" + "test/") 480 | self.assertFalse(blob.exists()) 481 | except: # nopep8 482 | blob.delete() 483 | new_bucket.delete(force=True) 484 | raise 485 | try: 486 | self.assertTrue(new_bucket.blob("test/dir/other.txt").exists()) 487 | finally: 488 | new_bucket.delete(force=True) 489 | 490 | def test_save_dir(self): 491 | self.contents_manager.save({ 492 | "type": "directory" 493 | }, self.path("test/")) 494 | bucket = self.bucket 495 | blob = bucket.blob("test/") 496 | self.assertTrue(blob.exists()) 497 | blob.delete() 498 | 499 | def test_save_file(self): 500 | self.contents_manager.save({ 501 | "type": "file", 502 | "content": "blah-blah-blah", 503 | "format": "text" 504 | }, self.path("test.txt")) 505 | bucket = self.bucket 506 | blob = bucket.blob("test.txt") 507 | self.assertTrue(blob.exists()) 508 | try: 509 | self.assertEqual(blob.download_as_string(), b"blah-blah-blah") 510 | finally: 511 | blob.delete() 512 | 513 | obj = {"one": 1, "two": [2, 3]} 514 | self.contents_manager.save({ 515 | "type": "file", 516 | "content": base64.encodebytes(pickle.dumps(obj)).decode("ascii"), 517 | "format": "base64" 518 | }, self.path("test.pickle")) 519 | bucket = self.bucket 520 | blob = bucket.blob("test.pickle") 521 | self.assertTrue(blob.exists()) 522 | try: 523 | self.assertEqual(blob.download_as_string(), pickle.dumps(obj)) 524 | finally: 525 | blob.delete() 526 | 527 | def test_save_notebook(self): 528 | nb = nbformat.reads(self.NOTEBOOK, 4) 529 | self.contents_manager.save({ 530 | "type": "notebook", 531 | "content": nb 532 | }, self.path("test.ipynb")) 533 | bucket = self.bucket 534 | blob = bucket.blob("test.ipynb") 535 | self.assertTrue(blob.exists()) 536 | try: 537 | self.assertEqual(blob.download_as_string(), self.NOTEBOOK.encode()) 538 | finally: 539 | blob.delete() 540 | 541 | 542 | if __name__ == "__main__": 543 | main() 544 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | google-cloud==0.32.0 2 | notebook==4.2.2 3 | nbformat==4.1.0 4 | tornado==4.4.1 5 | traitlets==4.2.2 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import setup 2 | 3 | 4 | setup( 5 | name="jgscm", 6 | description="Jupyter Google Cloud Storage ContentsManager", 7 | version="0.1.9", 8 | license="MIT", 9 | author="Vadim Markovtsev", 10 | author_email="vadim@sourced.tech", 11 | url="https://github.com/src-d/jgscm", 12 | download_url="https://github.com/src-d/jgscm", 13 | packages=["jgscm"], 14 | keywords=["jupyter", "ipython", "gcloud", "gcs"], 15 | install_requires=["google-cloud>=0.32.0", "notebook>=4.2", "nbformat>=4.1", 16 | "tornado>=4", "traitlets>=4.2"], 17 | package_data={"": ["requirements.txt", "LICENSE", "README.md"]}, 18 | classifiers=[ 19 | "Development Status :: 3 - Alpha", 20 | "Intended Audience :: End Users/Desktop", 21 | "License :: OSI Approved :: MIT License", 22 | "Programming Language :: Python :: 2.7", 23 | "Programming Language :: Python :: 3.2", 24 | "Programming Language :: Python :: 3.3", 25 | "Programming Language :: Python :: 3.4", 26 | "Topic :: Software Development :: Libraries" 27 | ] 28 | ) 29 | --------------------------------------------------------------------------------