├── .github └── workflows │ └── ci.yaml ├── .gitignore ├── .pre-commit-config.yaml ├── LICENSE ├── MANIFEST.in ├── README.rst ├── bin └── pgcontents ├── examples ├── example_checkpoints_config.py ├── example_jupyter_notebook_config.py └── hybrid_manager_example.py ├── notebook5_constraints.txt ├── notebook6_constraints.txt ├── pgcontents ├── __init__.py ├── alembic.ini ├── alembic.ini.template ├── alembic │ ├── README │ ├── __init__.py │ ├── env.py │ ├── script.py.mako │ └── versions │ │ ├── 1217e5fbdbd9_.py │ │ ├── 2d46c89138b0_.py │ │ ├── 32518998055d_.py │ │ ├── 33b3645dc7f5_.py │ │ ├── 3d5ea85fc44f_.py │ │ ├── 551f95fbd4a2_.py │ │ └── 597680fc6b80_.py ├── api_utils.py ├── checkpoints.py ├── constants.py ├── crypto.py ├── db_utils.py ├── error.py ├── hybridmanager.py ├── managerbase.py ├── pgmanager.py ├── query.py ├── schema.py ├── tests │ ├── __init__.py │ ├── test_encryption.py │ ├── test_hybrid_manager.py │ ├── test_pgcontents_api.py │ ├── test_pgmanager.py │ ├── test_synchronization.py │ └── utils.py └── utils │ ├── __init__.py │ ├── ipycompat.py │ ├── migrate.py │ └── sync.py ├── setup.cfg ├── setup.py └── tox.ini /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: PGContents CI 2 | on: 3 | push: 4 | branches: 5 | - master 6 | pull_request: 7 | branches: 8 | - master 9 | 10 | jobs: 11 | 12 | pre-commit: 13 | name: pre-commit 14 | runs-on: ubuntu-18.04 15 | steps: 16 | - uses: actions/checkout@v1 17 | - uses: actions/setup-python@v1 18 | with: 19 | python-version: 3.6 20 | - uses: actions/cache@v1 21 | with: 22 | path: ~/.cache/pre-commit 23 | key: pre-commit-${{ hashFiles('.pre-commit-config.yaml') }} 24 | - uses: pre-commit/action@v1.0.1 25 | 26 | tests: 27 | name: ${{ matrix.name }} 28 | runs-on: ubuntu-18.04 29 | strategy: 30 | fail-fast: false 31 | matrix: 32 | name: 33 | - py27-notebook5 34 | - py35-notebook5 35 | - py36-notebook5 36 | 37 | - py35-notebook6 38 | - py36-notebook6 39 | 40 | include: 41 | - name: py27-notebook5 42 | python-version: 2.7 43 | 44 | - name: py35-notebook5 45 | python-version: 3.5 46 | 47 | - name: py36-notebook5 48 | python-version: 3.6 49 | 50 | - name: py35-notebook6 51 | python-version: 3.5 52 | 53 | - name: py36-notebook6 54 | python-version: 3.6 55 | 56 | services: 57 | postgres: 58 | image: "postgres:9.5-alpine" 59 | ports: 60 | - 5432:5432 61 | env: 62 | POSTGRES_DB: pgcontents_testing 63 | POSTGRES_USER: pgcontents_user 64 | POSTGRES_PASSWORD: pgcontents_pass 65 | 66 | steps: 67 | 68 | - name: Dump Matrix Config 69 | env: 70 | MATRIX: ${{ toJson(matrix) }} 71 | run: echo "$MATRIX" 72 | 73 | - name: Checkout Repository 74 | uses: actions/checkout@v1 75 | 76 | - name: Setup Python 77 | uses: actions/setup-python@v1 78 | with: 79 | python-version: ${{ matrix.python-version }} 80 | 81 | - uses: actions/cache@v1 82 | with: 83 | path: ~/.cache/pip 84 | key: ${{ matrix.name }}-pip-${{ hashFiles('**/*_constraints.txt') }} 85 | 86 | - name: Setup LibPQ 87 | run: | 88 | sudo apt-get update 89 | sudo apt-get install libpq-dev 90 | 91 | - name: Install Tox 92 | run: | 93 | python -m pip install -U pip setuptools 94 | python -m pip install tox 95 | 96 | - name: Run Tox 97 | run: tox -e ${{ matrix.name }} 98 | env: 99 | PGCONTENTS_TEST_DB_URL: "postgresql://pgcontents_user:pgcontents_pass@localhost/pgcontents_testing" 100 | -------------------------------------------------------------------------------- /.gitignore: 
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # PyInstaller 26 | # Usually these files are written by a python script from a template 27 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 28 | *.manifest 29 | *.spec 30 | 31 | # Installer logs 32 | pip-log.txt 33 | pip-delete-this-directory.txt 34 | 35 | # Unit test / coverage reports 36 | htmlcov/ 37 | .tox/ 38 | .coverage 39 | .cache 40 | .noseids 41 | nosetests.xml 42 | coverage.xml 43 | /.pytest_cache/* 44 | 45 | # Translations 46 | *.mo 47 | *.pot 48 | 49 | # Django stuff: 50 | *.log 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # PyBuilder 56 | target/ 57 | 58 | #IntelliJ IDEA 59 | .idea/* 60 | *.iml 61 | 62 | *~ 63 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.6 3 | 4 | repos: 5 | 6 | - repo: https://github.com/pre-commit/pre-commit-hooks 7 | rev: v2.1.0 8 | hooks: 9 | - id: check-merge-conflict 10 | - id: check-yaml 11 | - id: check-symlinks 12 | - id: end-of-file-fixer 13 | - id: trailing-whitespace 14 | 15 | - repo: https://gitlab.com/pycqa/flake8 16 | rev: 3.7.7 17 | hooks: 18 | - id: flake8 19 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 
34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.rst 2 | include pgcontents/alembic.ini.template 3 | include pgcontents/alembic/script.py.mako 4 | include pgcontents/alembic/versions/* 5 | include LICENSE 6 | include requirements*.txt 7 | 8 | global-exclude *~ 9 | global-exclude *.pyc 10 | global-exclude *.pyo 11 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | PGContents 2 | ========== 3 | 4 | PGContents is a PostgreSQL-backed implementation of `IPEP 27 `_. It aims to be a transparent, drop-in replacement for IPython's standard filesystem-backed storage system. PGContents' `PostgresContentsManager` class can be used to replace all local filesystem storage with database-backed storage, while its `PostgresCheckpoints` class can be used to replace just IPython's checkpoint storage. These features are useful when running IPython in environments where you either don't have access to—or don't trust the reliability of—the local filesystem of your notebook server. 5 | 6 | This repository was developed as part of the `Quantopian Research Environment `_. 7 | 8 | Getting Started 9 | --------------- 10 | **Prerequisites:** 11 | - Write access to an empty `PostgreSQL `_ database. 12 | - A Python installation with `Jupyter Notebook `_ >= 5.0. 13 | 14 | **Installation:** 15 | 16 | 0. Install ``pgcontents`` from PyPI via ``pip install pgcontents``. 17 | 1. Run ``pgcontents init`` to configure your database. You will be prompted for a database URL for pgcontents to use for storage. (Alternatively, you can set the ``PGCONTENTS_DB_URL`` environment variable, or pass ``--db-url`` on the command line). 18 | 2. Configure Jupyter to use pgcontents as its storage backend. This can be done from the command line or by modifying your notebook config file. On a Unix-like system, your notebook config will be located at ``~/.jupyter/jupyter_notebook_config.py``. See the ``examples`` directory for example configuration files. 19 | 3. Enjoy your filesystem-free Jupyter experience! 20 | 21 | Demo Video 22 | ---------- 23 | You can see a demo of PGContents in action in `this presentation from JupyterCon 2017`_. 24 | 25 | .. _`this presentation from JupyterCon 2017` : https://youtu.be/TtsbspKHJGo?t=917 26 | -------------------------------------------------------------------------------- /bin/pgcontents: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | 4 | from getpass import getuser 5 | from os import getcwd 6 | import subprocess 7 | from textwrap import dedent 8 | 9 | import click 10 | 11 | import nbformat 12 | from nbconvert.preprocessors.clearoutput import ClearOutputPreprocessor 13 | from pgcontents.constants import ( 14 | ALEMBIC_DIR_LOCATION, 15 | DB_URL_ENVVAR, 16 | ) 17 | from pgcontents.crypto import single_password_crypto_factory 18 | from pgcontents.pgmanager import PostgresContentsManager 19 | 20 | from pgcontents.utils.migrate import ( 21 | temp_alembic_ini, 22 | upgrade, 23 | ) 24 | from tornado.web import HTTPError 25 | 26 | 27 | @click.group(context_settings=dict(help_option_names=['-h', '--help'])) 28 | def main(): 29 | """ 30 | Create or manage a pgcontents database.
31 | 32 | For documentation on sub-commands, do: 33 | 34 | pgcontents [COMMAND] --help 35 | """ 36 | pass 37 | 38 | 39 | # Options 40 | _db_url = click.option( 41 | '-l', '--db-url', 42 | type=click.STRING, 43 | help='SQLAlchemy connection string for database.', 44 | envvar=DB_URL_ENVVAR, 45 | prompt="File Database URL", 46 | confirmation_prompt=True, 47 | ) 48 | _directory = click.option( 49 | '-d', '--directory', 50 | type=click.Path( 51 | exists=True, 52 | writable=True, 53 | resolve_path=True, 54 | ), 55 | help="Path to a local directory.", 56 | default=getcwd(), 57 | ) 58 | _users = click.option( 59 | '-u', '--users', 60 | type=click.STRING, 61 | help="Comma-separated list of users to sync.", 62 | default=getuser(), 63 | ) 64 | 65 | 66 | CONFIRM_MIGRATION = dedent( 67 | """ 68 | About to run schema migrations against supplied database URL. If you have 69 | stored data from a previous pgcontents installation, it may not be 70 | correctly preserved. 71 | 72 | It is *HIGHLY* recommended that you back up stored data before proceeding. 73 | 74 | Proceed?""" 75 | ) 76 | 77 | 78 | @main.command('init') 79 | @_db_url 80 | @click.option( 81 | '-r', '--revision', 82 | type=click.STRING, 83 | help="Revision ID to upgrade to.", 84 | default='head', 85 | ) 86 | @click.option( 87 | '--prompt/--no-prompt', 88 | default=True, 89 | help="If passed, don't prompt for confirmation before migrating DB.", 90 | ) 91 | def init(db_url, revision, prompt): 92 | """ 93 | Migrate a database to the current pgcontents schema. 94 | """ 95 | click.echo("Initializing pgcontents...") 96 | if prompt: 97 | click.confirm(CONFIRM_MIGRATION, abort=True) 98 | upgrade(db_url, revision) 99 | 100 | click.echo('Initialization completed successfully.') 101 | 102 | 103 | @main.command() 104 | @_db_url 105 | def gen_migration(db_url): 106 | """ 107 | Use alembic revision --autogenerate to create a new migration from 108 | schema.py. 109 | 110 | You probably don't need to use this unless you're doing local development 111 | or changing the default pgcontents schema. 112 | """ 113 | with temp_alembic_ini(ALEMBIC_DIR_LOCATION, db_url) as alembic_ini: 114 | subprocess.check_call( 115 | ['alembic', '-c', alembic_ini, 'revision', '--autogenerate'] 116 | ) 117 | 118 | 119 | @main.command() 120 | @click.option( 121 | '-u', '--user', 122 | help='Owner of the notebook to be fetched.', 123 | ) 124 | @click.option( 125 | '-f', '--filename', 126 | help='Name of the file to fetch in the DB.', 127 | ) 128 | @click.option( 129 | '-o', '--output', 130 | help="Local filesystem destination.", 131 | type=click.Path( 132 | file_okay=True, 133 | dir_okay=False, 134 | writable=True, 135 | ), 136 | ) 137 | @click.option( 138 | '-k', '--key', 139 | help="Decryption key.", 140 | type=click.STRING, 141 | envvar='PGCONTENTS_DECRYPTION_KEY', 142 | ) 143 | @click.option( 144 | '-t', '--type', 145 | help="Type of file to fetch (notebook or file).", 146 | default='notebook', 147 | type=click.Choice(['file', 'notebook']), 148 | show_default=True, 149 | ) 150 | @click.option( 151 | '--clear-output', 152 | help="Clear notebook output before writing?", 153 | default=False, 154 | is_flag=True, 155 | ) 156 | @_db_url 157 | def fetch(db_url, user, filename, key, output, type, clear_output): 158 | """Fetch a notebook from the database to the local filesystem. 
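    For example, to pull a notebook down to the local filesystem (all
    values in this invocation are hypothetical, shown only to illustrate
    the options defined above):

        pgcontents fetch \
            -l postgresql://pgcontents_user:pgcontents_pass@localhost/pgcontents \
            -u my_user -f my_notebook.ipynb -o local_copy.ipynb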
159 | """ 160 | if db_url is None: 161 | raise click.UsageError("-l/--db-url is required") 162 | if user is None: 163 | raise click.UsageError("-u/--user is required") 164 | if filename is None: 165 | raise click.UsageError("-f/--filename is required") 166 | if output is None: 167 | output = filename 168 | 169 | crypto = single_password_crypto_factory(key)(user) 170 | 171 | mgr = PostgresContentsManager( 172 | db_url=db_url, 173 | user_id=user, 174 | # User should already exist. 175 | create_directory_on_startup=False, 176 | create_user_on_startup=False, 177 | crypto=crypto, 178 | ) 179 | 180 | try: 181 | result = mgr.get(filename, content=True, type=type) 182 | except HTTPError as e: 183 | if e.status_code == 404: 184 | raise click.ClickException("No such file: {!r}".format(filename)) 185 | elif e.status_code == 500: 186 | raise click.ClickException( 187 | "Failed to load file: {!r}. Is the decryption key correct?" 188 | .format(filename) 189 | ) 190 | else: 191 | raise click.ClickException("Unknown error: %s" % e) 192 | 193 | nb = nbformat.from_dict(result['content']) 194 | if clear_output: 195 | ClearOutputPreprocessor().preprocess(nb, resources=None) 196 | 197 | nbformat.write(nb, open(output, 'w'), version=nbformat.NO_CONVERT) 198 | 199 | 200 | if __name__ == "__main__": 201 | main() 202 | -------------------------------------------------------------------------------- /examples/example_checkpoints_config.py: -------------------------------------------------------------------------------- 1 | from pgcontents import PostgresCheckpoints 2 | c = get_config() # noqa 3 | 4 | # Tell IPython to use PostgresCheckpoints for checkpoint storage. 5 | c.NotebookApp.checkpoints_class = PostgresCheckpoints 6 | 7 | # Set the url for the database used to store files. See 8 | # http://docs.sqlalchemy.org/en/rel_0_9/core/engines.html#postgresql 9 | # for more info on db url formatting. 10 | c.PostgresContentsManager.db_url = 'postgresql://ssanderson:secret_password@myhost.org:5432/pgcontents' # noqa 11 | -------------------------------------------------------------------------------- /examples/example_jupyter_notebook_config.py: -------------------------------------------------------------------------------- 1 | from pgcontents import PostgresContentsManager 2 | c = get_config() # noqa 3 | 4 | # Tell IPython to use PostgresContentsManager for all storage. 5 | c.NotebookApp.contents_manager_class = PostgresContentsManager 6 | 7 | # Set the url for the database used to store files. See 8 | # http://docs.sqlalchemy.org/en/rel_0_9/core/engines.html#postgresql 9 | # for more info on db url formatting. 10 | c.PostgresContentsManager.db_url = 'postgresql://ssanderson@/pgcontents' 11 | 12 | # PGContents associates each running notebook server with a user, allowing 13 | # multiple users to connect to the same database without trampling each other's 14 | # notebooks. By default, we use the result of result of getpass.getuser(), but 15 | # a username can be specified manually like so: 16 | c.PostgresContentsManager.user_id = 'my_awesome_username' 17 | 18 | # Set a maximum file size, if desired. 19 | c.PostgresContentsManager.max_file_size_bytes = 1000000 # 1MB File cap 20 | -------------------------------------------------------------------------------- /examples/hybrid_manager_example.py: -------------------------------------------------------------------------------- 1 | # This example shows how to configure Jupyter/IPython to use the more complex 2 | # HybridContentsManager. 
3 | 4 | # A HybridContentsManager implements the contents API by delegating requests to 5 | # other contents managers. Each sub-manager is associated with a root 6 | # directory, and all requests for data within that directory are routed to the 7 | # sub-manager. 8 | 9 | # A HybridContentsManager needs two pieces of information at configuration 10 | # time: 11 | 12 | # 1. ``manager_classes``, a map from root directory to the type of contents 13 | # manager to use for that root directory. 14 | # 2. ``manager_kwargs``, a map from root directory to a dict of keywords to 15 | # pass to the associated sub-manager. 16 | 17 | from pgcontents.pgmanager import PostgresContentsManager 18 | from pgcontents.hybridmanager import HybridContentsManager 19 | 20 | # Using Jupyter (IPython >= 4.0). 21 | # from notebook.services.contents.filemanager import FileContentsManager 22 | # Using Legacy IPython. 23 | from IPython.html.services.contents.filemanager import FileContentsManager 24 | 25 | c = get_config() # noqa 26 | 27 | c.NotebookApp.contents_manager_class = HybridContentsManager 28 | c.HybridContentsManager.manager_classes = { 29 | # Associate the root directory with a PostgresContentsManager. 30 | # This manager will receive all requests that don't fall under any of the 31 | # other managers. 32 | '': PostgresContentsManager, 33 | # Associate /directory with a FileContentsManager. 34 | 'directory': FileContentsManager, 35 | # Associate /other_directory with another FileContentsManager. 36 | 'other_directory': FileContentsManager, 37 | } 38 | c.HybridContentsManager.manager_kwargs = { 39 | # Args for root PostgresContentsManager. 40 | '': { 41 | 'db_url': 'postgresql://ssanderson@/pgcontents_testing', 42 | 'user_id': 'my_awesome_username', 43 | 'max_file_size_bytes': 1000000, # Optional 44 | }, 45 | # Args for the FileContentsManager mapped to /directory 46 | 'directory': { 47 | 'root_dir': '/home/ssanderson/some_local_directory', 48 | }, 49 | # Args for the FileContentsManager mapped to /other_directory 50 | 'other_directory': { 51 | 'root_dir': '/home/ssanderson/some_other_local_directory', 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /notebook5_constraints.txt: -------------------------------------------------------------------------------- 1 | alembic==1.3.2 2 | attrs==19.3.0 3 | backports-abc==0.5 4 | backports.shutil-get-terminal-size==1.0.0 5 | bleach==3.1.0 6 | certifi==2019.11.28 7 | cffi==1.13.2 8 | chardet==3.0.4 9 | Click==7.0 10 | configparser==4.0.2 11 | contextlib2==0.6.0.post1 12 | cryptography==2.8 13 | decorator==4.4.1 14 | defusedxml==0.6.0 15 | entrypoints==0.3 16 | enum34==1.1.6 17 | flake8==3.7.9 18 | funcsigs==1.0.2 19 | functools32==3.2.3.post2 20 | futures==3.3.0 21 | idna==2.8 22 | importlib-metadata==1.3.0 23 | ipaddress==1.0.23 24 | ipykernel==4.10.1 25 | ipython==5.8.0 26 | ipython-genutils==0.2.0 27 | Jinja2==2.10.3 28 | jsonschema==3.2.0 29 | jupyter-client==5.3.4 30 | jupyter-core==4.6.1 31 | Mako==1.1.0 32 | MarkupSafe==1.1.1 33 | mccabe==0.6.1 34 | mistune==0.8.4 35 | mock==3.0.5 36 | more-itertools==5.0.0 37 | nbconvert==5.6.1 38 | nbformat==4.4.0 39 | nose==1.3.7 40 | nose-ignore-docstring==0.2 41 | notebook==5.7.8 42 | pandocfilters==1.4.2 43 | pathlib2==2.3.5 44 | pexpect==4.7.0 45 | pickleshare==0.7.5 46 | prometheus-client==0.7.1 47 | prompt-toolkit==1.0.18 48 | psycopg2==2.8.4 49 | ptyprocess==0.6.0 50 | pycodestyle==2.5.0 51 | pycparser==2.19 52 | pyflakes==2.1.1 53 | Pygments==2.5.2 54 | 
pyrsistent==0.15.6 55 | python-dateutil==2.8.1 56 | python-editor==1.0.4 57 | pyzmq==18.1.1 58 | requests==2.22.0 59 | scandir==1.10.0 60 | Send2Trash==1.5.0 61 | simplegeneric==0.8.1 62 | singledispatch==3.4.0.3 63 | six==1.13.0 64 | SQLAlchemy==1.3.12 65 | terminado==0.8.3 66 | testpath==0.4.4 67 | tornado==5.1.1 68 | traitlets==4.3.3 69 | typing==3.7.4.1 70 | urllib3==1.25.7 71 | wcwidth==0.1.7 72 | webencodings==0.5.1 73 | zipp==0.6.0 74 | -------------------------------------------------------------------------------- /notebook6_constraints.txt: -------------------------------------------------------------------------------- 1 | alembic==1.3.2 2 | attrs==19.3.0 3 | backcall==0.1.0 4 | bleach==3.1.0 5 | certifi==2019.11.28 6 | cffi==1.13.2 7 | chardet==3.0.4 8 | Click==7.0 9 | cryptography==2.8 10 | decorator==4.4.1 11 | defusedxml==0.6.0 12 | entrypoints==0.3 13 | flake8==3.7.9 14 | idna==2.8 15 | importlib-metadata==1.3.0 16 | ipykernel==5.1.3 17 | ipython==7.9.0 18 | ipython-genutils==0.2.0 19 | jedi==0.15.2 20 | Jinja2==2.10.3 21 | jsonschema==3.2.0 22 | jupyter-client==5.3.4 23 | jupyter-core==4.6.1 24 | Mako==1.1.0 25 | MarkupSafe==1.1.1 26 | mccabe==0.6.1 27 | mistune==0.8.4 28 | mock==3.0.5 29 | more-itertools==8.0.2 30 | nbconvert==5.6.1 31 | nbformat==4.4.0 32 | nose==1.3.7 33 | nose-ignore-docstring==0.2 34 | notebook==6.0.2 35 | pandocfilters==1.4.2 36 | parso==0.5.2 37 | pexpect==4.7.0 38 | pickleshare==0.7.5 39 | prometheus-client==0.7.1 40 | prompt-toolkit==2.0.10 41 | psycopg2==2.8.4 42 | ptyprocess==0.6.0 43 | pycodestyle==2.5.0 44 | pycparser==2.19 45 | pyflakes==2.1.1 46 | Pygments==2.5.2 47 | pyrsistent==0.15.6 48 | python-dateutil==2.8.1 49 | python-editor==1.0.4 50 | pyzmq==18.1.1 51 | requests==2.22.0 52 | Send2Trash==1.5.0 53 | six==1.13.0 54 | SQLAlchemy==1.3.12 55 | terminado==0.8.3 56 | testpath==0.4.4 57 | tornado==6.0.3 58 | traitlets==4.3.3 59 | urllib3==1.25.7 60 | wcwidth==0.1.7 61 | webencodings==0.5.1 62 | zipp==0.6.0 63 | -------------------------------------------------------------------------------- /pgcontents/__init__.py: -------------------------------------------------------------------------------- 1 | from .checkpoints import PostgresCheckpoints 2 | from .hybridmanager import HybridContentsManager 3 | from .pgmanager import PostgresContentsManager 4 | 5 | __all__ = [ 6 | 'HybridContentsManager', 7 | 'PostgresCheckpoints', 8 | 'PostgresContentsManager', 9 | ] 10 | -------------------------------------------------------------------------------- /pgcontents/alembic.ini: -------------------------------------------------------------------------------- 1 | [alembic] 2 | script_location = {alembic_dir_location} 3 | sqlalchemy.url = {sqlalchemy_url} 4 | 5 | # Logging configuration 6 | [loggers] 7 | keys = root,sqlalchemy,alembic 8 | 9 | [handlers] 10 | keys = console 11 | 12 | [formatters] 13 | keys = generic 14 | 15 | [logger_root] 16 | level = WARN 17 | handlers = console 18 | qualname = 19 | 20 | [logger_sqlalchemy] 21 | level = WARN 22 | handlers = 23 | qualname = sqlalchemy.engine 24 | 25 | [logger_alembic] 26 | level = INFO 27 | handlers = 28 | qualname = alembic 29 | 30 | [handler_console] 31 | class = StreamHandler 32 | args = (sys.stderr,) 33 | level = NOTSET 34 | formatter = generic 35 | 36 | [formatter_generic] 37 | format = %(levelname)-5.5s [%(name)s] %(message)s 38 | datefmt = %H:%M:%S 39 | -------------------------------------------------------------------------------- /pgcontents/alembic.ini.template: 
-------------------------------------------------------------------------------- 1 | # A generic, single database configuration. 2 | 3 | [alembic] 4 | # path to migration scripts 5 | script_location = {alembic_dir_location} 6 | 7 | # template used to generate migration files 8 | # file_template = %%(rev)s_%%(slug)s 9 | 10 | # max length of characters to apply to the 11 | # "slug" field 12 | #truncate_slug_length = 40 13 | 14 | # set to 'true' to run the environment during 15 | # the 'revision' command, regardless of autogenerate 16 | # revision_environment = false 17 | 18 | # set to 'true' to allow .pyc and .pyo files without 19 | # a source .py file to be detected as revisions in the 20 | # versions/ directory 21 | # sourceless = false 22 | 23 | # version location specification; this defaults 24 | # to alembic/versions. When using multiple version 25 | # directories, initial revisions must be specified with --version-path 26 | # version_locations = %(here)s/bar %(here)s/bat alembic/versions 27 | 28 | # the output encoding used when revision files 29 | # are written from script.py.mako 30 | # output_encoding = utf-8 31 | 32 | sqlalchemy.url = {sqlalchemy_url} 33 | 34 | 35 | # Logging configuration 36 | [loggers] 37 | keys = root,sqlalchemy,alembic 38 | 39 | [handlers] 40 | keys = console 41 | 42 | [formatters] 43 | keys = generic 44 | 45 | [logger_root] 46 | level = WARN 47 | handlers = console 48 | qualname = 49 | 50 | [logger_sqlalchemy] 51 | level = WARN 52 | handlers = 53 | qualname = sqlalchemy.engine 54 | 55 | [logger_alembic] 56 | level = INFO 57 | handlers = 58 | qualname = alembic 59 | 60 | [handler_console] 61 | class = StreamHandler 62 | args = (sys.stderr,) 63 | level = NOTSET 64 | formatter = generic 65 | 66 | [formatter_generic] 67 | format = %(levelname)-5.5s [%(name)s] %(message)s 68 | datefmt = %H:%M:%S 69 | -------------------------------------------------------------------------------- /pgcontents/alembic/README: -------------------------------------------------------------------------------- 1 | Generic single-database configuration. 2 | -------------------------------------------------------------------------------- /pgcontents/alembic/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantopian/pgcontents/51f8febcf6ece4e88b047768b9ce18553162d63c/pgcontents/alembic/__init__.py -------------------------------------------------------------------------------- /pgcontents/alembic/env.py: -------------------------------------------------------------------------------- 1 | from __future__ import with_statement 2 | from alembic import context 3 | from logging.config import fileConfig 4 | from sqlalchemy import engine_from_config, pool 5 | from os.path import dirname, abspath 6 | 7 | import sys 8 | 9 | from pgcontents.schema import metadata 10 | 11 | # this is the Alembic Config object, which provides 12 | # access to the values within the .ini file in use. 13 | config = context.config 14 | 15 | # Interpret the config file for Python logging. 16 | # This line sets up loggers basically. 
17 | fileConfig(config.config_file_name) 18 | 19 | # add your model's MetaData object here 20 | # for 'autogenerate' support 21 | # from myapp import mymodel 22 | # target_metadata = mymodel.Base.metadata 23 | sys.path.append(dirname(dirname(dirname(abspath(__file__))))) 24 | 25 | target_metadata = metadata 26 | 27 | # other values from the config, defined by the needs of env.py, 28 | # can be acquired: 29 | # my_important_option = config.get_main_option("my_important_option") 30 | # ... etc. 31 | include_schemas = True 32 | 33 | 34 | def run_migrations_offline(): 35 | """Run migrations in 'offline' mode. 36 | 37 | This configures the context with just a URL 38 | and not an Engine, though an Engine is acceptable 39 | here as well. By skipping the Engine creation 40 | we don't even need a DBAPI to be available. 41 | 42 | Calls to context.execute() here emit the given string to the 43 | script output. 44 | 45 | """ 46 | url = config.get_main_option("sqlalchemy.url") 47 | context.configure(url=url, target_metadata=target_metadata) 48 | 49 | with context.begin_transaction(): 50 | context.run_migrations() 51 | 52 | 53 | def run_migrations_online(): 54 | """Run migrations in 'online' mode. 55 | 56 | In this scenario we need to create an Engine 57 | and associate a connection with the context. 58 | 59 | """ 60 | engine = engine_from_config( 61 | config.get_section(config.config_ini_section), 62 | prefix='sqlalchemy.', 63 | poolclass=pool.NullPool) 64 | 65 | connection = engine.connect() 66 | context.configure( 67 | connection=connection, 68 | target_metadata=target_metadata, 69 | include_schemas=True, 70 | ) 71 | 72 | try: 73 | with context.begin_transaction(): 74 | context.run_migrations() 75 | finally: 76 | connection.close() 77 | engine.dispose() 78 | 79 | 80 | if context.is_offline_mode(): 81 | run_migrations_offline() 82 | else: 83 | run_migrations_online() 84 | -------------------------------------------------------------------------------- /pgcontents/alembic/script.py.mako: -------------------------------------------------------------------------------- 1 | """${message} 2 | 3 | Revision ID: ${up_revision} 4 | Revises: ${down_revision | comma,n} 5 | Create Date: ${create_date} 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = ${repr(up_revision)} 11 | down_revision = ${repr(down_revision)} 12 | branch_labels = ${repr(branch_labels)} 13 | depends_on = ${repr(depends_on)} 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | ${imports if imports else ""} 18 | 19 | def upgrade(): 20 | ${upgrades if upgrades else "pass"} 21 | 22 | 23 | def downgrade(): 24 | ${downgrades if downgrades else "pass"} 25 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/1217e5fbdbd9_.py: -------------------------------------------------------------------------------- 1 | """Add unique files constraint. 2 | 3 | Revision ID: 1217e5fbdbd9 4 | Revises: 32518998055d 5 | Create Date: 2015-03-23 14:49:33.176862 6 | 7 | """ 8 | from textwrap import dedent 9 | 10 | # revision identifiers, used by Alembic. 
11 | revision = '1217e5fbdbd9' 12 | down_revision = '32518998055d' 13 | branch_labels = None 14 | depends_on = None 15 | 16 | from alembic import op 17 | 18 | 19 | def upgrade(): 20 | 21 | temp_select = dedent( 22 | """ 23 | SELECT DISTINCT ON 24 | (f.user_id, f.parent_name, f.name) 25 | id, name, user_id, parent_name, content, created_at 26 | INTO TEMP TABLE migrate_temp 27 | FROM 28 | pgcontents.files AS f 29 | ORDER BY 30 | f.user_id, f.parent_name, f.name, f.created_at 31 | """ 32 | ) 33 | 34 | drop_existing_rows = "TRUNCATE TABLE pgcontents.files" 35 | copy_from_temp_table = dedent( 36 | """ 37 | INSERT INTO pgcontents.files 38 | SELECT id, name, user_id, parent_name, content, created_at 39 | FROM migrate_temp 40 | """ 41 | ) 42 | drop_temp_table = "DROP TABLE migrate_temp" 43 | 44 | op.execute(temp_select) 45 | op.execute(drop_existing_rows) 46 | op.execute(copy_from_temp_table) 47 | op.execute(drop_temp_table) 48 | 49 | op.create_unique_constraint( 50 | u'uix_filepath_username', 51 | 'files', 52 | ['user_id', 'parent_name', 'name'], 53 | schema='pgcontents', 54 | ) 55 | 56 | 57 | def downgrade(): 58 | op.drop_constraint( 59 | u'uix_filepath_username', 60 | 'files', 61 | schema='pgcontents', 62 | type_='unique' 63 | ) 64 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/2d46c89138b0_.py: -------------------------------------------------------------------------------- 1 | """Change properties on foreign key constraints. 2 | 3 | Revision ID: 2d46c89138b0 4 | Revises: 1217e5fbdbd9 5 | Create Date: 2015-05-14 16:53:00.073652 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '2d46c89138b0' 11 | down_revision = '1217e5fbdbd9' 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | 18 | 19 | def upgrade(): 20 | 21 | # Drop the existing foreign key 22 | op.drop_constraint( 23 | 'directories_parent_user_id_fkey', 24 | 'directories', 25 | type_='foreignkey', 26 | schema='pgcontents' 27 | ) 28 | 29 | # Add the foreign key back, make it DEFERRABLE INITIALLY IMMEDIATE 30 | op.create_foreign_key( 31 | 'directories_parent_user_id_fkey', 32 | 'directories', 33 | 'directories', 34 | ['parent_user_id', 'parent_name'], 35 | ['user_id', 'name'], 36 | deferrable=True, 37 | initially='IMMEDIATE', 38 | source_schema='pgcontents', 39 | referent_schema='pgcontents', 40 | ) 41 | 42 | # Drop the existing foreign key 43 | op.drop_constraint( 44 | 'files_user_id_fkey', 45 | 'files', 46 | type_='foreignkey', 47 | schema='pgcontents' 48 | ) 49 | 50 | # Add the foreign key back, make it cascade on update 51 | op.create_foreign_key( 52 | 'files_user_id_fkey', 53 | 'files', 54 | 'directories', 55 | ['user_id', 'parent_name'], 56 | ['user_id', 'name'], 57 | onupdate='CASCADE', 58 | source_schema='pgcontents', 59 | referent_schema='pgcontents', 60 | ) 61 | 62 | def downgrade(): 63 | 64 | op.drop_constraint( 65 | 'directories_parent_user_id_fkey', 66 | 'directories', 67 | type_='foreignkey', 68 | schema='pgcontents' 69 | ) 70 | 71 | # Add the foreign key back, without any deferrable settings 72 | op.create_foreign_key( 73 | 'directories_parent_user_id_fkey', 74 | 'directories', 75 | 'directories', 76 | ['parent_user_id', 'parent_name'], 77 | ['user_id', 'name'], 78 | source_schema='pgcontents', 79 | referent_schema='pgcontents', 80 | ) 81 | 82 | op.drop_constraint( 83 | 'files_user_id_fkey', 84 | 'files', 85 | type_='foreignkey', 86 | schema='pgcontents' 87 | ) 88 | 89 | # 
Add the foreign key back, without any onupdate setting 90 | op.create_foreign_key( 91 | 'files_user_id_fkey', 92 | 'files', 93 | 'directories', 94 | ['user_id', 'parent_name'], 95 | ['user_id', 'name'], 96 | source_schema='pgcontents', 97 | referent_schema='pgcontents', 98 | ) 99 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/32518998055d_.py: -------------------------------------------------------------------------------- 1 | """Remove extra checkpoints table. 2 | 3 | Revision ID: 32518998055d 4 | Revises: 597680fc6b80 5 | Create Date: 2015-03-23 14:35:24.572173 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '32518998055d' 11 | down_revision = '597680fc6b80' 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | from sqlalchemy.dialects import postgresql 18 | 19 | 20 | def upgrade(): 21 | op.drop_table('checkpoints', schema='pgcontents') 22 | 23 | 24 | def downgrade(): 25 | op.create_table( 26 | 'checkpoints', 27 | sa.Column( 28 | 'id', 29 | sa.INTEGER(), 30 | server_default=sa.text( 31 | u"nextval('pgcontents.checkpoints_id_seq'::regclass)" 32 | ), 33 | nullable=False, 34 | ), 35 | sa.Column( 36 | 'file_id', 37 | sa.INTEGER(), 38 | autoincrement=False, 39 | nullable=False, 40 | ), 41 | sa.Column( 42 | 'created_at', 43 | postgresql.TIMESTAMP(), 44 | autoincrement=False, 45 | nullable=False 46 | ), 47 | sa.ForeignKeyConstraint( 48 | ['file_id'], 49 | [u'pgcontents.files.id'], 50 | name=u'checkpoints_file_id_fkey', 51 | onupdate=u'CASCADE', 52 | ondelete=u'CASCADE' 53 | ), 54 | sa.PrimaryKeyConstraint( 55 | 'id', 56 | name=u'checkpoints_pkey' 57 | ), 58 | schema='pgcontents', 59 | ) 60 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/33b3645dc7f5_.py: -------------------------------------------------------------------------------- 1 | """empty message 2 | 3 | Revision ID: 33b3645dc7f5 4 | Revises: 5 | Create Date: 2014-12-17 11:37:24.122882 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '33b3645dc7f5' 11 | down_revision = None 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | 18 | 19 | def upgrade(): 20 | ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('users', 22 | sa.Column('id', sa.Unicode(length=30), nullable=False), 23 | sa.PrimaryKeyConstraint('id') 24 | ) 25 | op.create_table('directories', 26 | sa.Column('user_id', sa.Unicode(length=30), nullable=False), 27 | sa.Column('name', sa.Unicode(length=70), nullable=False), 28 | sa.Column('parent_user_id', sa.Unicode(length=30), nullable=True), 29 | sa.Column('parent_name', sa.Unicode(length=70), nullable=True), 30 | sa.CheckConstraint(u"left(name, 1) = '/'", name=u'directories_startwith_slash'), 31 | sa.CheckConstraint(u"length(regexp_replace(name, '[^/]+', '', 'g')) - 1= length(regexp_replace(parent_name, '[^/]+', '', 'g'))", name=u'directories_slash_count'), 32 | sa.CheckConstraint(u"right(name, 1) = '/'", name=u'directories_endwith_slash'), 33 | sa.CheckConstraint(u'(parent_name IS NULL AND parent_user_id IS NULL) OR (parent_name IS NOT NULL AND parent_user_id IS NOT NULL)', name=u'directories_null_user_id_match'), 34 | sa.CheckConstraint(u'position(parent_name in name) != 0', name=u'directories_parent_name_prefix'), 35 | sa.CheckConstraint(u'user_id = parent_user_id', name=u'directories_match_user_id'), 36 | sa.ForeignKeyConstraint(['parent_user_id', 'parent_name'], [u'directories.user_id', u'directories.name'], name=u'directories_parent_user_id_fkey'), 37 | sa.ForeignKeyConstraint(['user_id'], [u'users.id'], ), 38 | sa.PrimaryKeyConstraint('user_id', 'name') 39 | ) 40 | op.create_table('files', 41 | sa.Column('id', sa.Integer(), nullable=False), 42 | sa.Column('name', sa.Unicode(length=40), nullable=False), 43 | sa.Column('user_id', sa.Unicode(length=30), nullable=False), 44 | sa.Column('parent_name', sa.Unicode(length=70), nullable=False), 45 | sa.Column('content', sa.LargeBinary(length=100000), nullable=False), 46 | sa.Column('created_at', sa.DateTime(), nullable=False), 47 | sa.ForeignKeyConstraint(['user_id', 'parent_name'], [u'directories.user_id', u'directories.name'], ), 48 | sa.ForeignKeyConstraint(['user_id'], [u'users.id'], ), 49 | sa.PrimaryKeyConstraint('id') 50 | ) 51 | op.create_table('checkpoints', 52 | sa.Column('id', sa.Integer(), nullable=False), 53 | sa.Column('file_id', sa.Integer(), nullable=False), 54 | sa.Column('created_at', sa.DateTime(), nullable=False), 55 | sa.ForeignKeyConstraint(['file_id'], [u'files.id'], onupdate=u'CASCADE', ondelete=u'CASCADE'), 56 | sa.PrimaryKeyConstraint('id') 57 | ) 58 | ### end Alembic commands ### 59 | 60 | 61 | def downgrade(): 62 | ### commands auto generated by Alembic - please adjust! ### 63 | op.drop_table('checkpoints') 64 | op.drop_table('files') 65 | op.drop_table('directories') 66 | op.drop_table('users') 67 | ### end Alembic commands ### 68 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/3d5ea85fc44f_.py: -------------------------------------------------------------------------------- 1 | """empty message 2 | 3 | Revision ID: 3d5ea85fc44f 4 | Revises: 33b3645dc7f5 5 | Create Date: 2014-12-28 21:13:07.328787 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '3d5ea85fc44f' 11 | down_revision = '33b3645dc7f5' 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | 18 | 19 | def upgrade(): 20 | ### commands auto generated by Alembic - please adjust! 
### 21 | op.create_table('remote_checkpoints', 22 | sa.Column('id', sa.Integer(), nullable=False), 23 | sa.Column('user_id', sa.Unicode(length=30), nullable=False), 24 | sa.Column('path', sa.Unicode(length=70), nullable=False), 25 | sa.Column('content', sa.LargeBinary(length=100000), nullable=False), 26 | sa.Column('last_modified', sa.DateTime(), nullable=False), 27 | sa.ForeignKeyConstraint(['user_id'], [u'users.id'], ), 28 | sa.PrimaryKeyConstraint('id') 29 | ) 30 | ### end Alembic commands ### 31 | 32 | 33 | def downgrade(): 34 | ### commands auto generated by Alembic - please adjust! ### 35 | op.drop_table('remote_checkpoints') 36 | ### end Alembic commands ### 37 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/551f95fbd4a2_.py: -------------------------------------------------------------------------------- 1 | """Increase max size on FilePath. 2 | 3 | Revision ID: 551f95fbd4a2 4 | Revises: 3d5ea85fc44f 5 | Create Date: 2015-01-16 00:15:50.761819 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 10 | revision = '551f95fbd4a2' 11 | down_revision = '3d5ea85fc44f' 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | 18 | 19 | OldFilePath = sa.Unicode(70) 20 | NewFilePath = sa.Unicode(300) 21 | 22 | tables_cols = [ 23 | ('directories', 'name'), 24 | ('directories', 'parent_name'), 25 | ('files', 'parent_name'), 26 | ('remote_checkpoints', 'path'), 27 | ] 28 | 29 | 30 | def upgrade(): 31 | op.alter_column('files', 'name', type_=NewFilePath) 32 | for tablename, colname in tables_cols: 33 | op.alter_column(tablename, colname, type_=NewFilePath) 34 | 35 | def downgrade(): 36 | op.alter_column('files', 'name', type_=sa.Unicode(40)) 37 | for tablename, colname in tables_cols: 38 | op.alter_column(tablename, colname, type_=OldFilePath) 39 | -------------------------------------------------------------------------------- /pgcontents/alembic/versions/597680fc6b80_.py: -------------------------------------------------------------------------------- 1 | """Move pgcontents data to its own schema. 2 | 3 | Revision ID: 597680fc6b80 4 | Revises: 551f95fbd4a2 5 | Create Date: 2015-03-17 20:18:34.371236 6 | 7 | """ 8 | 9 | # revision identifiers, used by Alembic. 
10 | revision = '597680fc6b80' 11 | down_revision = '551f95fbd4a2' 12 | branch_labels = None 13 | depends_on = None 14 | 15 | from alembic import op 16 | import sqlalchemy as sa 17 | from sqlalchemy.dialects import postgresql 18 | 19 | 20 | def upgrade(): 21 | conn = op.get_bind() 22 | conn.execute('CREATE SCHEMA pgcontents') 23 | conn.execute('ALTER TABLE users SET SCHEMA pgcontents') 24 | conn.execute('ALTER TABLE directories SET SCHEMA pgcontents') 25 | conn.execute('ALTER TABLE files SET SCHEMA pgcontents') 26 | conn.execute('ALTER TABLE checkpoints SET SCHEMA pgcontents') 27 | conn.execute('ALTER TABLE remote_checkpoints SET SCHEMA pgcontents') 28 | 29 | 30 | def downgrade(): 31 | conn = op.get_bind() 32 | conn.execute('ALTER TABLE pgcontents.users SET SCHEMA public') 33 | conn.execute('ALTER TABLE pgcontents.directories SET SCHEMA public') 34 | conn.execute('ALTER TABLE pgcontents.files SET SCHEMA public') 35 | conn.execute('ALTER TABLE pgcontents.checkpoints SET SCHEMA public') 36 | conn.execute('ALTER TABLE pgcontents.remote_checkpoints SET SCHEMA public') 37 | conn.execute('DROP SCHEMA pgcontents') 38 | -------------------------------------------------------------------------------- /pgcontents/api_utils.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for implementing the ContentsManager API. 3 | """ 4 | from __future__ import unicode_literals 5 | from base64 import ( 6 | b64decode, 7 | b64encode, 8 | ) 9 | from datetime import datetime 10 | from functools import wraps 11 | import mimetypes 12 | import posixpath 13 | 14 | from tornado.web import HTTPError 15 | from .error import CorruptedFile, PathOutsideRoot 16 | from .utils.ipycompat import reads, writes 17 | 18 | NBFORMAT_VERSION = 4 19 | 20 | # We don't currently track created/modified dates for directories, so this 21 | # value is always used instead. 22 | DUMMY_CREATED_DATE = datetime.fromtimestamp(0) 23 | 24 | 25 | def base_model(path): 26 | return { 27 | "name": path.rsplit('/', 1)[-1], 28 | "path": path, 29 | "writable": True, 30 | "last_modified": None, 31 | "created": None, 32 | "content": None, 33 | "format": None, 34 | "mimetype": None, 35 | } 36 | 37 | 38 | def base_directory_model(path): 39 | m = base_model(path) 40 | m.update( 41 | type='directory', 42 | last_modified=DUMMY_CREATED_DATE, 43 | created=DUMMY_CREATED_DATE, 44 | ) 45 | return m 46 | 47 | 48 | def api_path_join(*paths): 49 | """ 50 | Join API-style paths. 51 | """ 52 | return posixpath.join(*paths).strip('/') 53 | 54 | 55 | def normalize_api_path(api_path): 56 | """ 57 | Resolve paths with '..' to normalized paths, raising an error if the final 58 | result is outside root. 59 | """ 60 | normalized = posixpath.normpath(api_path.strip('/')) 61 | if normalized == '.': 62 | normalized = '' 63 | elif normalized.startswith('..'): 64 | raise PathOutsideRoot(normalized) 65 | return normalized 66 | 67 | 68 | def from_api_dirname(api_dirname): 69 | """ 70 | Convert API-style directory name into a db-style directory name. 71 | """ 72 | normalized = normalize_api_path(api_dirname) 73 | if normalized == '': 74 | return '/' 75 | return '/' + normalized + '/' 76 | 77 | 78 | def from_api_filename(api_path): 79 | """ 80 | Convert an API-style path into a db-style path. 81 | """ 82 | normalized = normalize_api_path(api_path) 83 | assert len(normalized), "Empty path in from_api_filename" 84 | return '/' + normalized 85 | 86 | 87 | def to_api_path(db_path): 88 | """ 89 | Convert database path into API-style path. 
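    For example, ``to_api_path('/foo/bar/')`` returns ``'foo/bar'``.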
90 | """ 91 | return db_path.strip('/') 92 | 93 | 94 | def split_api_filepath(path): 95 | """ 96 | Split an API file path into directory and name. 97 | """ 98 | parts = path.rsplit('/', 1) 99 | if len(parts) == 1: 100 | name = parts[0] 101 | dirname = '/' 102 | else: 103 | name = parts[1] 104 | dirname = parts[0] + '/' 105 | 106 | return from_api_dirname(dirname), name 107 | 108 | 109 | def writes_base64(nb, version=NBFORMAT_VERSION): 110 | """ 111 | Write a notebook as base64. 112 | """ 113 | return b64encode(writes(nb, version=version).encode('utf-8')) 114 | 115 | 116 | def reads_base64(nb, as_version=NBFORMAT_VERSION): 117 | """ 118 | Read a notebook from base64. 119 | """ 120 | try: 121 | return reads(b64decode(nb).decode('utf-8'), as_version=as_version) 122 | except Exception as e: 123 | raise CorruptedFile(e) 124 | 125 | 126 | def _decode_text_from_base64(path, bcontent): 127 | content = b64decode(bcontent) 128 | try: 129 | return (content.decode('utf-8'), 'text') 130 | except UnicodeError: 131 | raise HTTPError( 132 | 400, 133 | "%s is not UTF-8 encoded" % path, reason='bad format' 134 | ) 135 | 136 | 137 | def _decode_unknown_from_base64(path, bcontent): 138 | """ 139 | Decode base64 data of unknown format. 140 | 141 | Attempts to interpret data as utf-8, falling back to ascii on failure. 142 | """ 143 | content = b64decode(bcontent) 144 | try: 145 | return (content.decode('utf-8'), 'text') 146 | except UnicodeError: 147 | pass 148 | return bcontent.decode('ascii'), 'base64' 149 | 150 | 151 | def from_b64(path, bcontent, format): 152 | """ 153 | Decode base64 content for a file. 154 | 155 | format: 156 | If 'text', the contents will be decoded as UTF-8. 157 | If 'base64', do nothing. 158 | If not specified, try to decode as UTF-8, and fall back to base64 159 | 160 | Returns a triple of decoded_content, format, and mimetype. 161 | """ 162 | decoders = { 163 | 'base64': lambda path, bcontent: (bcontent.decode('ascii'), 'base64'), 164 | 'text': _decode_text_from_base64, 165 | None: _decode_unknown_from_base64, 166 | } 167 | 168 | try: 169 | content, real_format = decoders[format](path, bcontent) 170 | except HTTPError: 171 | # Pass through HTTPErrors, since we intend for them to bubble all the 172 | # way back to the API layer. 173 | raise 174 | except Exception as e: 175 | # Anything else should be wrapped in a CorruptedFile, since it likely 176 | # indicates misconfiguration of encryption. 177 | raise CorruptedFile(e) 178 | 179 | default_mimes = { 180 | 'text': 'text/plain', 181 | 'base64': 'application/octet-stream', 182 | } 183 | mimetype = mimetypes.guess_type(path)[0] or default_mimes[real_format] 184 | 185 | return content, real_format, mimetype 186 | 187 | 188 | def to_b64(content, fmt): 189 | allowed_formats = {'text', 'base64'} 190 | if fmt not in allowed_formats: 191 | raise ValueError( 192 | "Expected file contents in {allowed}, got {fmt}".format( 193 | allowed=allowed_formats, 194 | fmt=fmt, 195 | ) 196 | ) 197 | if fmt == 'text': 198 | # Unicode -> bytes -> base64-encoded bytes. 199 | return b64encode(content.encode('utf8')) 200 | else: 201 | return content.encode('ascii') 202 | 203 | 204 | def prefix_dirs(path): 205 | """ 206 | Return an iterable of all prefix directories of path, descending from root. 
207 | """ 208 | _dirname = posixpath.dirname 209 | path = path.strip('/') 210 | out = [] 211 | while path != '': 212 | path = _dirname(path) 213 | out.append(path) 214 | return reversed(out) 215 | 216 | 217 | def outside_root_to_404(fn): 218 | """ 219 | Decorator for converting PathOutsideRoot errors to 404s. 220 | """ 221 | @wraps(fn) 222 | def wrapped(*args, **kwargs): 223 | try: 224 | return fn(*args, **kwargs) 225 | except PathOutsideRoot as e: 226 | raise HTTPError(404, "Path outside root: [%s]" % e.args[0]) 227 | return wrapped 228 | -------------------------------------------------------------------------------- /pgcontents/checkpoints.py: -------------------------------------------------------------------------------- 1 | """ 2 | An IPython FileContentsManager that uses Postgres for checkpoints. 3 | """ 4 | from __future__ import unicode_literals 5 | 6 | from .api_utils import ( 7 | _decode_unknown_from_base64, 8 | outside_root_to_404, 9 | reads_base64, 10 | to_b64, 11 | writes_base64, 12 | ) 13 | from .managerbase import PostgresManagerMixin 14 | from .query import ( 15 | delete_remote_checkpoints, 16 | delete_single_remote_checkpoint, 17 | get_remote_checkpoint, 18 | list_remote_checkpoints, 19 | move_remote_checkpoints, 20 | purge_remote_checkpoints, 21 | save_remote_checkpoint, 22 | ) 23 | from .utils.ipycompat import Checkpoints, GenericCheckpointsMixin 24 | 25 | 26 | class PostgresCheckpoints(PostgresManagerMixin, 27 | GenericCheckpointsMixin, 28 | Checkpoints): 29 | """ 30 | A Checkpoints implementation that saves checkpoints to a remote database. 31 | """ 32 | 33 | @outside_root_to_404 34 | def create_notebook_checkpoint(self, nb, path): 35 | """Create a checkpoint of the current state of a notebook 36 | 37 | Returns a checkpoint_id for the new checkpoint. 38 | """ 39 | b64_content = writes_base64(nb) 40 | with self.engine.begin() as db: 41 | return save_remote_checkpoint( 42 | db, 43 | self.user_id, 44 | path, 45 | b64_content, 46 | self.crypto.encrypt, 47 | self.max_file_size_bytes, 48 | ) 49 | 50 | @outside_root_to_404 51 | def create_file_checkpoint(self, content, format, path): 52 | """Create a checkpoint of the current state of a file 53 | 54 | Returns a checkpoint_id for the new checkpoint. 
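``prefix_dirs`` (defined just above) yields every ancestor directory of a path, root first, which is useful whenever ancestor directories must be created in order. A sketch:

    from pgcontents.api_utils import prefix_dirs

    list(prefix_dirs('foo/bar/baz.ipynb'))  # -> ['', 'foo', 'foo/bar']
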
55 | """ 56 | try: 57 | b64_content = to_b64(content, format) 58 | except ValueError as e: 59 | self.do_400(str(e)) 60 | with self.engine.begin() as db: 61 | return save_remote_checkpoint( 62 | db, 63 | self.user_id, 64 | path, 65 | b64_content, 66 | self.crypto.encrypt, 67 | self.max_file_size_bytes, 68 | ) 69 | 70 | @outside_root_to_404 71 | def delete_checkpoint(self, checkpoint_id, path): 72 | """delete a checkpoint for a file""" 73 | with self.engine.begin() as db: 74 | return delete_single_remote_checkpoint( 75 | db, self.user_id, path, checkpoint_id, 76 | ) 77 | 78 | def get_checkpoint_content(self, checkpoint_id, path): 79 | """Get the content of a checkpoint.""" 80 | with self.engine.begin() as db: 81 | return get_remote_checkpoint( 82 | db, 83 | self.user_id, 84 | path, 85 | checkpoint_id, 86 | self.crypto.decrypt, 87 | )['content'] 88 | 89 | @outside_root_to_404 90 | def get_notebook_checkpoint(self, checkpoint_id, path): 91 | b64_content = self.get_checkpoint_content(checkpoint_id, path) 92 | return { 93 | 'type': 'notebook', 94 | 'content': reads_base64(b64_content), 95 | } 96 | 97 | @outside_root_to_404 98 | def get_file_checkpoint(self, checkpoint_id, path): 99 | b64_content = self.get_checkpoint_content(checkpoint_id, path) 100 | content, format = _decode_unknown_from_base64(path, b64_content) 101 | return { 102 | 'type': 'file', 103 | 'content': content, 104 | 'format': format, 105 | } 106 | 107 | @outside_root_to_404 108 | def list_checkpoints(self, path): 109 | """Return a list of checkpoints for a given file""" 110 | with self.engine.begin() as db: 111 | return list_remote_checkpoints(db, self.user_id, path) 112 | 113 | @outside_root_to_404 114 | def rename_all_checkpoints(self, old_path, new_path): 115 | """Rename all checkpoints for old_path to new_path.""" 116 | with self.engine.begin() as db: 117 | return move_remote_checkpoints( 118 | db, 119 | self.user_id, 120 | old_path, 121 | new_path, 122 | ) 123 | 124 | @outside_root_to_404 125 | def delete_all_checkpoints(self, path): 126 | """Delete all checkpoints for the given path.""" 127 | with self.engine.begin() as db: 128 | delete_remote_checkpoints(db, self.user_id, path) 129 | 130 | def purge_db(self): 131 | """ 132 | Purge all database records for the current user. 133 | """ 134 | with self.engine.begin() as db: 135 | purge_remote_checkpoints(db, self.user_id) 136 | -------------------------------------------------------------------------------- /pgcontents/constants.py: -------------------------------------------------------------------------------- 1 | """ 2 | Shared constants. 3 | """ 4 | from os.path import ( 5 | dirname, 6 | join, 7 | ) 8 | 9 | ALEMBIC_DIR_LOCATION = join(dirname(__file__), 'alembic') 10 | with open(join(dirname(__file__), 'alembic.ini.template')) as f: 11 | ALEMBIC_INI_TEMPLATE = f.read() 12 | 13 | DB_URL_ENVVAR = 'PGCONTENTS_DB' 14 | UNLIMITED = 0 15 | -------------------------------------------------------------------------------- /pgcontents/crypto.py: -------------------------------------------------------------------------------- 1 | """ 2 | Interface definition for encryption/decryption plugins for 3 | PostgresContentsManager, and implementations of the interface. 4 | 5 | Encryption backends should raise pgcontents.error.CorruptedFile if they 6 | encounter an input that they cannot decrypt. 
7 | """
8 | import sys
9 | import base64
10 | from functools import wraps
11 | 
12 | from cryptography.fernet import Fernet
13 | from cryptography.hazmat.backends import default_backend
14 | from cryptography.hazmat.primitives import hashes
15 | from cryptography.hazmat.primitives.kdf.pbkdf2 import PBKDF2HMAC
16 | 
17 | from .error import CorruptedFile
18 | 
19 | if sys.version_info.major == 3:
20 |     unicode = str
21 | 
22 | 
23 | class NoEncryption(object):
24 |     """
25 |     No-op encryption backend.
26 | 
27 |     encrypt() and decrypt() simply return their inputs.
28 | 
29 |     Methods
30 |     -------
31 |     encrypt : callable[bytes -> bytes]
32 |     decrypt : callable[bytes -> bytes]
33 |     """
34 |     def encrypt(self, b):
35 |         return b
36 | 
37 |     def decrypt(self, b):
38 |         return b
39 | 
40 | 
41 | class FernetEncryption(object):
42 |     """
43 |     Notebook encryption using cryptography.fernet for symmetric-key encryption.
44 | 
45 |     Parameters
46 |     ----------
47 |     fernet : cryptography.fernet.Fernet
48 |         The Fernet object to use for encryption.
49 | 
50 |     Methods
51 |     -------
52 |     encrypt : callable[bytes -> bytes]
53 |     decrypt : callable[bytes -> bytes]
54 | 
55 |     Notes
56 |     -----
57 |     ``cryptography.fernet.MultiFernet`` can be used instead of a vanilla
58 |     ``Fernet`` to allow zero-downtime key rotation.
59 | 
60 |     See Also
61 |     --------
62 |     :func:`pgcontents.utils.sync.reencrypt_user`
63 |     """
64 |     __slots__ = ('_fernet',)
65 | 
66 |     def __init__(self, fernet):
67 |         self._fernet = fernet
68 | 
69 |     def encrypt(self, s):
70 |         return self._fernet.encrypt(s)
71 | 
72 |     def decrypt(self, s):
73 |         try:
74 |             return self._fernet.decrypt(s)
75 |         except Exception as e:
76 |             raise CorruptedFile(e)
77 | 
78 |     def __copy__(self):
79 |         # Any value that appears in an IPython/Jupyter Config object needs to
80 |         # be copyable. Cryptography's Fernet objects aren't copyable,
81 |         # so we copy our underlying state to a new FernetEncryption object.
82 |         return FernetEncryption(self._fernet)
83 | 
84 |     def __deepcopy__(self, memo):
85 |         # Any value that appears in an IPython/Jupyter Config object needs to
86 |         # be deepcopy-able. Cryptography's Fernet objects aren't deepcopy-able,
87 |         # so we copy our underlying state to a new FernetEncryption object.
88 |         return FernetEncryption(self._fernet)
89 | 
90 | 
91 | class FallbackCrypto(object):
92 |     """
93 |     Notebook encryption that accepts a list of crypto instances and decrypts by
94 |     trying them in order.
95 | 
96 |     Sub-cryptos should raise ``CorruptedFile`` if they're unable to decrypt an
97 |     input.
98 | 
99 |     This is conceptually similar to the technique used by
100 |     ``cryptography.fernet.MultiFernet`` for implementing key rotation.
101 | 
102 |     Parameters
103 |     ----------
104 |     cryptos : list[object]
105 |         A sequence of cryptos to use for decryption. cryptos[0] will always be
106 |         used for encryption.
107 | 
108 |     Methods
109 |     -------
110 |     encrypt : callable[bytes -> bytes]
111 |     decrypt : callable[bytes -> bytes]
112 | 
113 |     Notes
114 |     -----
115 |     Since NoEncryption will always succeed, it is only supported as the last
116 |     entry in ``cryptos``.  Passing a list with a NoEncryption not in the last
117 |     location will raise a ValueError.
118 |     """
119 |     __slots__ = ('_cryptos',)
120 | 
121 |     def __init__(self, cryptos):
122 |         # Only the last crypto can be a ``NoEncryption``.
123 |         for c in cryptos[:-1]:
124 |             if isinstance(c, NoEncryption):
125 |                 raise ValueError(
126 |                     "NoEncryption is only supported as the last fallback."
127 | ) 128 | 129 | self._cryptos = cryptos 130 | 131 | def encrypt(self, s): 132 | return self._cryptos[0].encrypt(s) 133 | 134 | def decrypt(self, s): 135 | errors = [] 136 | for c in self._cryptos: 137 | try: 138 | return c.decrypt(s) 139 | except CorruptedFile as e: 140 | errors.append(e) 141 | raise CorruptedFile(errors) 142 | 143 | 144 | def ascii_unicode_to_bytes(v): 145 | assert isinstance(v, unicode), "Expected unicode, got %s" % type(v) 146 | return v.encode('ascii') 147 | 148 | 149 | def derive_single_fernet_key(password, user_id): 150 | """ 151 | Convert a secret key and a user ID into an encryption key to use with a 152 | ``cryptography.fernet.Fernet``. 153 | 154 | Taken from 155 | https://cryptography.io/en/latest/fernet/#using-passwords-with-fernet 156 | 157 | Parameters 158 | ---------- 159 | password : unicode 160 | ascii-encodable key to derive 161 | user_id : unicode 162 | ascii-encodable user_id to use as salt 163 | """ 164 | password = ascii_unicode_to_bytes(password) 165 | user_id = ascii_unicode_to_bytes(user_id) 166 | 167 | kdf = PBKDF2HMAC( 168 | algorithm=hashes.SHA256(), 169 | length=32, 170 | salt=user_id, 171 | iterations=100000, 172 | backend=default_backend(), 173 | ) 174 | return base64.urlsafe_b64encode(kdf.derive(password)) 175 | 176 | 177 | def derive_fallback_fernet_keys(passwords, user_id): 178 | """ 179 | Derive a list of per-user Fernet keys from a list of master keys and a 180 | username. 181 | 182 | If a None is encountered in ``passwords``, it is forwarded. 183 | 184 | Parameters 185 | ---------- 186 | passwords : list[unicode] 187 | List of ascii-encodable keys to derive. 188 | user_id : unicode or None 189 | ascii-encodable user_id to use as salt 190 | """ 191 | # Normally I wouldn't advocate for these kinds of assertions, but we really 192 | # really really don't want to mess up deriving encryption keys. 193 | assert isinstance(passwords, (list, tuple)), \ 194 | "Expected list or tuple of keys, got %s." % type(passwords) 195 | 196 | def derive_single_allow_none(k): 197 | if k is None: 198 | return None 199 | return derive_single_fernet_key(k, user_id).decode('ascii') 200 | 201 | return list(map(derive_single_allow_none, passwords)) 202 | 203 | 204 | def no_password_crypto_factory(): 205 | """ 206 | Create and return a function suitable for passing as a crypto_factory to 207 | ``pgcontents.utils.sync.reencrypt_all_users`` 208 | 209 | The factory here always returns NoEncryption(). This is useful when passed 210 | as ``old_crypto_factory`` to a database that hasn't yet been encrypted. 211 | """ 212 | def factory(user_id): 213 | return NoEncryption() 214 | return factory 215 | 216 | 217 | def single_password_crypto_factory(password): 218 | """ 219 | Create and return a function suitable for passing as a crypto_factory to 220 | ``pgcontents.utils.sync.reencrypt_all_users`` 221 | 222 | The factory here returns a ``FernetEncryption`` that uses a key derived 223 | from ``password`` and salted with the supplied user_id. 
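Taken together, the helpers above support password rotation along these lines (a sketch; the passwords and user id are made up):

    from cryptography.fernet import Fernet
    from pgcontents.crypto import (
        FallbackCrypto,
        FernetEncryption,
        derive_fallback_fernet_keys,
    )

    keys = derive_fallback_fernet_keys(
        [u'new-password', u'old-password'], u'some-user-id',
    )
    crypto = FallbackCrypto(
        [FernetEncryption(Fernet(k.encode('ascii'))) for k in keys]
    )
    token = crypto.encrypt(b'notebook bytes')  # always the first (newest) key
    assert crypto.decrypt(token) == b'notebook bytes'
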
224 | """ 225 | @memoize_single_arg 226 | def factory(user_id): 227 | return FernetEncryption( 228 | Fernet(derive_single_fernet_key(password, user_id)) 229 | ) 230 | return factory 231 | 232 | 233 | def memoize_single_arg(f): 234 | """ 235 | Decorator memoizing a single-argument function 236 | """ 237 | memo = {} 238 | 239 | @wraps(f) 240 | def memoized_f(arg): 241 | try: 242 | return memo[arg] 243 | except KeyError: 244 | result = memo[arg] = f(arg) 245 | return result 246 | return memoized_f 247 | -------------------------------------------------------------------------------- /pgcontents/db_utils.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2014 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | """ 16 | Utilities for working with databases. 17 | """ 18 | 19 | from contextlib import contextmanager 20 | from six.moves import map, zip 21 | 22 | from psycopg2.errorcodes import ( 23 | FOREIGN_KEY_VIOLATION, 24 | UNIQUE_VIOLATION, 25 | ) 26 | from sqlalchemy import Column 27 | from sqlalchemy.exc import IntegrityError 28 | from sqlalchemy.sql.elements import Cast 29 | 30 | 31 | def is_unique_violation(error): 32 | return error.orig.pgcode == UNIQUE_VIOLATION 33 | 34 | 35 | def is_foreign_key_violation(error): 36 | return error.orig.pgcode == FOREIGN_KEY_VIOLATION 37 | 38 | 39 | @contextmanager 40 | def ignore_unique_violation(): 41 | """ 42 | Context manager for gobbling unique violations. 43 | 44 | NOTE: If a unique violation is raised, the existing psql connection will 45 | not accept new commands. This just silences the python-level error. If 46 | you need emit another command after possibly ignoring a unique violation, 47 | you should explicitly use savepoints. 48 | """ 49 | try: 50 | yield 51 | except IntegrityError as error: 52 | if not is_unique_violation(error): 53 | raise 54 | 55 | 56 | def _get_name(column_like): 57 | """ 58 | Get the name from a column-like SQLAlchemy expression. 59 | 60 | Works for Columns and Cast expressions. 61 | """ 62 | if isinstance(column_like, Column): 63 | return column_like.name 64 | elif isinstance(column_like, Cast): 65 | return column_like.clause.name 66 | 67 | 68 | def to_dict_no_content(fields, row): 69 | """ 70 | Convert a SQLAlchemy row that does not contain a 'content' field to a dict. 71 | 72 | If row is None, return None. 73 | 74 | Raises AssertionError if there is a field named 'content' in ``fields``. 75 | """ 76 | assert(len(fields) == len(row)) 77 | 78 | field_names = list(map(_get_name, fields)) 79 | assert 'content' not in field_names, "Unexpected content field." 80 | 81 | return dict(zip(field_names, row)) 82 | 83 | 84 | def to_dict_with_content(fields, row, decrypt_func): 85 | """ 86 | Convert a SQLAlchemy row that contains a 'content' field to a dict. 87 | 88 | ``decrypt_func`` will be applied to the ``content`` field of the row. 89 | 90 | If row is None, return None. 
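A sketch of ``ignore_unique_violation`` (from db_utils above) in use, with its docstring's caveat in mind; ``engine`` is an assumed SQLAlchemy engine, and the insert targets the ``users`` table defined in ``pgcontents.schema``:

    from pgcontents.db_utils import ignore_unique_violation
    from pgcontents.schema import users

    with engine.begin() as db:
        with ignore_unique_violation():
            # A silent no-op at the Python level if the row already exists,
            # but no further commands may be issued on this transaction.
            db.execute(users.insert().values(id='some-user'))
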
91 | 
92 |     Raises AssertionError if there is no field named 'content' in ``fields``.
93 |     """
94 |     assert(len(fields) == len(row))
95 | 
96 |     field_names = list(map(_get_name, fields))
97 |     assert 'content' in field_names, "Missing content field."
98 | 
99 |     result = dict(zip(field_names, row))
100 |     result['content'] = decrypt_func(result['content'])
101 |     return result
102 | 
--------------------------------------------------------------------------------
/pgcontents/error.py:
--------------------------------------------------------------------------------
1 | """
2 | Errors and exceptions for PostgresContentsManager.
3 | """
4 | 
5 | 
6 | class NoSuchDirectory(Exception):
7 |     pass
8 | 
9 | 
10 | class NoSuchFile(Exception):
11 |     pass
12 | 
13 | 
14 | class NoSuchCheckpoint(Exception):
15 |     pass
16 | 
17 | 
18 | class PathOutsideRoot(Exception):
19 |     pass
20 | 
21 | 
22 | class FileExists(Exception):
23 |     pass
24 | 
25 | 
26 | class DirectoryExists(Exception):
27 |     pass
28 | 
29 | 
30 | class DirectoryNotEmpty(Exception):
31 |     pass
32 | 
33 | 
34 | class FileTooLarge(Exception):
35 |     pass
36 | 
37 | 
38 | class RenameRoot(Exception):
39 |     pass
40 | 
41 | 
42 | class CorruptedFile(Exception):
43 |     pass
--------------------------------------------------------------------------------
/pgcontents/hybridmanager.py:
--------------------------------------------------------------------------------
1 | """
2 | Multi-backend ContentsManager.
3 | """
4 | from __future__ import unicode_literals
5 | 
6 | from six import iteritems
7 | from tornado.web import HTTPError
8 | 
9 | from .api_utils import (
10 |     base_directory_model,
11 |     normalize_api_path,
12 |     outside_root_to_404,
13 | )
14 | from .utils.ipycompat import ContentsManager, Dict
15 | 
16 | 
17 | @outside_root_to_404
18 | def _resolve_path(path, manager_dict):
19 |     """
20 |     Resolve a path based on a dictionary of manager prefixes.
21 | 
22 |     Returns a triple of (prefix, manager, manager_relative_path).
23 |     """
24 |     path = normalize_api_path(path)
25 |     parts = path.split('/')
26 | 
27 |     # Try to find a sub-manager for the first subdirectory.
28 |     mgr = manager_dict.get(parts[0])
29 |     if mgr is not None:
30 |         return parts[0], mgr, '/'.join(parts[1:])
31 | 
32 |     # Try to use the root manager, if one was supplied.
33 |     mgr = manager_dict.get('')
34 |     if mgr is not None:
35 |         return '', mgr, path
36 | 
37 |     raise HTTPError(
38 |         404,
39 |         "Couldn't resolve path [{path}] and "
40 |         "no root manager supplied!".format(path=path)
41 |     )
42 | 
43 | 
44 | def _get_arg(argname, args, kwargs):
45 |     """
46 |     Get an argument, either from kwargs or from the first entry in args.
47 |     Raises a TypeError if argname not in kwargs and len(args) == 0.
48 | 
49 |     Mutates kwargs in place if the value is found in kwargs.
50 |     """
51 |     try:
52 |         return kwargs.pop(argname), args
53 |     except KeyError:
54 |         pass
55 |     try:
56 |         return args[0], args[1:]
57 |     except IndexError:
58 |         raise TypeError("No value passed for %s" % argname)
59 | 
60 | 
61 | def _apply_prefix(prefix, model):
62 |     """
63 |     Prefix all path entries in model with the given prefix.
64 |     """
65 |     if not isinstance(model, dict):
66 |         raise TypeError("Expected dict for model, got %s" % type(model))
67 | 
68 |     # We get unwanted leading/trailing slashes if prefix or model['path'] are
69 |     # '', both of which are legal values.
70 | model['path'] = '/'.join((prefix, model['path'])).strip('/') 71 | if model['type'] in ('notebook', 'file'): 72 | return model 73 | 74 | if model['type'] != 'directory': 75 | raise ValueError("Unknown model type %s." % type(model)) 76 | 77 | content = model.get('content', None) 78 | if content is not None: 79 | for sub_model in content: 80 | _apply_prefix(prefix, sub_model) 81 | 82 | return model 83 | 84 | 85 | # Dispatch decorators. 86 | def path_dispatch1(mname, returns_model): 87 | """ 88 | Decorator for methods that accept path as a first argument. 89 | """ 90 | def _wrapper(self, *args, **kwargs): 91 | path, args = _get_arg('path', args, kwargs) 92 | prefix, mgr, mgr_path = _resolve_path(path, self.managers) 93 | result = getattr(mgr, mname)(mgr_path, *args, **kwargs) 94 | if returns_model and prefix: 95 | return _apply_prefix(prefix, result) 96 | else: 97 | return result 98 | 99 | return _wrapper 100 | 101 | 102 | def path_dispatch2(mname, first_argname, returns_model): 103 | """ 104 | Decorator for methods that accept path as a second argument. 105 | """ 106 | def _wrapper(self, *args, **kwargs): 107 | other, args = _get_arg(first_argname, args, kwargs) 108 | path, args = _get_arg('path', args, kwargs) 109 | prefix, mgr, mgr_path = _resolve_path(path, self.managers) 110 | result = getattr(mgr, mname)(other, mgr_path, *args, **kwargs) 111 | if returns_model and prefix: 112 | return _apply_prefix(prefix, result) 113 | else: 114 | return result 115 | return _wrapper 116 | 117 | 118 | def path_dispatch_kwarg(mname, path_default, returns_model): 119 | """ 120 | Parameterized decorator for methods that accept path as a second 121 | argument. 122 | """ 123 | def _wrapper(self, path=path_default, **kwargs): 124 | prefix, mgr, mgr_path = _resolve_path(path, self.managers) 125 | result = getattr(mgr, mname)(path=mgr_path, **kwargs) 126 | if returns_model and prefix: 127 | return _apply_prefix(prefix, result) 128 | else: 129 | return result 130 | return _wrapper 131 | 132 | 133 | def path_dispatch_old_new(mname, returns_model): 134 | """ 135 | Decorator for methods accepting old_path and new_path. 136 | """ 137 | def _wrapper(self, old_path, new_path, *args, **kwargs): 138 | old_prefix, old_mgr, old_mgr_path = _resolve_path( 139 | old_path, self.managers 140 | ) 141 | new_prefix, new_mgr, new_mgr_path = _resolve_path( 142 | new_path, self.managers, 143 | ) 144 | if old_mgr is not new_mgr: 145 | # TODO: Consider supporting this via get+delete+save. 146 | raise HTTPError( 147 | 400, 148 | "Can't move files between backends ({old} -> {new})".format( 149 | old=old_path, 150 | new=new_path, 151 | ) 152 | ) 153 | assert new_prefix == old_prefix 154 | result = getattr(new_mgr, mname)( 155 | old_mgr_path, 156 | new_mgr_path, 157 | *args, 158 | **kwargs 159 | ) 160 | if returns_model and new_prefix: 161 | return _apply_prefix(new_prefix, result) 162 | else: 163 | return result 164 | return _wrapper 165 | 166 | 167 | class HybridContentsManager(ContentsManager): 168 | """ 169 | ContentsManager subclass that delegates specific subdirectories to other 170 | ContentsManager/Checkpoints pairs. 
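A sketch of what the dispatch helpers above compute; the manager objects here are hypothetical stand-ins:

    from pgcontents.hybridmanager import _apply_prefix, _resolve_path

    managers = {'': root_mgr, 'shared': shared_mgr}  # hypothetical managers
    _resolve_path('shared/nb.ipynb', managers)  # ('shared', shared_mgr, 'nb.ipynb')
    _resolve_path('notes/nb.ipynb', managers)   # ('', root_mgr, 'notes/nb.ipynb')

    model = {'type': 'file', 'path': 'nb.ipynb'}
    _apply_prefix('shared', model)['path']      # -> 'shared/nb.ipynb'
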
171 | """ 172 | 173 | manager_classes = Dict( 174 | config=True, 175 | help=("Dict mapping root dir -> ContentsManager class.") 176 | ) 177 | 178 | manager_kwargs = Dict( 179 | config=True, 180 | help=("Dict of dicts mapping root dir -> kwargs for manager.") 181 | ) 182 | 183 | managers = Dict(help=("Dict mapping root dir -> ContentsManager.")) 184 | 185 | def _managers_default(self): 186 | return { 187 | key: mgr_cls( 188 | parent=self, 189 | log=self.log, 190 | **self.manager_kwargs.get(key, {}) 191 | ) 192 | for key, mgr_cls in iteritems(self.manager_classes) 193 | } 194 | 195 | def _managers_changed(self, name, old, new): 196 | """ 197 | Strip slashes from directories before updating. 198 | """ 199 | for key in new: 200 | if '/' in key: 201 | raise ValueError( 202 | "Expected directory names w/o slashes. Got [%s]" % key 203 | ) 204 | self.managers = {k.strip('/'): v for k, v in new.items()} 205 | 206 | @property 207 | def root_manager(self): 208 | return self.managers.get('') 209 | 210 | def _extra_root_dirs(self): 211 | return [ 212 | base_directory_model(path) 213 | for path in self.managers 214 | if path 215 | ] 216 | 217 | is_hidden = path_dispatch1('is_hidden', False) 218 | dir_exists = path_dispatch1('dir_exists', False) 219 | file_exists = path_dispatch_kwarg('file_exists', '', False) 220 | exists = path_dispatch1('exists', False) 221 | 222 | save = path_dispatch2('save', 'model', True) 223 | rename = path_dispatch_old_new('rename', False) 224 | 225 | __get = path_dispatch1('get', True) 226 | __delete = path_dispatch1('delete', False) 227 | 228 | @outside_root_to_404 229 | def get(self, path, content=True, type=None, format=None): 230 | """ 231 | Special case handling for listing root dir. 232 | """ 233 | path = normalize_api_path(path) 234 | if path: 235 | return self.__get(path, content=content, type=type, format=format) 236 | if not content: 237 | return base_directory_model('') 238 | 239 | extra_content = self._extra_root_dirs() 240 | rm = self.root_manager 241 | if rm is None: 242 | root_model = base_directory_model('') 243 | root_model.update( 244 | format='json', 245 | content=extra_content, 246 | ) 247 | else: 248 | root_model = rm.get( 249 | path, 250 | content=content, 251 | type=type, 252 | format=format, 253 | ) 254 | # Append the extra directories. 255 | root_model['content'].extend(extra_content) 256 | return root_model 257 | 258 | @outside_root_to_404 259 | def delete(self, path): 260 | """ 261 | Ensure that roots of our managers can't be deleted. This should be 262 | enforced by https://github.com/ipython/ipython/pull/8168, but rogue 263 | implementations might override this behavior. 264 | """ 265 | path = normalize_api_path(path) 266 | if path in self.managers: 267 | raise HTTPError( 268 | 400, "Can't delete root of %s" % self.managers[path] 269 | ) 270 | return self.__delete(path) 271 | 272 | create_checkpoint = path_dispatch1('create_checkpoint', False) 273 | list_checkpoints = path_dispatch1('list_checkpoints', False) 274 | restore_checkpoint = path_dispatch2( 275 | 'restore_checkpoint', 276 | 'checkpoint_id', 277 | False, 278 | ) 279 | delete_checkpoint = path_dispatch2( 280 | 'delete_checkpoint', 281 | 'checkpoint_id', 282 | False, 283 | ) 284 | -------------------------------------------------------------------------------- /pgcontents/managerbase.py: -------------------------------------------------------------------------------- 1 | """ 2 | Mixin for classes interacting with the pgcontents database. 
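A minimal sketch of wiring this class into a Jupyter notebook config (compare examples/hybrid_manager_example.py in this repo; ``c`` is the config object in jupyter_notebook_config.py, and the 'postgres' prefix is an arbitrary choice):

    from notebook.services.contents.filemanager import FileContentsManager
    from pgcontents.hybridmanager import HybridContentsManager
    from pgcontents.pgmanager import PostgresContentsManager

    c.NotebookApp.contents_manager_class = HybridContentsManager
    c.HybridContentsManager.manager_classes = {
        # The root is handled by the local filesystem.
        '': FileContentsManager,
        # Anything under /postgres/ is stored in the database.
        'postgres': PostgresContentsManager,
    }
    c.HybridContentsManager.manager_kwargs = {
        'postgres': {'db_url': 'postgresql://user@/pgcontents'},
    }
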
3 | """
4 | from getpass import getuser
5 | from sqlalchemy import (
6 |     create_engine,
7 | )
8 | from sqlalchemy.engine.base import Engine
9 | from tornado.web import HTTPError
10 | 
11 | from .constants import UNLIMITED
12 | from .crypto import NoEncryption
13 | from .query import ensure_db_user
14 | from .utils.ipycompat import Any, Bool, Instance, Integer, HasTraits, Unicode
15 | 
16 | 
17 | class PostgresManagerMixin(HasTraits):
18 |     """
19 |     Shared behavior for Postgres-backed ContentsManagers.
20 |     """
21 |     db_url = Unicode(
22 |         default_value="postgresql://{user}@/pgcontents".format(
23 |             user=getuser(),
24 |         ),
25 |         config=True,
26 |         help="Connection string for the database.",
27 |     )
28 | 
29 |     user_id = Unicode(
30 |         default_value=getuser(),
31 |         allow_none=True,
32 |         config=True,
33 |         help="Name for the user whose contents we're managing.",
34 |     )
35 | 
36 |     create_user_on_startup = Bool(
37 |         default_value=True,
38 |         config=True,
39 |         help="Create a user for user_id automatically?",
40 |     )
41 | 
42 |     max_file_size_bytes = Integer(
43 |         default_value=UNLIMITED,
44 |         config=True,
45 |         help="Maximum size in bytes of a file that will be saved.",
46 |     )
47 | 
48 |     crypto = Any(
49 |         default_value=NoEncryption(),
50 |         allow_none=False,
51 |         config=True,
52 |         help=(
53 |             "Object with encrypt() and decrypt() methods to "
54 |             "call on data entering/exiting the database."
55 |         )
56 |     )
57 | 
58 |     engine = Instance(Engine)
59 | 
60 |     def _engine_default(self):
61 |         return create_engine(self.db_url, echo=False)
62 | 
63 |     def __init__(self, *args, **kwargs):
64 |         super(PostgresManagerMixin, self).__init__(*args, **kwargs)
65 |         if self.create_user_on_startup:
66 |             self.ensure_user()
67 | 
68 |     def ensure_user(self):
69 |         with self.engine.begin() as db:
70 |             ensure_db_user(db, self.user_id)
71 | 
72 |     def no_such_entity(self, path):
73 |         self.do_404(
74 |             u"No such entity: [{path}]".format(path=path)
75 |         )
76 | 
77 |     def not_empty(self, path):
78 |         self.do_400(
79 |             u"Directory not empty: [{path}]".format(path=path)
80 |         )
81 | 
82 |     def file_too_large(self, path):
83 |         self.do_413(u"File is too large to save: [{path}]".format(path=path))
84 | 
85 |     def already_exists(self, path):
86 |         self.do_409(u"File already exists: [{path}]".format(path=path))
87 | 
88 |     def do_400(self, msg):
89 |         raise HTTPError(400, msg)
90 | 
91 |     def do_404(self, msg):
92 |         raise HTTPError(404, msg)
93 | 
94 |     def do_409(self, msg):
95 |         raise HTTPError(409, msg)
96 | 
97 |     def do_413(self, msg):
98 |         raise HTTPError(413, msg)
99 | 
100 |     def do_500(self, msg):
101 |         raise HTTPError(500, msg)
--------------------------------------------------------------------------------
/pgcontents/pgmanager.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2014 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | PostgreSQL implementation of IPython/Jupyter ContentsManager API.
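The mixin's traits above are all ``config=True``, so they can be set on the concrete manager via Jupyter config as well; a hedged sketch, with illustrative values only (``fernet`` is an assumed ``cryptography.fernet.Fernet`` instance):

    c.PostgresContentsManager.db_url = (
        'postgresql://jupyter@db.example.com/pgcontents'
    )
    c.PostgresContentsManager.user_id = 'alice'
    c.PostgresContentsManager.max_file_size_bytes = 5 * 1024 * 1024
    # Encrypt content at rest; see pgcontents/crypto.py above.
    c.PostgresContentsManager.crypto = FernetEncryption(fernet)
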
17 | """ 18 | from __future__ import unicode_literals 19 | from itertools import chain 20 | from tornado import web 21 | from traitlets import default 22 | 23 | from .api_utils import ( 24 | base_directory_model, 25 | base_model, 26 | from_b64, 27 | outside_root_to_404, 28 | reads_base64, 29 | to_api_path, 30 | to_b64, 31 | writes_base64, 32 | ) 33 | from .checkpoints import PostgresCheckpoints 34 | from .error import ( 35 | CorruptedFile, 36 | DirectoryExists, 37 | DirectoryNotEmpty, 38 | FileExists, 39 | FileTooLarge, 40 | NoSuchDirectory, 41 | NoSuchFile, 42 | PathOutsideRoot, 43 | RenameRoot, 44 | ) 45 | from .managerbase import PostgresManagerMixin 46 | from .query import ( 47 | delete_directory, 48 | delete_file, 49 | dir_exists, 50 | ensure_directory, 51 | file_exists, 52 | get_directory, 53 | get_file, 54 | get_file_id, 55 | purge_user, 56 | rename_directory, 57 | rename_file, 58 | save_file, 59 | ) 60 | from .utils.ipycompat import Bool, ContentsManager, from_dict 61 | 62 | 63 | class PostgresContentsManager(PostgresManagerMixin, ContentsManager): 64 | """ 65 | ContentsManager that persists to a postgres database rather than to the 66 | local filesystem. 67 | """ 68 | create_directory_on_startup = Bool( 69 | config=True, 70 | help="Create a root directory automatically?", 71 | ) 72 | 73 | @default('checkpoints_class') 74 | def _default_checkpoints_class(self): 75 | return PostgresCheckpoints 76 | 77 | @default('checkpoints_kwargs') 78 | def _default_checkpoints_kwargs(self): 79 | klass = PostgresContentsManager 80 | try: 81 | kw = super(klass, self)._checkpoints_kwargs_default() 82 | except AttributeError: 83 | kw = super(klass, self)._default_checkpoints_kwargs() 84 | 85 | kw.update({ 86 | 'create_user_on_startup': self.create_user_on_startup, 87 | 'crypto': self.crypto, 88 | 'db_url': self.db_url, 89 | 'max_file_size_bytes': self.max_file_size_bytes, 90 | 'user_id': self.user_id, 91 | }) 92 | return kw 93 | 94 | @default('create_directory_on_startup') 95 | def _default_create_directory_on_startup(self): 96 | return self.create_user_on_startup 97 | 98 | def __init__(self, *args, **kwargs): 99 | super(PostgresContentsManager, self).__init__(*args, **kwargs) 100 | if self.create_directory_on_startup: 101 | self.ensure_root_directory() 102 | 103 | def ensure_root_directory(self): 104 | with self.engine.begin() as db: 105 | ensure_directory(db, self.user_id, '') 106 | 107 | def purge_db(self): 108 | """ 109 | Clear all matching our user_id. 110 | """ 111 | with self.engine.begin() as db: 112 | purge_user(db, self.user_id) 113 | 114 | @outside_root_to_404 115 | def guess_type(self, path, allow_directory=True): 116 | """ 117 | Guess the type of a file. 118 | 119 | If allow_directory is False, don't consider the possibility that the 120 | file is a directory. 121 | """ 122 | if path.endswith('.ipynb'): 123 | return 'notebook' 124 | elif allow_directory and self.dir_exists(path): 125 | return 'directory' 126 | else: 127 | return 'file' 128 | 129 | # Begin ContentsManager API. 
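Before the API methods, a quick sketch of ``guess_type``'s behavior (``mgr`` is an assumed ``PostgresContentsManager`` whose user has an existing directory named 'data'):

    mgr.guess_type('analysis.ipynb')  # -> 'notebook'
    mgr.guess_type('data')            # -> 'directory' (dir_exists is True)
    mgr.guess_type('notes.txt')       # -> 'file'
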
130 | @outside_root_to_404 131 | def dir_exists(self, path): 132 | with self.engine.begin() as db: 133 | return dir_exists(db, self.user_id, path) 134 | 135 | def is_hidden(self, path): 136 | return False 137 | 138 | @outside_root_to_404 139 | def file_exists(self, path): 140 | with self.engine.begin() as db: 141 | return file_exists(db, self.user_id, path) 142 | 143 | @outside_root_to_404 144 | def get(self, path, content=True, type=None, format=None): 145 | if type is None: 146 | type = self.guess_type(path) 147 | try: 148 | fn = { 149 | 'notebook': self._get_notebook, 150 | 'directory': self._get_directory, 151 | 'file': self._get_file, 152 | }[type] 153 | except KeyError: 154 | raise ValueError("Unknown type passed: '{}'".format(type)) 155 | 156 | try: 157 | return fn(path=path, content=content, format=format) 158 | except CorruptedFile as e: 159 | self.log.error( 160 | u'Corrupted file encountered at path %r. %s', 161 | path, e, exc_info=True, 162 | ) 163 | self.do_500("Unable to read stored content at path %r." % path) 164 | 165 | @outside_root_to_404 166 | def get_file_id(self, path): 167 | """ 168 | Get the id of a file in the database. This function is specific to 169 | this implementation of ContentsManager and is not in the base class. 170 | """ 171 | with self.engine.begin() as db: 172 | try: 173 | file_id = get_file_id(db, self.user_id, path) 174 | except NoSuchFile: 175 | self.no_such_entity(path) 176 | 177 | return file_id 178 | 179 | def _get_notebook(self, path, content, format): 180 | """ 181 | Get a notebook from the database. 182 | """ 183 | with self.engine.begin() as db: 184 | try: 185 | record = get_file( 186 | db, 187 | self.user_id, 188 | path, 189 | content, 190 | self.crypto.decrypt, 191 | ) 192 | except NoSuchFile: 193 | self.no_such_entity(path) 194 | 195 | return self._notebook_model_from_db(record, content) 196 | 197 | def _notebook_model_from_db(self, record, content): 198 | """ 199 | Build a notebook model from database record. 200 | """ 201 | path = to_api_path(record['parent_name'] + record['name']) 202 | model = base_model(path) 203 | model['type'] = 'notebook' 204 | model['last_modified'] = model['created'] = record['created_at'] 205 | if content: 206 | content = reads_base64(record['content']) 207 | self.mark_trusted_cells(content, path) 208 | model['content'] = content 209 | model['format'] = 'json' 210 | self.validate_notebook_model(model) 211 | return model 212 | 213 | def _get_directory(self, path, content, format): 214 | """ 215 | Get a directory from the database. 216 | """ 217 | with self.engine.begin() as db: 218 | try: 219 | record = get_directory( 220 | db, self.user_id, path, content 221 | ) 222 | except NoSuchDirectory: 223 | if self.file_exists(path): 224 | # TODO: It's awkward/expensive to have to check this to 225 | # return a 400 instead of 404. Consider just 404ing. 226 | self.do_400("Wrong type: %s" % path) 227 | else: 228 | self.no_such_entity(path) 229 | 230 | return self._directory_model_from_db(record, content) 231 | 232 | def _convert_file_records(self, file_records): 233 | """ 234 | Apply _notebook_model_from_db or _file_model_from_db to each entry 235 | in file_records, depending on the result of `guess_type`. 
236 | """ 237 | for record in file_records: 238 | type_ = self.guess_type(record['name'], allow_directory=False) 239 | if type_ == 'notebook': 240 | yield self._notebook_model_from_db(record, False) 241 | elif type_ == 'file': 242 | yield self._file_model_from_db(record, False, None) 243 | else: 244 | self.do_500("Unknown file type %s" % type_) 245 | 246 | def _directory_model_from_db(self, record, content): 247 | """ 248 | Build a directory model from database directory record. 249 | """ 250 | model = base_directory_model(to_api_path(record['name'])) 251 | if content: 252 | model['format'] = 'json' 253 | model['content'] = list( 254 | chain( 255 | self._convert_file_records(record['files']), 256 | ( 257 | self._directory_model_from_db(subdir, False) 258 | for subdir in record['subdirs'] 259 | ), 260 | ) 261 | ) 262 | return model 263 | 264 | def _file_model_from_db(self, record, content, format): 265 | """ 266 | Build a file model from database record. 267 | """ 268 | # TODO: Most of this is shared with _notebook_model_from_db. 269 | path = to_api_path(record['parent_name'] + record['name']) 270 | model = base_model(path) 271 | model['type'] = 'file' 272 | model['last_modified'] = model['created'] = record['created_at'] 273 | if content: 274 | bcontent = record['content'] 275 | model['content'], model['format'], model['mimetype'] = from_b64( 276 | path, 277 | bcontent, 278 | format, 279 | ) 280 | return model 281 | 282 | def _get_file(self, path, content, format): 283 | with self.engine.begin() as db: 284 | try: 285 | record = get_file( 286 | db, 287 | self.user_id, 288 | path, 289 | content, 290 | self.crypto.decrypt, 291 | ) 292 | except NoSuchFile: 293 | if self.dir_exists(path): 294 | # TODO: It's awkward/expensive to have to check this to 295 | # return a 400 instead of 404. Consider just 404ing. 296 | self.do_400(u"Wrong type: %s" % path) 297 | else: 298 | self.no_such_entity(path) 299 | return self._file_model_from_db(record, content, format) 300 | 301 | def _save_notebook(self, db, model, path): 302 | """ 303 | Save a notebook. 304 | 305 | Returns a validation message. 306 | """ 307 | nb_contents = from_dict(model['content']) 308 | self.check_and_sign(nb_contents, path) 309 | save_file( 310 | db, 311 | self.user_id, 312 | path, 313 | writes_base64(nb_contents), 314 | self.crypto.encrypt, 315 | self.max_file_size_bytes, 316 | ) 317 | # It's awkward that this writes to the model instead of returning. 318 | self.validate_notebook_model(model) 319 | return model.get('message') 320 | 321 | def _save_file(self, db, model, path): 322 | """ 323 | Save a non-notebook file. 324 | """ 325 | save_file( 326 | db, 327 | self.user_id, 328 | path, 329 | to_b64(model['content'], model.get('format', None)), 330 | self.crypto.encrypt, 331 | self.max_file_size_bytes, 332 | ) 333 | return None 334 | 335 | def _save_directory(self, db, path): 336 | """ 337 | 'Save' a directory. 338 | """ 339 | ensure_directory(db, self.user_id, path) 340 | 341 | @outside_root_to_404 342 | def save(self, model, path): 343 | if 'type' not in model: 344 | raise web.HTTPError(400, u'No model type provided') 345 | if 'content' not in model and model['type'] != 'directory': 346 | raise web.HTTPError(400, u'No file content provided') 347 | 348 | path = path.strip('/') 349 | 350 | # Almost all of this is duplicated with FileContentsManager :(. 
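For reference, a sketch of the model dicts ``save`` accepts, matching the type and content checks above (``mgr`` is an assumed ``PostgresContentsManager``; paths are made up):

    mgr.save({'type': 'directory'}, path='projects')
    mgr.save(
        {'type': 'file', 'format': 'text', 'content': u'hello\n'},
        path='projects/hello.txt',
    )
    # Notebooks pass the nbformat structure as 'content' with type='notebook'.
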
351 | self.log.debug("Saving %s", path) 352 | if model['type'] not in ('file', 'directory', 'notebook'): 353 | self.do_400("Unhandled contents type: %s" % model['type']) 354 | try: 355 | with self.engine.begin() as db: 356 | if model['type'] == 'notebook': 357 | validation_message = self._save_notebook(db, model, path) 358 | elif model['type'] == 'file': 359 | validation_message = self._save_file(db, model, path) 360 | else: 361 | validation_message = self._save_directory(db, path) 362 | except (web.HTTPError, PathOutsideRoot): 363 | raise 364 | except FileTooLarge: 365 | self.file_too_large(path) 366 | except Exception as e: 367 | self.log.error(u'Error while saving file: %s %s', 368 | path, e, exc_info=True) 369 | self.do_500( 370 | u'Unexpected error while saving file: %s %s' % (path, e) 371 | ) 372 | 373 | # TODO: Consider not round-tripping to the database again here. 374 | model = self.get(path, type=model['type'], content=False) 375 | if validation_message is not None: 376 | model['message'] = validation_message 377 | return model 378 | 379 | @outside_root_to_404 380 | def rename_file(self, old_path, path): 381 | """ 382 | Rename object from old_path to path. 383 | 384 | NOTE: This method is unfortunately named on the base class. It actually 385 | moves files and directories as well. 386 | """ 387 | with self.engine.begin() as db: 388 | try: 389 | if self.file_exists(old_path): 390 | rename_file(db, self.user_id, old_path, path) 391 | elif self.dir_exists(old_path): 392 | rename_directory(db, self.user_id, old_path, path) 393 | else: 394 | self.no_such_entity(old_path) 395 | except (FileExists, DirectoryExists): 396 | self.already_exists(path) 397 | except RenameRoot as e: 398 | self.do_409(str(e)) 399 | except (web.HTTPError, PathOutsideRoot): 400 | raise 401 | except Exception as e: 402 | self.log.exception( 403 | 'Error renaming file/directory from %s to %s', 404 | old_path, 405 | path, 406 | ) 407 | self.do_500( 408 | u'Unexpected error while renaming %s: %s' 409 | % (old_path, e) 410 | ) 411 | 412 | def _delete_non_directory(self, path): 413 | with self.engine.begin() as db: 414 | deleted_count = delete_file(db, self.user_id, path) 415 | if not deleted_count: 416 | self.no_such_entity(path) 417 | 418 | def _delete_directory(self, path): 419 | with self.engine.begin() as db: 420 | try: 421 | deleted_count = delete_directory(db, self.user_id, path) 422 | except DirectoryNotEmpty: 423 | self.not_empty(path) 424 | if not deleted_count: 425 | self.no_such_entity(path) 426 | 427 | @outside_root_to_404 428 | def delete_file(self, path): 429 | """ 430 | Delete object corresponding to path. 431 | """ 432 | if self.file_exists(path): 433 | self._delete_non_directory(path) 434 | elif self.dir_exists(path): 435 | self._delete_directory(path) 436 | else: 437 | self.no_such_entity(path) 438 | -------------------------------------------------------------------------------- /pgcontents/schema.py: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2014 Quantopian, Inc. 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | 16 | from __future__ import unicode_literals 17 | 18 | from sqlalchemy import ( 19 | CheckConstraint, 20 | Column, 21 | DateTime, 22 | ForeignKey, 23 | ForeignKeyConstraint, 24 | func, 25 | Integer, 26 | LargeBinary, 27 | MetaData, 28 | Table, 29 | Unicode, 30 | UniqueConstraint, 31 | ) 32 | 33 | metadata = MetaData(schema='pgcontents') 34 | 35 | # Shared Types 36 | UserID = Unicode(30) 37 | FilePath = Unicode(300) 38 | 39 | users = Table( 40 | 'users', 41 | metadata, 42 | Column('id', UserID, primary_key=True), 43 | ) 44 | 45 | """ 46 | We need to be able to query: 47 | 1. Does a directory exist? 48 | 2. Does a file exists? 49 | 3. What are the contents of a directory. This must include both files **and** 50 | other directories. 51 | 52 | Having just directory_name and suffix on files doesn't work because there are 53 | no entities that represent just directories themselves, which means there's no 54 | way to determine if a directory is a child of another directory. 55 | """ 56 | directories = Table( 57 | 'directories', 58 | metadata, 59 | # ======= # 60 | # Columns # 61 | # ======= # 62 | Column( 63 | 'user_id', 64 | UserID, 65 | ForeignKey(users.c.id), 66 | nullable=False, 67 | primary_key=True 68 | ), 69 | Column('name', FilePath, nullable=False, primary_key=True), 70 | Column('parent_user_id', UserID, nullable=True), 71 | Column('parent_name', FilePath, nullable=True), 72 | # =========== # 73 | # Constraints # 74 | # =========== # 75 | ForeignKeyConstraint( 76 | ['parent_user_id', 'parent_name'], 77 | ['directories.user_id', 'directories.name'], 78 | deferrable=True, 79 | initially=u'IMMEDIATE' 80 | ), 81 | CheckConstraint( 82 | 'user_id = parent_user_id', 83 | name='directories_match_user_id', 84 | ), 85 | # Assert that parent_name is a prefix of name. 86 | CheckConstraint( 87 | "position(parent_name in name) != 0", 88 | name='directories_parent_name_prefix', 89 | ), 90 | # Assert that all directories begin or end with '/'. 91 | CheckConstraint( 92 | "left(name, 1) = '/'", 93 | name='directories_startwith_slash', 94 | ), 95 | CheckConstraint( 96 | "right(name, 1) = '/'", 97 | name='directories_endwith_slash', 98 | ), 99 | # Assert that the name of this directory has one more '/' than its parent. 100 | CheckConstraint( 101 | "length(regexp_replace(name, '[^/]+', '', 'g')) - 1" 102 | "= length(regexp_replace(parent_name, '[^/]+', '', 'g'))", 103 | name='directories_slash_count', 104 | ), 105 | # Assert that parent_user_id is NULL iff parent_name is NULL. This should 106 | # be true only for each user's root directory. 
107 | CheckConstraint( 108 | ''.join( 109 | [ 110 | '(parent_name IS NULL AND parent_user_id IS NULL)' 111 | ' OR ', 112 | '(parent_name IS NOT NULL AND parent_user_id IS NOT NULL)' 113 | ], 114 | ), 115 | name='directories_null_user_id_match', 116 | ), 117 | ) 118 | 119 | 120 | files = Table( 121 | 'files', 122 | metadata, 123 | Column('id', Integer(), nullable=False, primary_key=True), 124 | Column('name', FilePath, nullable=False), 125 | Column( 126 | 'user_id', 127 | UserID, 128 | ForeignKey(users.c.id), 129 | nullable=False, 130 | ), 131 | Column('parent_name', FilePath, nullable=False), 132 | Column('content', LargeBinary(100000), nullable=False), 133 | Column( 134 | 'created_at', 135 | DateTime, 136 | default=func.now(), 137 | nullable=False, 138 | ), 139 | UniqueConstraint( 140 | 'user_id', 141 | 'parent_name', 142 | 'name', 143 | name="uix_filepath_username" 144 | ), 145 | ForeignKeyConstraint( 146 | ['user_id', 'parent_name'], 147 | [directories.c.user_id, directories.c.name], 148 | onupdate=u'CASCADE' 149 | ), 150 | ) 151 | 152 | 153 | # Alternate checkpoint table used by PostgresCheckpointsManager. 154 | remote_checkpoints = Table( 155 | 'remote_checkpoints', 156 | metadata, 157 | Column('id', Integer(), nullable=False, primary_key=True), 158 | Column( 159 | 'user_id', 160 | UserID, 161 | ForeignKey(users.c.id), 162 | nullable=False, 163 | ), 164 | Column('path', FilePath, nullable=False), 165 | Column('content', LargeBinary(100000), nullable=False), 166 | Column('last_modified', DateTime, default=func.now(), nullable=False), 167 | ) 168 | -------------------------------------------------------------------------------- /pgcontents/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/quantopian/pgcontents/51f8febcf6ece4e88b047768b9ce18553162d63c/pgcontents/tests/__init__.py -------------------------------------------------------------------------------- /pgcontents/tests/test_encryption.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for notebook encryption utilities. 3 | """ 4 | from unittest import TestCase 5 | 6 | from cryptography.fernet import Fernet 7 | 8 | from ..crypto import ( 9 | derive_fallback_fernet_keys, 10 | FallbackCrypto, 11 | FernetEncryption, 12 | memoize_single_arg, 13 | NoEncryption, 14 | single_password_crypto_factory, 15 | ) 16 | 17 | 18 | class TestEncryption(TestCase): 19 | 20 | def test_fernet_derivation(self): 21 | pws = [u'currentpassword', u'oldpassword', None] 22 | 23 | # This must be Unicode, so we use the `u` prefix to support py2. 24 | user_id = u'4e322fa200fffd0001000001' 25 | 26 | current_crypto = single_password_crypto_factory(pws[0])(user_id) 27 | old_crypto = single_password_crypto_factory(pws[1])(user_id) 28 | 29 | def make_single_key_crypto(key): 30 | if key is None: 31 | return NoEncryption() 32 | return FernetEncryption(Fernet(key.encode('ascii'))) 33 | 34 | multi_fernet_crypto = FallbackCrypto( 35 | [make_single_key_crypto(k) 36 | for k in derive_fallback_fernet_keys(pws, user_id)] 37 | ) 38 | 39 | data = b'ayy lmao' 40 | 41 | # Data encrypted with the current key. 42 | encrypted_data_current = current_crypto.encrypt(data) 43 | self.assertNotEqual(encrypted_data_current, data) 44 | self.assertEqual(current_crypto.decrypt(encrypted_data_current), data) 45 | 46 | # Data encrypted with the old key. 
47 | encrypted_data_old = old_crypto.encrypt(data) 48 | self.assertNotEqual(encrypted_data_current, data) 49 | self.assertEqual(old_crypto.decrypt(encrypted_data_old), data) 50 | 51 | # The single fernet with the first key should be able to decrypt the 52 | # multi-fernet's encrypted data. 53 | self.assertEqual( 54 | current_crypto.decrypt(multi_fernet_crypto.encrypt(data)), 55 | data 56 | ) 57 | 58 | # Multi should be able decrypt anything encrypted with either key. 59 | self.assertEqual(multi_fernet_crypto.decrypt(encrypted_data_current), 60 | data) 61 | self.assertEqual(multi_fernet_crypto.decrypt(encrypted_data_old), data) 62 | 63 | # Unencrypted data should be returned unchanged. 64 | self.assertEqual(multi_fernet_crypto.decrypt(data), data) 65 | 66 | def test_memoize_single_arg(self): 67 | full_calls = [] 68 | 69 | @memoize_single_arg 70 | def mock_factory(user_id): 71 | full_calls.append(user_id) 72 | return u'crypto' + user_id 73 | 74 | calls_to_make = [u'1', u'2', u'3', u'2', u'1'] 75 | expected_results = [u'crypto' + user_id for user_id in calls_to_make] 76 | expected_full_calls = [u'1', u'2', u'3'] 77 | 78 | results = [] 79 | for user_id in calls_to_make: 80 | results.append(mock_factory(user_id)) 81 | 82 | self.assertEqual(results, expected_results) 83 | self.assertEqual(full_calls, expected_full_calls) 84 | -------------------------------------------------------------------------------- /pgcontents/tests/test_hybrid_manager.py: -------------------------------------------------------------------------------- 1 | # coding: utf-8 2 | """ 3 | Tests for HybridContentsManager. 4 | """ 5 | from os import ( 6 | makedirs, 7 | mkdir, 8 | ) 9 | from os.path import ( 10 | exists, 11 | join as osjoin, 12 | ) 13 | from posixpath import join as pjoin 14 | from unittest import TestCase 15 | 16 | from six import ( 17 | iteritems, 18 | itervalues, 19 | ) 20 | from IPython.utils.tempdir import TemporaryDirectory 21 | from notebook.services.contents.tests.test_manager import TestContentsManager 22 | from notebook.services.contents.tests.test_contents_api import APITest 23 | 24 | from pgcontents.hybridmanager import HybridContentsManager 25 | from pgcontents.pgmanager import PostgresContentsManager 26 | 27 | from .test_pgmanager import PostgresContentsManagerTestCase 28 | from .utils import ( 29 | assertRaisesHTTPError, 30 | make_fernet, 31 | remigrate_test_schema, 32 | TEST_DB_URL, 33 | ) 34 | from ..utils.ipycompat import FileContentsManager 35 | 36 | 37 | setup_module = remigrate_test_schema 38 | 39 | 40 | def _make_dir(contents_manager, api_path): 41 | """ 42 | Make a directory. 43 | """ 44 | os_path = contents_manager._get_os_path(api_path) 45 | try: 46 | makedirs(os_path) 47 | except OSError: 48 | print("Directory already exists: %r" % os_path) 49 | 50 | 51 | class FileTestCase(TestContentsManager): 52 | 53 | def setUp(self): 54 | self._temp_dir = TemporaryDirectory() 55 | self.td = self._temp_dir.name 56 | self._file_manager = FileContentsManager(root_dir=self.td, 57 | delete_to_trash=False) 58 | self.contents_manager = HybridContentsManager( 59 | managers={'': self._file_manager} 60 | ) 61 | 62 | def tearDown(self): 63 | self._temp_dir.cleanup() 64 | 65 | def make_dir(self, api_path): 66 | """make a subdirectory at api_path 67 | override in subclasses if contents are not on the filesystem. 
68 | """ 69 | _make_dir(self._file_manager, api_path) 70 | 71 | 72 | class PostgresTestCase(PostgresContentsManagerTestCase): 73 | 74 | def setUp(self): 75 | self.crypto = make_fernet() 76 | self._pgmanager = PostgresContentsManager( 77 | user_id='test', 78 | db_url=TEST_DB_URL, 79 | crypto=self.crypto, 80 | ) 81 | self._pgmanager.ensure_user() 82 | self._pgmanager.ensure_root_directory() 83 | 84 | self.contents_manager = HybridContentsManager( 85 | managers={'': self._pgmanager} 86 | ) 87 | 88 | self.addCleanup(self._pgmanager.engine.dispose) 89 | self.addCleanup(self._pgmanager.checkpoints.engine.dispose) 90 | 91 | # HybridContentsManager is not expected to dispatch calls to get_file_id 92 | # because PostgresContentsManager is the only contents manager that 93 | # implements it. 94 | def test_get_file_id(self): 95 | pass 96 | 97 | def set_pgmgr_attribute(self, name, value): 98 | setattr(self._pgmanager, name, value) 99 | 100 | def make_dir(self, api_path): 101 | self.contents_manager.new( 102 | model={'type': 'directory'}, 103 | path=api_path, 104 | ) 105 | 106 | 107 | class MultiRootTestCase(TestCase): 108 | 109 | def setUp(self): 110 | 111 | mgr_roots = ['A', '', u'unicodé'] 112 | self.temp_dirs = { 113 | prefix: TemporaryDirectory() for prefix in mgr_roots 114 | } 115 | self.temp_dir_names = { 116 | prefix: v.name for prefix, v in iteritems(self.temp_dirs) 117 | } 118 | self._managers = { 119 | prefix: FileContentsManager(root_dir=self.temp_dir_names[prefix], 120 | delete_to_trash=False) 121 | for prefix in mgr_roots 122 | } 123 | self.contents_manager = HybridContentsManager(managers=self._managers) 124 | 125 | def test_get(self): 126 | cm = self.contents_manager 127 | 128 | untitled_nb = 'Untitled.ipynb' 129 | untitled_txt = 'untitled.txt' 130 | for prefix, real_dir in iteritems(self.temp_dir_names): 131 | # Create a notebook 132 | model = cm.new_untitled(path=prefix, type='notebook') 133 | name = model['name'] 134 | path = model['path'] 135 | 136 | self.assertEqual(name, untitled_nb) 137 | self.assertEqual(path, pjoin(prefix, untitled_nb)) 138 | self.assertTrue( 139 | exists(osjoin(real_dir, untitled_nb)) 140 | ) 141 | 142 | # Check that we can 'get' on the notebook we just created 143 | model2 = cm.get(path) 144 | assert isinstance(model2, dict) 145 | self.assertDictContainsSubset( 146 | {'name': name, 'path': path}, 147 | model2, 148 | ) 149 | 150 | nb_as_file = cm.get(path, content=True, type='file') 151 | self.assertDictContainsSubset( 152 | {'name': name, 'path': path, 'format': 'text'}, 153 | nb_as_file, 154 | ) 155 | self.assertNotIsInstance(nb_as_file['content'], dict) 156 | 157 | nb_as_bin_file = cm.get( 158 | path=path, 159 | content=True, 160 | type='file', 161 | format='base64' 162 | ) 163 | self.assertDictContainsSubset( 164 | {'name': name, 'path': path, 'format': 'base64'}, 165 | nb_as_bin_file, 166 | ) 167 | self.assertNotIsInstance(nb_as_bin_file['content'], dict) 168 | 169 | # Test notebook in sub-directory 170 | sub_dir = 'foo' 171 | mkdir(osjoin(real_dir, sub_dir)) 172 | prefixed_sub_dir = pjoin(prefix, sub_dir) 173 | 174 | cm.new_untitled(path=prefixed_sub_dir, ext='.ipynb') 175 | self.assertTrue(exists(osjoin(real_dir, sub_dir, untitled_nb))) 176 | 177 | sub_dir_nbpath = pjoin(prefixed_sub_dir, untitled_nb) 178 | model2 = cm.get(sub_dir_nbpath) 179 | self.assertDictContainsSubset( 180 | { 181 | 'type': 'notebook', 182 | 'format': 'json', 183 | 'name': untitled_nb, 184 | 'path': sub_dir_nbpath, 185 | }, 186 | model2, 187 | ) 188 | self.assertIn('content', 
model2) 189 | 190 | # Test .txt in sub-directory. 191 | cm.new_untitled(path=prefixed_sub_dir, ext='.txt') 192 | self.assertTrue(exists(osjoin(real_dir, sub_dir, untitled_txt))) 193 | 194 | sub_dir_txtpath = pjoin(prefixed_sub_dir, untitled_txt) 195 | file_model = cm.get(path=sub_dir_txtpath) 196 | self.assertDictContainsSubset( 197 | { 198 | 'content': '', 199 | 'format': 'text', 200 | 'mimetype': 'text/plain', 201 | 'name': 'untitled.txt', 202 | 'path': sub_dir_txtpath, 203 | 'type': 'file', 204 | 'writable': True, 205 | }, 206 | file_model, 207 | ) 208 | self.assertIn('created', file_model) 209 | self.assertIn('last_modified', file_model) 210 | 211 | # Test directory in sub-directory. 212 | sub_sub_dirname = 'bar' 213 | sub_sub_dirpath = pjoin(prefixed_sub_dir, sub_sub_dirname) 214 | cm.save( 215 | {'type': 'directory', 'path': sub_sub_dirpath}, 216 | sub_sub_dirpath, 217 | ) 218 | self.assertTrue(exists(osjoin(real_dir, sub_dir, sub_sub_dirname))) 219 | sub_sub_dir_model = cm.get(sub_sub_dirpath) 220 | self.assertDictContainsSubset( 221 | { 222 | 'type': 'directory', 223 | 'format': 'json', 224 | 'name': sub_sub_dirname, 225 | 'path': sub_sub_dirpath, 226 | 'content': [], 227 | }, 228 | sub_sub_dir_model, 229 | ) 230 | 231 | # Test list with content on prefix/foo. 232 | dirmodel = cm.get(prefixed_sub_dir) 233 | self.assertDictContainsSubset( 234 | { 235 | 'type': 'directory', 236 | 'path': prefixed_sub_dir, 237 | 'name': sub_dir, 238 | }, 239 | dirmodel, 240 | ) 241 | self.assertIsInstance(dirmodel['content'], list) 242 | self.assertEqual(len(dirmodel['content']), 3) 243 | 244 | # Request each item in the subdirectory with no content. 245 | nbmodel_no_content = cm.get(sub_dir_nbpath, content=False) 246 | file_model_no_content = cm.get(sub_dir_txtpath, content=False) 247 | sub_sub_dir_no_content = cm.get(sub_sub_dirpath, content=False) 248 | 249 | for entry in dirmodel['content']: 250 | # Order isn't guaranteed by the spec, so this is a hacky way of 251 | # verifying that all entries are matched. 252 | if entry['path'] == sub_sub_dir_no_content['path']: 253 | self.assertEqual(entry, sub_sub_dir_no_content) 254 | elif entry['path'] == nbmodel_no_content['path']: 255 | self.assertEqual(entry, nbmodel_no_content) 256 | elif entry['path'] == file_model_no_content['path']: 257 | self.assertEqual(entry, file_model_no_content) 258 | else: 259 | self.fail("Unexpected directory entry: %s" % entry) 260 | 261 | def test_root_dir_ops(self): 262 | cm = self.contents_manager 263 | cm.new_untitled(ext='.ipynb') 264 | cm.new_untitled(ext='.txt') 265 | 266 | root_dir_model = cm.get('') 267 | self.assertDictContainsSubset( 268 | {'path': '', 'name': '', 'type': 'directory', 'format': 'json'}, 269 | root_dir_model, 270 | ) 271 | content = root_dir_model['content'] 272 | self.assertIsInstance(content, list) 273 | # Two new files, plus the sub-manager directories. 
274 | dirs = set(self.temp_dir_names) 275 | files = {'Untitled.ipynb', 'untitled.txt'} 276 | paths = dirs | files 277 | self.assertEqual(len(content), 4) 278 | for entry in content: 279 | self.assertEqual(entry['path'], entry['name']) 280 | path = entry['path'] 281 | if path not in paths: 282 | self.fail("Unexpected entry path %s" % entry) 283 | if path in dirs: 284 | self.assertEqual(entry['type'], 'directory') 285 | elif path == 'Untitled.ipynb': 286 | self.assertEqual(entry['type'], 'notebook') 287 | else: 288 | self.assertEqual(entry['type'], 'file') 289 | 290 | def test_cant_delete_root(self): 291 | cm = self.contents_manager 292 | for prefix in self.temp_dirs: 293 | with assertRaisesHTTPError(self, 400): 294 | cm.delete(prefix) 295 | 296 | def test_cant_rename_across_managers(self): 297 | cm = self.contents_manager 298 | cm.new_untitled(ext='.ipynb') 299 | 300 | with assertRaisesHTTPError(self, 400): 301 | cm.rename('Untitled.ipynb', 'A/Untitled.ipynb') 302 | 303 | def tearDown(self): 304 | for dir_ in itervalues(self.temp_dirs): 305 | dir_.cleanup() 306 | 307 | 308 | del PostgresContentsManagerTestCase 309 | del TestContentsManager 310 | del APITest 311 | -------------------------------------------------------------------------------- /pgcontents/tests/test_pgcontents_api.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | # 3 | # Copyright 2014 Quantopian, Inc. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """ 17 | Run IPython's APITest for ContentsManager using PostgresContentsManager. 18 | """ 19 | from __future__ import unicode_literals 20 | from base64 import ( 21 | b64encode, 22 | ) 23 | from dateutil.parser import parse 24 | from six import iteritems 25 | 26 | from IPython.utils.tempdir import TemporaryDirectory 27 | from notebook.services.contents.tests.test_contents_api import APITest 28 | from notebook.tests.launchnotebook import assert_http_error 29 | from requests import HTTPError 30 | 31 | from ..constants import UNLIMITED 32 | from ..crypto import FernetEncryption, NoEncryption 33 | from ..hybridmanager import HybridContentsManager 34 | from ..pgmanager import ( 35 | PostgresContentsManager, 36 | writes_base64, 37 | ) 38 | from ..checkpoints import PostgresCheckpoints 39 | from ..query import ( 40 | create_directory, 41 | delete_directory, 42 | delete_file, 43 | dir_exists, 44 | file_exists, 45 | save_file, 46 | ) 47 | from .utils import ( 48 | clear_test_db, 49 | make_fernet, 50 | _norm_unicode, 51 | remigrate_test_schema, 52 | TEST_DB_URL, 53 | ) 54 | from ..utils.ipycompat import ( 55 | Config, 56 | FileContentsManager, 57 | GenericFileCheckpoints, 58 | to_os_path, 59 | ) 60 | from ..utils.sync import walk, walk_dirs 61 | 62 | 63 | setup_module = remigrate_test_schema 64 | 65 | 66 | class _APITestBase(APITest): 67 | """ 68 | APITest that also runs a test for our implementation of `walk`. 
69 | """ 70 | 71 | config = Config() 72 | config.FileContentsManager.delete_to_trash = False 73 | 74 | def test_walk(self): 75 | """ 76 | Test ContentsManager.walk. 77 | """ 78 | results = { 79 | _norm_unicode(dname): (subdirs, files) 80 | for dname, subdirs, files in walk(self.notebook.contents_manager) 81 | } 82 | # This is a dictionary because the ordering of these is all messed up 83 | # on OSX. 84 | expected = { 85 | '': ( 86 | [ 87 | 'Directory with spaces in', 88 | 'foo', 89 | 'ordering', 90 | u'unicodé', 91 | u'å b', 92 | ], 93 | ['inroot.blob', 'inroot.ipynb', 'inroot.txt'], 94 | ), 95 | 'Directory with spaces in': ( 96 | [], 97 | ['inspace.blob', 'inspace.ipynb', 'inspace.txt'], 98 | ), 99 | 'foo': ( 100 | ['bar'], 101 | [ 102 | 'a.blob', 'a.ipynb', 'a.txt', 103 | 'b.blob', 'b.ipynb', 'b.txt', 104 | 'name with spaces.blob', 105 | 'name with spaces.ipynb', 106 | 'name with spaces.txt', 107 | u'unicodé.blob', u'unicodé.ipynb', u'unicodé.txt' 108 | ] 109 | ), 110 | 'foo/bar': ( 111 | [], 112 | ['baz.blob', 'baz.ipynb', 'baz.txt'], 113 | ), 114 | 'ordering': ( 115 | [], 116 | [ 117 | 'A.blob', 'A.ipynb', 'A.txt', 118 | 'C.blob', 'C.ipynb', 'C.txt', 119 | 'b.blob', 'b.ipynb', 'b.txt', 120 | ], 121 | ), 122 | u'unicodé': ( 123 | [], 124 | ['innonascii.blob', 'innonascii.ipynb', 'innonascii.txt'], 125 | ), 126 | u'å b': ( 127 | [], 128 | [u'ç d.blob', u'ç d.ipynb', u'ç d.txt'], 129 | ), 130 | } 131 | 132 | for dname, (subdirs, files) in iteritems(expected): 133 | result_subdirs, result_files = results.pop(_norm_unicode(dname)) 134 | if dname == '': 135 | sep = '' 136 | else: 137 | sep = '/' 138 | self.assertEqual( 139 | set( 140 | map( 141 | _norm_unicode, 142 | [sep.join([dname, sub]) for sub in subdirs] 143 | ) 144 | ), 145 | set(map(_norm_unicode, result_subdirs)), 146 | ) 147 | self.assertEqual( 148 | set( 149 | map( 150 | _norm_unicode, 151 | [sep.join([dname, fname]) for fname in files] 152 | ), 153 | ), 154 | set(map(_norm_unicode, result_files)), 155 | ) 156 | self.assertEqual(results, {}) 157 | 158 | def test_list_checkpoints_sorting(self): 159 | """ 160 | Test that list_checkpoints returns results sorted by last_modified. 161 | """ 162 | for i in range(5): 163 | self.api.new_checkpoint('foo/a.ipynb') 164 | cps = self.api.get_checkpoints('foo/a.ipynb').json() 165 | 166 | self.assertEqual( 167 | cps, 168 | sorted( 169 | cps, 170 | key=lambda cp: parse(cp['last_modified']), 171 | reverse=True, 172 | ) 173 | ) 174 | 175 | # ContentsManager has different behaviour in notebook 5.5+ 176 | # https://github.com/jupyter/notebook/pull/3108...it now allows 177 | # non-empty directories to be deleted. 178 | # 179 | # PostgresContentsManager should continue to work the old way and 180 | # prevent non-empty directories from being deleted, since it doesn't 181 | # support backing up the deleted directory in the OS trash can. 182 | # FileContentsManager should allow non-empty directories to be deleted. 
183 |     def test_delete_non_empty_dir(self):
184 |         if isinstance(self.notebook.contents_manager,
185 |                       PostgresContentsManager):
186 |             # make sure non-empty directories cannot be deleted with
187 |             # PostgresContentsManager
188 |             _test_delete_non_empty_dir_fail(self, u'å b')
189 |         elif isinstance(self.notebook.contents_manager,
190 |                         HybridContentsManager):
191 |             # check that one of the non-empty subdirectories owned by the
192 |             # PostgresContentsManager cannot be deleted
193 |             _test_delete_non_empty_dir_fail(self, 'Directory with spaces in')
194 |         elif isinstance(self.notebook.contents_manager, FileContentsManager):
195 |             # use the 'delete_to_trash' flag to avoid moving the file to the
196 |             # trash, because it doesn't work on Jenkins
197 |             self.notebook.contents_manager.delete_to_trash = False
198 |             _test_delete_non_empty_dir_pass(self, u'å b')
199 |         else:
200 |             # for all other contents managers that we test, use the superclass
201 |             # implementation of this test (i.e. make sure non-empty dirs can
202 |             # be deleted)
203 |             super(_APITestBase, self).test_delete_non_empty_dir()
204 | 
205 |     def test_checkpoints_move_with_file(self):
206 |         # Create a checkpoint of initial state.
207 |         response = self.api.new_checkpoint('foo/a.ipynb')
208 |         response_json = response.json()
209 | 
210 |         # Move the file down.
211 |         self.api.rename('foo/a.ipynb', 'foo/bar/a.ipynb')
212 | 
213 |         # Looking for checkpoints in the old location should yield no results.
214 |         self.assertEqual(self.api.get_checkpoints('foo/a.ipynb').json(), [])
215 | 
216 |         # Looking for checkpoints in the new location should work.
217 |         checkpoints = self.api.get_checkpoints('foo/bar/a.ipynb').json()
218 |         self.assertEqual(checkpoints, [response_json])
219 | 
220 |         # Rename the directory that the file is in.
221 |         self.api.rename('foo/bar', 'foo/car')
222 |         self.assertEqual(
223 |             self.api.get_checkpoints('foo/bar/a.ipynb').json(),
224 |             [],
225 |         )
226 |         checkpoints = self.api.get_checkpoints('foo/car/a.ipynb').json()
227 |         self.assertEqual(checkpoints, [response_json])
228 | 
229 |         # Now move the directory that the file is in.
230 |         self.make_dir('foo/buz')
231 |         self.api.rename('foo/car', 'foo/buz/car')
232 |         self.assertEqual(
233 |             self.api.get_checkpoints('foo/car/a.ipynb').json(),
234 |             [],
235 |         )
236 |         checkpoints = self.api.get_checkpoints('foo/buz/car/a.ipynb').json()
237 |         self.assertEqual(checkpoints, [response_json])
238 | 
239 | 
240 | def _test_delete_non_empty_dir_fail(self, path):
241 |     with assert_http_error(400):
242 |         self.api.delete(path)
243 | 
244 | 
245 | def _test_delete_non_empty_dir_pass(self, path):
246 |     # Test that a non-empty directory can be deleted
247 |     self.api.delete(path)
248 |     # Check that the directory has actually been deleted
249 |     with assert_http_error(404):
250 |         self.api.list(path)
251 | 
252 | 
253 | def postgres_contents_config():
254 |     """
255 |     Shared setup code for PostgresContentsAPITest and subclasses.
256 |     """
257 |     config = Config()
258 |     config.NotebookApp.contents_manager_class = PostgresContentsManager
259 |     config.PostgresContentsManager.user_id = 'test'
260 |     config.PostgresContentsManager.db_url = TEST_DB_URL
261 |     return config
262 | 
263 | 
264 | class PostgresContentsAPITest(_APITestBase):
265 | 
266 |     config = postgres_contents_config()
267 | 
268 |     # Don't support hidden directories.
269 | hidden_dirs = [] 270 | 271 | def setUp(self): 272 | # This has to happen before the super call because the base class setup 273 | # calls our make_* functions, which require a user or else we violate 274 | # foreign-key constraints. 275 | self.pg_manager.ensure_user() 276 | self.pg_manager.ensure_root_directory() 277 | super(PostgresContentsAPITest, self).setUp() 278 | 279 | self.addCleanup(self.pg_manager.engine.dispose) 280 | if hasattr(self.pg_manager.checkpoints, 'engine'): 281 | self.addCleanup(self.pg_manager.checkpoints.engine.dispose) 282 | 283 | def tearDown(self): 284 | super(PostgresContentsAPITest, self).tearDown() 285 | clear_test_db() 286 | 287 | @property 288 | def pg_manager(self): 289 | return self.notebook.contents_manager 290 | 291 | @property 292 | def user_id(self): 293 | return self.pg_manager.user_id 294 | 295 | @property 296 | def engine(self): 297 | return self.pg_manager.engine 298 | 299 | @property 300 | def crypto(self): 301 | return self.pg_manager.crypto 302 | 303 | # Superclass method overrides. 304 | def make_dir(self, api_path): 305 | with self.engine.begin() as db: 306 | create_directory(db, self.user_id, api_path) 307 | 308 | def make_txt(self, api_path, txt): 309 | with self.engine.begin() as db: 310 | save_file( 311 | db, 312 | self.user_id, 313 | api_path, 314 | b64encode(txt.encode('utf-8')), 315 | self.crypto.encrypt, 316 | UNLIMITED, 317 | ) 318 | 319 | def make_blob(self, api_path, blob): 320 | with self.engine.begin() as db: 321 | save_file( 322 | db, 323 | self.user_id, 324 | api_path, 325 | b64encode(blob), 326 | self.crypto.encrypt, 327 | UNLIMITED, 328 | ) 329 | 330 | def make_nb(self, api_path, nb): 331 | with self.engine.begin() as db: 332 | save_file( 333 | db, 334 | self.user_id, 335 | api_path, 336 | writes_base64(nb), 337 | self.crypto.encrypt, 338 | UNLIMITED, 339 | ) 340 | 341 | def delete_dir(self, api_path, db=None): 342 | if self.isdir(api_path): 343 | dirs, files = [], [] 344 | for dir_, _, fs in walk_dirs(self.pg_manager, [api_path]): 345 | dirs.append(dir_) 346 | files.extend(fs) 347 | 348 | with self.engine.begin() as db: 349 | for file_ in files: 350 | delete_file(db, self.user_id, file_) 351 | for dir_ in reversed(dirs): 352 | delete_directory(db, self.user_id, dir_) 353 | 354 | def delete_file(self, api_path): 355 | if self.isfile(api_path): 356 | with self.engine.begin() as db: 357 | delete_file(db, self.user_id, api_path) 358 | 359 | def isfile(self, api_path): 360 | with self.engine.begin() as db: 361 | return file_exists(db, self.user_id, api_path) 362 | 363 | def isdir(self, api_path): 364 | with self.engine.begin() as db: 365 | return dir_exists(db, self.user_id, api_path) 366 | 367 | # End superclass method overrides. 368 | 369 | # Test overrides. 370 | def test_mkdir_hidden_400(self): 371 | """ 372 | We don't support hidden directories. 
373 | """ 374 | pass 375 | 376 | def test_checkpoints_separate_root(self): 377 | pass 378 | 379 | def test_crypto_types(self): 380 | self.assertIsInstance(self.pg_manager.crypto, NoEncryption) 381 | self.assertIsInstance(self.pg_manager.checkpoints.crypto, NoEncryption) 382 | 383 | 384 | class EncryptedPostgresContentsAPITest(PostgresContentsAPITest): 385 | config = postgres_contents_config() 386 | config.PostgresContentsManager.crypto = make_fernet() 387 | 388 | def test_crypto_types(self): 389 | self.assertIsInstance(self.pg_manager.crypto, FernetEncryption) 390 | self.assertIsInstance( 391 | self.pg_manager.checkpoints.crypto, 392 | FernetEncryption, 393 | ) 394 | 395 | 396 | class PostgresContentsFileCheckpointsAPITest(PostgresContentsAPITest): 397 | """ 398 | Test using PostgresContents and FileCheckpoints. 399 | """ 400 | config = Config() 401 | config.NotebookApp.contents_manager_class = PostgresContentsManager 402 | config.PostgresContentsManager.checkpoints_class = GenericFileCheckpoints 403 | config.PostgresContentsManager.user_id = 'test' 404 | config.PostgresContentsManager.db_url = TEST_DB_URL 405 | 406 | # Don't support hidden directories. 407 | hidden_dirs = [] 408 | 409 | @classmethod 410 | def setup_class(cls): 411 | cls.td = TemporaryDirectory() 412 | cls.config.GenericFileCheckpoints.root_dir = cls.td.name 413 | super(PostgresContentsFileCheckpointsAPITest, cls).setup_class() 414 | 415 | @classmethod 416 | def teardown_class(cls): 417 | super(PostgresContentsFileCheckpointsAPITest, cls).teardown_class() 418 | cls.td.cleanup() 419 | 420 | def test_checkpoints_move_with_file(self): 421 | # This test fails for this suite because the FileCheckpoints class is 422 | # not recognizing any checkpoints when renaming a directory. See: 423 | # https://github.com/jupyter/notebook/blob/bd6396d31e56f311e4022215 424 | # 25f9db7686834150/notebook/services/contents/filecheckpoints.py#L9 425 | # 8-L99 426 | # It looks like this is a bug upstream, as I can imagine that method 427 | # wanting to list out all checkpoints for the given path if the path is 428 | # a directory. For now we filed an issue to track this: 429 | # https://github.com/quantopian/pgcontents/issues/68 430 | pass 431 | 432 | 433 | def postgres_checkpoints_config(): 434 | """ 435 | Shared setup for PostgresCheckpointsAPITest and subclasses. 436 | """ 437 | config = Config() 438 | config.NotebookApp.contents_manager_class = FileContentsManager 439 | config.FileContentsManager.delete_to_trash = False 440 | config.ContentsManager.checkpoints_class = PostgresCheckpoints 441 | config.PostgresCheckpoints.user_id = 'test' 442 | config.PostgresCheckpoints.db_url = TEST_DB_URL 443 | 444 | return config 445 | 446 | 447 | class PostgresCheckpointsAPITest(_APITestBase): 448 | """ 449 | Test using PostgresCheckpoints with the built-in FileContentsManager. 
450 | """ 451 | config = postgres_checkpoints_config() 452 | 453 | @property 454 | def checkpoints(self): 455 | return self.notebook.contents_manager.checkpoints 456 | 457 | def setUp(self): 458 | super(PostgresCheckpointsAPITest, self).setUp() 459 | self.checkpoints.ensure_user() 460 | self.addCleanup(self.checkpoints.engine.dispose) 461 | 462 | def tearDown(self): 463 | self.checkpoints.purge_db() 464 | clear_test_db() 465 | super(PostgresCheckpointsAPITest, self).tearDown() 466 | 467 | def test_pgcheckpoints_is_used(self): 468 | self.assertIsInstance(self.checkpoints, PostgresCheckpoints) 469 | 470 | def test_checkpoints_separate_root(self): 471 | pass 472 | 473 | 474 | class EncryptedPostgresCheckpointsAPITest(PostgresCheckpointsAPITest): 475 | config = postgres_checkpoints_config() 476 | config.PostgresCheckpoints.crypto = make_fernet() 477 | 478 | def test_crypto_types(self): 479 | self.assertIsInstance(self.checkpoints.crypto, FernetEncryption) 480 | 481 | 482 | class HybridContentsPGRootAPITest(PostgresContentsAPITest): 483 | """ 484 | Test using a HybridContentsManager splitting between files and Postgres. 485 | """ 486 | files_prefix = 'foo' 487 | files_test_cls = APITest 488 | 489 | @classmethod 490 | def make_config(cls, td): 491 | config = Config() 492 | config.NotebookApp.contents_manager_class = HybridContentsManager 493 | config.HybridContentsManager.manager_classes = { 494 | '': PostgresContentsManager, 495 | cls.files_prefix: FileContentsManager, 496 | } 497 | config.HybridContentsManager.manager_kwargs = { 498 | '': {'user_id': 'test', 'db_url': TEST_DB_URL}, 499 | cls.files_prefix: { 500 | 'root_dir': td.name, 501 | 'delete_to_trash': False 502 | }, 503 | } 504 | return config 505 | 506 | @classmethod 507 | def setup_class(cls): 508 | cls.td = TemporaryDirectory() 509 | cls.config = cls.make_config(cls.td) 510 | super(HybridContentsPGRootAPITest, cls).setup_class() 511 | 512 | @property 513 | def pg_manager(self): 514 | return self.notebook.contents_manager.root_manager 515 | 516 | def to_os_path(self, api_path): 517 | return to_os_path(api_path, root=self.td.name) 518 | 519 | # Autogenerate setup methods by dispatching on api_path. 520 | def __api_path_dispatch(method_name): 521 | """ 522 | For a given method name, create a method which either uses the 523 | PostgresContentsAPITest implementation of that method name, or the base 524 | APITest implementation, depending on whether the given path starts with 525 | self.files_prefix. 526 | """ 527 | def _method(self, api_path, *args): 528 | parts = api_path.strip('/').split('/') 529 | if parts[0] == self.files_prefix: 530 | # Dispatch to filesystem. 531 | return getattr(self.files_test_cls, method_name)( 532 | self, '/'.join(parts[1:]), *args 533 | ) 534 | else: 535 | # Dispatch to Postgres. 536 | return getattr(PostgresContentsAPITest, method_name)( 537 | self, api_path, *args 538 | ) 539 | return _method 540 | 541 | __methods_to_multiplex = [ 542 | 'make_txt', 543 | 'make_blob', 544 | 'make_dir', 545 | 'make_nb', 546 | 'delete_dir', 547 | 'delete_file', 548 | 'isfile', 549 | 'isdir', 550 | ] 551 | locs = locals() 552 | for method_name in __methods_to_multiplex: 553 | locs[method_name] = __api_path_dispatch(method_name) 554 | del __methods_to_multiplex 555 | del __api_path_dispatch 556 | del locs 557 | 558 | # Override to not delete the root of the file subsystem. 
559 |     def test_delete_dirs(self):
560 |         # Delete depth-first so that we never try to delete a non-empty
561 |         # directory.
562 |         for name in sorted(self.dirs + ['/'], key=len, reverse=True):
563 |             listing = self.api.list(name).json()['content']
564 |             for model in listing:
565 |                 # Expect delete to fail on root of file subsystem.
566 |                 if model['path'] == self.files_prefix:
567 |                     with self.assertRaises(HTTPError) as err:
568 |                         self.api.delete(model['path'])
569 |                     self.assertEqual(err.exception.response.status_code, 400)
570 |                 else:
571 |                     self.api.delete(model['path'])
572 | 
573 |         listing = self.api.list('/').json()['content']
574 |         self.assertEqual(len(listing), 1)
575 |         self.assertEqual(listing[0]['path'], self.files_prefix)
576 | 
577 | 
578 | class EncryptedHybridContentsAPITest(HybridContentsPGRootAPITest):
579 | 
580 |     @classmethod
581 |     def make_config(cls, td):
582 |         config = super(EncryptedHybridContentsAPITest, cls).make_config(td)
583 |         config.HybridContentsManager.manager_kwargs['']['crypto'] = (
584 |             make_fernet()
585 |         )
586 |         return config
587 | 
588 |     def test_crypto_types(self):
589 |         self.assertIsInstance(self.pg_manager.crypto, FernetEncryption)
590 |         self.assertIsInstance(
591 |             self.pg_manager.checkpoints.crypto,
592 |             FernetEncryption,
593 |         )
594 | 
595 | 
596 | # This needs to be removed or else we'll run the main IPython tests as well.
597 | del APITest
598 |
--------------------------------------------------------------------------------
/pgcontents/tests/test_pgmanager.py:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright 2014 Quantopian, Inc.
3 | #
4 | # Licensed under the Apache License, Version 2.0 (the "License");
5 | # you may not use this file except in compliance with the License.
6 | # You may obtain a copy of the License at
7 | #
8 | #     http://www.apache.org/licenses/LICENSE-2.0
9 | #
10 | # Unless required by applicable law or agreed to in writing, software
11 | # distributed under the License is distributed on an "AS IS" BASIS,
12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | # See the License for the specific language governing permissions and
14 | # limitations under the License.
15 | """
16 | Run IPython's TestContentsManager using PostgresContentsManager.
17 | """
18 | from __future__ import unicode_literals
19 | 
20 | from base64 import b64encode
21 | from cryptography.fernet import Fernet
22 | from itertools import combinations
23 | 
24 | from notebook.services.contents.tests.test_manager import TestContentsManager
25 | 
26 | from pgcontents.pgmanager import PostgresContentsManager
27 | from .utils import (
28 |     assertRaisesHTTPError,
29 |     clear_test_db,
30 |     make_fernet,
31 |     _norm_unicode,
32 |     TEST_DB_URL,
33 |     remigrate_test_schema,
34 | )
35 | from ..crypto import FernetEncryption
36 | from ..utils.sync import walk_files_with_content
37 | 
38 | setup_module = remigrate_test_schema
39 | 
40 | 
41 | class PostgresContentsManagerTestCase(TestContentsManager):
42 | 
43 |     @classmethod
44 |     def tearDownClass(cls):
45 |         # Override the superclass teardown.
46 | pass 47 | 48 | def setUp(self): 49 | self.crypto = make_fernet() 50 | self.contents_manager = PostgresContentsManager( 51 | user_id='test', 52 | db_url=TEST_DB_URL, 53 | crypto=self.crypto, 54 | ) 55 | self.contents_manager.ensure_user() 56 | self.contents_manager.ensure_root_directory() 57 | 58 | # We need to dispose of any engines created during tests or else the 59 | # engine's QueuePool will leak connections even once this suite has 60 | # finished running. Then as other test suites start to run, the number 61 | # of connections will eventually creep up to the maximum number that 62 | # postgres allows. For reference, see the SQLAlchemy docs here: 63 | # https://docs.sqlalchemy.org/en/13/core/connections.html#engine-disposal 64 | # 65 | # This pattern should be repeated in any test class that creates a 66 | # PostgresContentsManager or a PostgresCheckpoints object (note that 67 | # even though the checkpoints manager lives on the contents manager it 68 | # still creates its own engine). An alternative solution to calling 69 | # dispose here would be to have these classes create engines with a 70 | # NullPool when testing, but that 1) adds more latency, and 2) adds 71 | # test-specific behavior to the classes themselves. 72 | self.addCleanup(self.contents_manager.engine.dispose) 73 | self.addCleanup(self.contents_manager.checkpoints.engine.dispose) 74 | 75 | def tearDown(self): 76 | clear_test_db() 77 | 78 | def set_pgmgr_attribute(self, name, value): 79 | """ 80 | Overridable method for setting attributes on our pgmanager. 81 | 82 | This exists so that we can re-use the tests here in 83 | test_hybrid_manager. 84 | """ 85 | setattr(self.contents_manager, name, value) 86 | 87 | def make_dir(self, api_path): 88 | self.contents_manager.new( 89 | model={'type': 'directory'}, 90 | path=api_path, 91 | ) 92 | 93 | def make_populated_dir(self, api_path): 94 | """ 95 | Create a directory at api_path with a notebook and a text file. 96 | """ 97 | self.make_dir(api_path) 98 | self.contents_manager.new( 99 | path='/'.join([api_path, 'nb.ipynb']) 100 | ) 101 | self.contents_manager.new( 102 | path='/'.join([api_path, 'file.txt']) 103 | ) 104 | 105 | def check_populated_dir_files(self, api_path): 106 | """ 107 | Check that a directory created with make_populated_dir has a 108 | notebook and a text file with expected names. 109 | """ 110 | dirmodel = self.contents_manager.get(api_path) 111 | self.assertEqual(dirmodel['path'], api_path) 112 | self.assertEqual(dirmodel['type'], 'directory') 113 | for entry in dirmodel['content']: 114 | # Skip any subdirectories created after the fact. 
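            # (make_populated_dir only creates 'nb.ipynb' and 'file.txt', so
            # anything else that shows up here is a directory added by a later
            # test step.)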
115 |             if entry['type'] == 'directory':
116 |                 continue
117 |             elif entry['type'] == 'file':
118 |                 self.assertEqual(entry['name'], 'file.txt')
119 |                 self.assertEqual(
120 |                     entry['path'],
121 |                     '/'.join([api_path, 'file.txt']),
122 |                 )
123 |             elif entry['type'] == 'notebook':
124 |                 self.assertEqual(entry['name'], 'nb.ipynb')
125 |                 self.assertEqual(
126 |                     entry['path'],
127 |                     '/'.join([api_path, 'nb.ipynb']),
128 |                 )
129 | 
130 |     def test_walk_files_with_content(self):
131 |         all_dirs = ['foo', 'bar', 'foo/bar', 'foo/bar/foo', 'foo/bar/foo/bar']
132 |         for dir in all_dirs:
133 |             self.make_populated_dir(dir)
134 | 
135 |         expected_file_paths = [
136 |             u'bar/file.txt',
137 |             u'bar/nb.ipynb',
138 |             u'foo/file.txt',
139 |             u'foo/nb.ipynb',
140 |             u'foo/bar/file.txt',
141 |             u'foo/bar/nb.ipynb',
142 |             u'foo/bar/foo/file.txt',
143 |             u'foo/bar/foo/nb.ipynb',
144 |             u'foo/bar/foo/bar/file.txt',
145 |             u'foo/bar/foo/bar/nb.ipynb',
146 |         ]
147 | 
148 |         cm = self.contents_manager
149 | 
150 |         filepaths = []
151 |         for file in walk_files_with_content(cm):
152 |             self.assertEqual(
153 |                 file,
154 |                 cm.get(file['path'], content=True)
155 |             )
156 |             filepaths.append(_norm_unicode(file['path']))
157 | 
158 |         # Compare sorted copies; walk order is not guaranteed.
159 |         self.assertEqual(
160 |             sorted(filepaths),
161 |             sorted(expected_file_paths)
162 |         )
163 | 
164 |     def test_modified_date(self):
165 | 
166 |         cm = self.contents_manager
167 | 
168 |         # Create a new notebook.
169 |         nb, name, path = self.new_notebook()
170 |         model = cm.get(path)
171 | 
172 |         # Add a cell and save.
173 |         self.add_code_cell(model['content'])
174 |         cm.save(model, path)
175 | 
176 |         # Reload notebook and verify that last_modified incremented.
177 |         saved = cm.get(path)
178 |         self.assertGreater(saved['last_modified'], model['last_modified'])
179 | 
180 |         # Move the notebook and verify that last_modified incremented.
181 |         new_path = 'renamed.ipynb'
182 |         cm.rename(path, new_path)
183 |         renamed = cm.get(new_path)
184 |         self.assertGreater(renamed['last_modified'], saved['last_modified'])
185 | 
186 |     def test_get_file_id(self):
187 |         cm = self.contents_manager
188 | 
189 |         # Create a new notebook.
190 |         nb, name, path = self.new_notebook()
191 |         model = cm.get(path)
192 | 
193 |         # Make sure we can get the id and it's not None.
194 |         id_ = cm.get_file_id(path)
195 |         self.assertIsNotNone(id_)
196 | 
197 |         # Make sure the id stays the same after we edit and save.
198 |         self.add_code_cell(model['content'])
199 |         cm.save(model, path)
200 |         self.assertEqual(id_, cm.get_file_id(path))
201 | 
202 |         # Make sure the id stays the same after a rename.
203 |         updated_path = "updated_name.ipynb"
204 |         cm.rename(path, updated_path)
205 |         self.assertEqual(id_, cm.get_file_id(updated_path))
206 | 
207 |     def test_rename_file(self):
208 |         cm = self.contents_manager
209 |         nb, nb_name, nb_path = self.new_notebook()
210 |         assert nb_name == 'Untitled.ipynb'
211 | 
212 |         # A simple rename of the file within the same directory.
213 |         cm.rename(nb_path, 'new_name.ipynb')
214 |         assert cm.get('new_name.ipynb')['path'] == 'new_name.ipynb'
215 | 
216 |         # The old file name should no longer be found.
217 |         with assertRaisesHTTPError(self, 404):
218 |             cm.get(nb_name)
219 | 
220 |         # Test that renaming outside of the root fails.
221 |         with assertRaisesHTTPError(self, 404):
222 |             cm.rename('../foo', '../bar')
223 | 
224 |         # Test that renaming something to itself fails.
225 |         with assertRaisesHTTPError(self, 409):
226 |             cm.rename('new_name.ipynb', 'new_name.ipynb')
227 | 
228 |         # Test that renaming a non-existent file fails.
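        # (A missing source maps to 404, mirroring cm.get on a missing path,
        # while a conflicting destination maps to 409 as above.)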
228 | with assertRaisesHTTPError(self, 404): 229 | cm.rename('non_existent.ipynb', 'some_name.ipynb') 230 | 231 | # Now test moving a file. 232 | self.make_dir('My Folder') 233 | nb_destination = 'My Folder/new_name.ipynb' 234 | cm.rename('new_name.ipynb', nb_destination) 235 | 236 | updated_notebook_model = cm.get(nb_destination) 237 | assert updated_notebook_model['name'] == 'new_name.ipynb' 238 | assert updated_notebook_model['path'] == nb_destination 239 | 240 | # The old file name should no longer be found. 241 | with assertRaisesHTTPError(self, 404): 242 | cm.get('new_name.ipynb') 243 | 244 | def test_rename_directory(self): 245 | """ 246 | Create a directory hierarchy that looks like: 247 | 248 | foo/ 249 | ... 250 | bar/ 251 | ... 252 | foo/ 253 | ... 254 | bar/ 255 | ... 256 | bar/ 257 | 258 | then rename /foo/bar -> /foo/bar_changed and verify that all changes 259 | propagate correctly. 260 | """ 261 | cm = self.contents_manager 262 | 263 | all_dirs = ['foo', 'bar', 'foo/bar', 'foo/bar/foo', 'foo/bar/foo/bar'] 264 | unchanged_dirs = all_dirs[:2] 265 | changed_dirs = all_dirs[2:] 266 | 267 | for dir_ in all_dirs: 268 | self.make_populated_dir(dir_) 269 | self.check_populated_dir_files(dir_) 270 | 271 | # Renaming to an extant directory should raise 272 | for src, dest in combinations(all_dirs, 2): 273 | with assertRaisesHTTPError(self, 409): 274 | cm.rename(src, dest) 275 | 276 | # Renaming the root directory should raise 277 | with assertRaisesHTTPError(self, 409): 278 | cm.rename('', 'baz') 279 | 280 | # Verify that we can't create a new notebook in the (nonexistent) 281 | # target directory 282 | with assertRaisesHTTPError(self, 404): 283 | cm.new_untitled('foo/bar_changed', ext='.ipynb') 284 | 285 | cm.rename('foo/bar', 'foo/bar_changed') 286 | 287 | # foo/ and bar/ should be unchanged 288 | for unchanged in unchanged_dirs: 289 | self.check_populated_dir_files(unchanged) 290 | 291 | # foo/bar/ and subdirectories should have leading prefixes changed 292 | for changed_dirname in changed_dirs: 293 | with assertRaisesHTTPError(self, 404): 294 | cm.get(changed_dirname) 295 | new_dirname = changed_dirname.replace( 296 | 'foo/bar', 'foo/bar_changed', 1 297 | ) 298 | self.check_populated_dir_files(new_dirname) 299 | 300 | # Verify that we can now create a new notebook in the changed directory 301 | cm.new_untitled('foo/bar_changed', ext='.ipynb') 302 | 303 | def test_move_empty_directory(self): 304 | cm = self.contents_manager 305 | 306 | self.make_dir('Parent Folder') 307 | self.make_dir('Child Folder') 308 | 309 | # A rename moving one folder into the other. 310 | child_folder_destination = 'Parent Folder/Child Folder' 311 | cm.rename('Child Folder', child_folder_destination) 312 | 313 | updated_parent_model = cm.get('Parent Folder') 314 | assert updated_parent_model['path'] == 'Parent Folder' 315 | assert len(updated_parent_model['content']) == 1 316 | 317 | with assertRaisesHTTPError(self, 404): 318 | # Should raise a 404 because the contents manager should not be 319 | # able to find a folder with this path. 320 | cm.get('Child Folder') 321 | 322 | # Confirm that the child folder has moved into the parent folder. 323 | updated_child_model = cm.get(child_folder_destination) 324 | assert updated_child_model['name'] == 'Child Folder' 325 | assert updated_child_model['path'] == child_folder_destination 326 | 327 | # Test moving it back up. 
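        # (Renaming to a bare name with no directory prefix moves the folder
        # back up to the root.)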
328 | cm.rename('Parent Folder/Child Folder', 'Child Folder') 329 | 330 | updated_parent_model = cm.get('Parent Folder') 331 | assert len(updated_parent_model['content']) == 0 332 | 333 | with assertRaisesHTTPError(self, 404): 334 | cm.get('Parent Folder/Child Folder') 335 | 336 | updated_child_model = cm.get('Child Folder') 337 | assert updated_child_model['name'] == 'Child Folder' 338 | assert updated_child_model['path'] == 'Child Folder' 339 | 340 | def test_move_populated_directory(self): 341 | cm = self.contents_manager 342 | 343 | all_dirs = [ 344 | 'foo', 'foo/bar', 'foo/bar/populated_dir', 345 | 'biz', 'biz/buz', 346 | ] 347 | 348 | for dir_ in all_dirs: 349 | if dir_ == 'foo/bar/populated_dir': 350 | self.make_populated_dir(dir_) 351 | self.check_populated_dir_files(dir_) 352 | else: 353 | self.make_dir(dir_) 354 | 355 | # Move the populated directory over to "biz". 356 | cm.rename('foo/bar/populated_dir', 'biz/populated_dir') 357 | 358 | bar_model = cm.get('foo/bar') 359 | assert len(bar_model['content']) == 0 360 | 361 | biz_model = cm.get('biz') 362 | assert len(biz_model['content']) == 2 363 | 364 | with assertRaisesHTTPError(self, 404): 365 | cm.get('foo/bar/populated_dir') 366 | 367 | populated_dir_model = cm.get('biz/populated_dir') 368 | assert populated_dir_model['name'] == 'populated_dir' 369 | assert populated_dir_model['path'] == 'biz/populated_dir' 370 | self.check_populated_dir_files('biz/populated_dir') 371 | 372 | # Test moving a directory with sub-directories and files that go 373 | # multiple layers deep. 374 | self.make_populated_dir('biz/populated_dir/populated_sub_dir') 375 | self.make_dir('biz/populated_dir/populated_sub_dir/empty_dir') 376 | cm.rename('biz/populated_dir', 'populated_dir') 377 | 378 | populated_dir_model = cm.get('populated_dir') 379 | assert populated_dir_model['name'] == 'populated_dir' 380 | assert populated_dir_model['path'] == 'populated_dir' 381 | self.check_populated_dir_files('populated_dir') 382 | self.check_populated_dir_files('populated_dir/populated_sub_dir') 383 | 384 | empty_dir_model = cm.get('populated_dir/populated_sub_dir/empty_dir') 385 | assert empty_dir_model['name'] == 'empty_dir' 386 | assert ( 387 | empty_dir_model['path'] == 388 | 'populated_dir/populated_sub_dir/empty_dir' 389 | ) 390 | assert len(empty_dir_model['content']) == 0 391 | 392 | def test_max_file_size(self): 393 | 394 | cm = self.contents_manager 395 | max_size = 120 396 | self.set_pgmgr_attribute('max_file_size_bytes', max_size) 397 | 398 | def size_in_db(s): 399 | return len(self.crypto.encrypt(b64encode(s.encode('utf-8')))) 400 | 401 | # max_file_size_bytes should be based on the size in the database, not 402 | # the size of the input. 
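        # As a worked example (assuming standard Fernet token overhead): the
        # b64encode('a' * 10) payload below is 16 bytes, and a Fernet token
        # for a 16-byte payload is 1 + 8 + 16 + 32 + 32 = 89 raw bytes
        # (version, timestamp, IV, padded ciphertext, HMAC), which
        # base64-encodes to exactly 120 bytes -- the max_size set above.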
403 | good = 'a' * 10 404 | self.assertEqual(size_in_db(good), max_size) 405 | cm.save( 406 | model={ 407 | 'content': good, 408 | 'format': 'text', 409 | 'type': 'file', 410 | }, 411 | path='good.txt', 412 | ) 413 | result = cm.get('good.txt') 414 | self.assertEqual(result['content'], good) 415 | 416 | bad = 'a' * 30 417 | self.assertGreater(size_in_db(bad), max_size) 418 | with assertRaisesHTTPError(self, 413): 419 | cm.save( 420 | model={ 421 | 'content': bad, 422 | 'format': 'text', 423 | 'type': 'file', 424 | }, 425 | path='bad.txt', 426 | ) 427 | 428 | def test_changing_crypto_disables_ability_to_read(self): 429 | cm = self.contents_manager 430 | 431 | _, _, nb_path = self.new_notebook() 432 | nb_model = cm.get(nb_path) 433 | 434 | file_path = 'file.txt' 435 | cm.save( 436 | model={ 437 | 'content': 'not encrypted', 438 | 'format': 'text', 439 | 'type': 'file', 440 | }, 441 | path=file_path, 442 | ) 443 | file_model = cm.get(file_path) 444 | 445 | alt_key = b64encode(b'fizzbuzz' * 4) 446 | self.set_pgmgr_attribute('crypto', FernetEncryption(Fernet(alt_key))) 447 | 448 | with assertRaisesHTTPError(self, 500): 449 | cm.get(nb_path) 450 | 451 | with assertRaisesHTTPError(self, 500): 452 | cm.get(file_path) 453 | 454 | # Restore the original crypto instance and verify that we can still 455 | # decrypt. 456 | self.set_pgmgr_attribute('crypto', self.crypto) 457 | 458 | decrypted_nb_model = cm.get(nb_path) 459 | self.assertEqual(nb_model, decrypted_nb_model) 460 | 461 | decrypted_file_model = cm.get(file_path) 462 | self.assertEqual(file_model, decrypted_file_model) 463 | 464 | def test_relative_paths(self): 465 | cm = self.contents_manager 466 | 467 | nb, name, path = self.new_notebook() 468 | self.assertEqual(cm.get(path), cm.get('/a/../' + path)) 469 | self.assertEqual(cm.get(path), cm.get('/a/../b/c/../../' + path)) 470 | 471 | with assertRaisesHTTPError(self, 404): 472 | cm.get('..') 473 | with assertRaisesHTTPError(self, 404): 474 | cm.get('foo/../../../bar') 475 | with assertRaisesHTTPError(self, 404): 476 | cm.delete('../foo') 477 | with assertRaisesHTTPError(self, 404): 478 | cm.rename('../foo', '../bar') 479 | with assertRaisesHTTPError(self, 404): 480 | cm.save(model={ 481 | 'type': 'file', 482 | 'content': u'', 483 | 'format': 'text', 484 | }, path='../foo') 485 | 486 | 487 | # This needs to be removed or else we'll run the main IPython tests as well. 488 | del TestContentsManager 489 | -------------------------------------------------------------------------------- /pgcontents/tests/test_synchronization.py: -------------------------------------------------------------------------------- 1 | """ 2 | Tests for synchronization tools. 
3 | """ 4 | from __future__ import unicode_literals 5 | from base64 import b64encode 6 | from logging import Logger 7 | from unittest import TestCase 8 | 9 | from cryptography.fernet import Fernet 10 | from sqlalchemy import create_engine 11 | 12 | from pgcontents import PostgresContentsManager 13 | from pgcontents.crypto import ( 14 | FernetEncryption, 15 | NoEncryption, 16 | single_password_crypto_factory, 17 | ) 18 | from pgcontents.query import generate_files, generate_checkpoints 19 | from pgcontents.utils.ipycompat import new_markdown_cell 20 | 21 | from .utils import ( 22 | assertRaisesHTTPError, 23 | clear_test_db, 24 | remigrate_test_schema, 25 | populate, 26 | TEST_DB_URL, 27 | ) 28 | from ..utils.sync import ( 29 | reencrypt_all_users, 30 | unencrypt_all_users, 31 | ) 32 | 33 | try: 34 | import mock 35 | except ImportError: 36 | from unittest import mock 37 | 38 | 39 | class TestReEncryption(TestCase): 40 | 41 | def setUp(self): 42 | remigrate_test_schema() 43 | 44 | def tearDown(self): 45 | clear_test_db() 46 | 47 | def add_markdown_cell(self, path): 48 | # Load and update 49 | model = self.contents.get(path=path) 50 | model['content'].cells.append( 51 | new_markdown_cell('Created by test: ' + path) 52 | ) 53 | 54 | # Save and checkpoint again. 55 | self.contents.save(model, path=path) 56 | return model 57 | 58 | def test_reencryption(self): 59 | """ 60 | Create two unencrypted notebooks and a file, create checkpoints for 61 | each, then encrypt and check that content is unchanged, then re-encrypt 62 | and check the same. 63 | """ 64 | db_url = TEST_DB_URL 65 | user_id = 'test_reencryption' 66 | 67 | no_crypto = NoEncryption() 68 | no_crypto_manager = PostgresContentsManager( 69 | user_id=user_id, 70 | db_url=db_url, 71 | crypto=no_crypto, 72 | create_user_on_startup=True, 73 | ) 74 | 75 | key1 = b'fizzbuzz' * 4 76 | crypto1 = FernetEncryption(Fernet(b64encode(key1))) 77 | manager1 = PostgresContentsManager( 78 | user_id=user_id, 79 | db_url=db_url, 80 | crypto=crypto1, 81 | ) 82 | 83 | key2 = key1[::-1] 84 | crypto2 = FernetEncryption(Fernet(b64encode(key2))) 85 | manager2 = PostgresContentsManager( 86 | user_id=user_id, 87 | db_url=db_url, 88 | crypto=crypto2, 89 | ) 90 | 91 | # Populate an unencrypted user. 92 | paths = populate(no_crypto_manager) 93 | 94 | original_content = {} 95 | for path in paths: 96 | # Create a checkpoint of the original content and store what we 97 | # expect it to look like. 98 | no_crypto_manager.create_checkpoint(path) 99 | original_content[path] = no_crypto_manager.get(path)['content'] 100 | 101 | updated_content = {} 102 | for path in paths: 103 | # Create a new version of each notebook with a cell appended. 104 | model = no_crypto_manager.get(path=path) 105 | model['content'].cells.append( 106 | new_markdown_cell('Created by test: ' + path) 107 | ) 108 | no_crypto_manager.save(model, path=path) 109 | 110 | # Store the updated content. 111 | updated_content[path] = no_crypto_manager.get(path)['content'] 112 | 113 | # Create a checkpoint of the new content. 114 | no_crypto_manager.create_checkpoint(path) 115 | 116 | def check_path_content(path, mgr, expected): 117 | retrieved = mgr.get(path)['content'] 118 | self.assertEqual(retrieved, expected[path]) 119 | 120 | def check_reencryption(old, new): 121 | for path in paths: 122 | # We should no longer be able to retrieve notebooks from the 123 | # no-crypto manager. 
124 |                 with assertRaisesHTTPError(self, 500):
125 |                     old.get(path)
126 | 
127 |                 # The new manager should read the latest version of each file.
128 |                 check_path_content(path, new, updated_content)
129 | 
130 |                 # We should have two checkpoints available, one from the
131 |                 # original version of the file, and one for the updated
132 |                 # version.
133 |                 (new_cp, old_cp) = new.list_checkpoints(path)
134 |                 self.assertGreater(
135 |                     new_cp['last_modified'],
136 |                     old_cp['last_modified'],
137 |                 )
138 | 
139 |                 # The old checkpoint should restore us to the original state.
140 |                 new.restore_checkpoint(old_cp['id'], path)
141 |                 check_path_content(path, new, original_content)
142 | 
143 |                 # The new checkpoint should put us back into our updated
144 |                 # state.
145 |                 new.restore_checkpoint(new_cp['id'], path)
146 |                 check_path_content(path, new, updated_content)
147 | 
148 |         engine = create_engine(db_url)
149 |         logger = Logger('Reencryption Testing')
150 | 
151 |         no_crypto_factory = {user_id: no_crypto}.__getitem__
152 |         crypto1_factory = {user_id: crypto1}.__getitem__
153 |         crypto2_factory = {user_id: crypto2}.__getitem__
154 | 
155 |         # Verify that reencryption is idempotent:
156 |         for _ in range(2):
157 |             reencrypt_all_users(
158 |                 engine,
159 |                 no_crypto_factory,
160 |                 crypto1_factory,
161 |                 logger,
162 |             )
163 |             check_reencryption(no_crypto_manager, manager1)
164 | 
165 |         for _ in range(2):
166 |             reencrypt_all_users(
167 |                 engine,
168 |                 crypto1_factory,
169 |                 crypto2_factory,
170 |                 logger,
171 |             )
172 |             check_reencryption(manager1, manager2)
173 | 
174 |         with self.assertRaises(ValueError):
175 |             # Using reencrypt_all_users with a no-encryption target isn't
176 |             # supported.
177 |             reencrypt_all_users(
178 |                 engine,
179 |                 crypto2_factory,
180 |                 no_crypto_factory,
181 |                 logger,
182 |             )
183 |         # There should have been no changes from the failed attempt.
184 |         check_reencryption(manager1, manager2)
185 | 
186 |         # Unencrypt and verify that we can now read everything with the no
187 |         # crypto manager.
188 |         unencrypt_all_users(engine, crypto2_factory, logger)
189 |         check_reencryption(manager2, no_crypto_manager)
190 | 
191 | 
192 | class TestGenerateNotebooks(TestCase):
193 | 
194 |     def setUp(self):
195 |         remigrate_test_schema()
196 |         self.db_url = TEST_DB_URL
197 |         self.engine = create_engine(self.db_url)
198 |         encryption_pw = u'foobar'
199 |         self.crypto_factory = single_password_crypto_factory(encryption_pw)
200 | 
201 |     def tearDown(self):
202 |         clear_test_db()
203 | 
204 |     @staticmethod
205 |     def cleanup_pgcontents_managers(managers):
206 |         for manager in managers:
207 |             manager.engine.dispose()
208 |             manager.checkpoints.engine.dispose()
209 | 
210 |     def populate_users(self, user_ids):
211 |         """
212 |         Create a `PostgresContentsManager` and notebooks for each user.
213 | 
214 |         Notebooks are returned in a list in order of their creation.
215 | """ 216 | def encrypted_pgmanager(user_id): 217 | return PostgresContentsManager( 218 | user_id=user_id, 219 | db_url=self.db_url, 220 | crypto=self.crypto_factory(user_id), 221 | create_user_on_startup=True, 222 | ) 223 | managers = {user_id: encrypted_pgmanager(user_id) 224 | for user_id in user_ids} 225 | paths = [(user_id, path) 226 | for user_id in user_ids 227 | for path in populate(managers[user_id])] 228 | 229 | # Create a text file for each user as well, which should be ignored by 230 | # the notebook generators 231 | model = {'content': 'text file contents', 'format': 'text'} 232 | for manager in managers.values(): 233 | manager.new(model, path='text file.txt') 234 | 235 | return (managers, paths) 236 | 237 | def save_bad_notebook(self, manager): 238 | """ 239 | Save a notebook with non-notebook content. Trying to parse it should 240 | cause `CorruptedFile` to be raised. 241 | 242 | Returns the file id of the saved notebook. 243 | """ 244 | model = { 245 | 'type': 'file', 246 | 'content': 'bad notebook contents', 247 | 'format': 'text', 248 | } 249 | path = 'bad notebook.ipynb' 250 | manager.new(model, path=path) 251 | return manager.get_file_id(path) 252 | 253 | def test_generate_files(self): 254 | """ 255 | Create files for three users; try fetching them using `generate_files`. 256 | """ 257 | user_ids = ['test_generate_files0', 258 | 'test_generate_files1', 259 | 'test_generate_files2'] 260 | (managers, paths) = self.populate_users(user_ids) 261 | 262 | # Dispose of all engines created during this test to prevent leaked 263 | # database connections. 264 | self.addCleanup(self.cleanup_pgcontents_managers, managers.values()) 265 | 266 | # Since the bad notebook is saved last, it will be hit only when no 267 | # max_dt is specified. 268 | bad_notebook_id = self.save_bad_notebook(managers[user_ids[0]]) 269 | 270 | def get_file_dt(idx): 271 | (user_id, path) = paths[idx] 272 | return managers[user_id].get(path, content=False)['last_modified'] 273 | 274 | # Find three split datetimes 275 | n = 3 276 | split_idxs = [i * (len(paths) // (n + 1)) for i in range(1, n + 1)] 277 | split_dts = [get_file_dt(idx) for idx in split_idxs] 278 | 279 | def check_call(kwargs, expect_files, expect_warning=False): 280 | """ 281 | Call `generate_files`; check that all expected files are found, 282 | with the correct content, in the correct order. 283 | """ 284 | file_record = [] 285 | logger = Logger('Generate Files Testing') 286 | with mock.patch.object(logger, 'warning') as mock_warn: 287 | for result in generate_files(self.engine, self.crypto_factory, 288 | logger=logger, **kwargs): 289 | manager = managers[result['user_id']] 290 | 291 | # This recreates functionality from 292 | # `manager._notebook_model_from_db` to match with the model 293 | # returned by `manager.get`. 294 | nb = result['content'] 295 | manager.mark_trusted_cells(nb, result['path']) 296 | 297 | # Check that the content returned by the pgcontents manager 298 | # matches that returned by `generate_files` 299 | self.assertEqual( 300 | nb, 301 | manager.get(result['path'])['content'] 302 | ) 303 | 304 | file_record.append((result['user_id'], result['path'])) 305 | 306 | if expect_warning: 307 | mock_warn.assert_called_once_with( 308 | 'Corrupted file with id %d in table files.' 
309 | % bad_notebook_id 310 | ) 311 | mock_warn.reset_mock() 312 | else: 313 | mock_warn.assert_not_called() 314 | 315 | # Make sure all files were found in the right order 316 | self.assertEqual(file_record, expect_files) 317 | 318 | # Expect all files given no `min_dt`/`max_dt` 319 | check_call( 320 | {}, 321 | paths, 322 | expect_warning=True, 323 | ) 324 | 325 | check_call( 326 | {'min_dt': split_dts[1]}, 327 | paths[split_idxs[1]:], 328 | expect_warning=True, 329 | ) 330 | 331 | check_call( 332 | {'max_dt': split_dts[1]}, 333 | paths[:split_idxs[1]], 334 | expect_warning=False, 335 | ) 336 | 337 | check_call( 338 | {'min_dt': split_dts[0], 'max_dt': split_dts[2]}, 339 | paths[split_idxs[0]:split_idxs[2]], 340 | expect_warning=False, 341 | ) 342 | 343 | def test_generate_checkpoints(self): 344 | """ 345 | Create checkpoints in three stages; try fetching them with 346 | `generate_checkpoints`. 347 | """ 348 | user_ids = ['test_generate_checkpoints0', 349 | 'test_generate_checkpoints1', 350 | 'test_generate_checkpoints2'] 351 | (managers, paths) = self.populate_users(user_ids) 352 | 353 | # Dispose of all engines created during this test to prevent leaked 354 | # database connections. 355 | self.addCleanup(self.cleanup_pgcontents_managers, managers.values()) 356 | 357 | def update_content(user_id, path, text): 358 | """ 359 | Add a Markdown cell and save the notebook. 360 | 361 | Returns the new notebook content. 362 | """ 363 | manager = managers[user_id] 364 | model = manager.get(path) 365 | model['content'].cells.append( 366 | new_markdown_cell(text + ' on path: ' + path) 367 | ) 368 | manager.save(model, path) 369 | return manager.get(path)['content'] 370 | 371 | # Each of the next three steps creates a checkpoint for each notebook 372 | # and stores the notebook content in a list, together with the user id, 373 | # the path, and the datetime of the new checkpoint. 374 | 375 | # Begin by making a checkpoint for the original notebook content. 376 | beginning_checkpoints = [] 377 | for user_id, path in paths: 378 | content = managers[user_id].get(path)['content'] 379 | dt = managers[user_id].create_checkpoint(path)['last_modified'] 380 | beginning_checkpoints.append((user_id, path, dt, content)) 381 | 382 | # Update each notebook and make a new checkpoint. 383 | middle_checkpoints = [] 384 | middle_min_dt = None 385 | for user_id, path in paths: 386 | content = update_content(user_id, path, '1st addition') 387 | dt = managers[user_id].create_checkpoint(path)['last_modified'] 388 | middle_checkpoints.append((user_id, path, dt, content)) 389 | if middle_min_dt is None: 390 | middle_min_dt = dt 391 | 392 | # Update each notebook again and make another checkpoint. 393 | end_checkpoints = [] 394 | end_min_dt = None 395 | for user_id, path in paths: 396 | content = update_content(user_id, path, '2nd addition') 397 | dt = managers[user_id].create_checkpoint(path)['last_modified'] 398 | end_checkpoints.append((user_id, path, dt, content)) 399 | if end_min_dt is None: 400 | end_min_dt = dt 401 | 402 | def concat_all(lists): 403 | return sum(lists, []) 404 | 405 | def check_call(kwargs, expect_checkpoints): 406 | """ 407 | Call `generate_checkpoints`; check that all expected checkpoints 408 | are found, with the correct content, in the correct order. 
409 | """ 410 | checkpoint_record = [] 411 | for result in generate_checkpoints(self.engine, 412 | self.crypto_factory, **kwargs): 413 | manager = managers[result['user_id']] 414 | 415 | # This recreates functionality from 416 | # `manager._notebook_model_from_db` to match with the model 417 | # returned by `manager.get`. 418 | nb = result['content'] 419 | manager.mark_trusted_cells(nb, result['path']) 420 | 421 | checkpoint_record.append((result['user_id'], result['path'], 422 | result['last_modified'], nb)) 423 | 424 | # Make sure all checkpoints were found in the right order 425 | self.assertEqual(checkpoint_record, expect_checkpoints) 426 | 427 | # No `min_dt`/`max_dt` 428 | check_call({}, concat_all([beginning_checkpoints, middle_checkpoints, 429 | end_checkpoints])) 430 | 431 | # `min_dt` cuts off `beginning_checkpoints` checkpoints 432 | check_call({'min_dt': middle_min_dt}, 433 | concat_all([middle_checkpoints, end_checkpoints])) 434 | 435 | # `max_dt` cuts off `end_checkpoints` checkpoints 436 | check_call({'max_dt': end_min_dt}, 437 | concat_all([beginning_checkpoints, middle_checkpoints])) 438 | 439 | # `min_dt` and `max_dt` together isolate `middle_checkpoints` 440 | check_call({'min_dt': middle_min_dt, 'max_dt': end_min_dt}, 441 | middle_checkpoints) 442 | -------------------------------------------------------------------------------- /pgcontents/tests/utils.py: -------------------------------------------------------------------------------- 1 | # encoding: utf-8 2 | """ 3 | Utilities for testing. 4 | """ 5 | from __future__ import unicode_literals 6 | from contextlib import contextmanager 7 | from cryptography.fernet import Fernet 8 | from getpass import getuser 9 | from itertools import starmap 10 | import os 11 | import posixpath 12 | from unicodedata import normalize 13 | 14 | from IPython.utils import py3compat 15 | from nose.tools import nottest 16 | from sqlalchemy import create_engine 17 | from tornado.web import HTTPError 18 | 19 | from ..api_utils import api_path_join 20 | from ..crypto import FernetEncryption 21 | from ..schema import metadata 22 | from ..utils.ipycompat import ( 23 | new_code_cell, 24 | new_markdown_cell, 25 | new_notebook, 26 | new_raw_cell, 27 | ) 28 | from ..utils.migrate import upgrade 29 | 30 | 31 | TEST_DB_URL = os.environ.get('PGCONTENTS_TEST_DB_URL') 32 | if TEST_DB_URL is None: 33 | TEST_DB_URL = "postgresql://{user}@/pgcontents_testing".format( 34 | user=getuser(), 35 | ) 36 | 37 | 38 | def make_fernet(): 39 | return FernetEncryption(Fernet(Fernet.generate_key())) 40 | 41 | 42 | def _norm_unicode(s): 43 | """Normalize unicode strings""" 44 | return normalize('NFC', py3compat.cast_unicode(s)) 45 | 46 | 47 | @contextmanager 48 | def assertRaisesHTTPError(testcase, status, msg=None): 49 | msg = msg or "Should have raised HTTPError(%i)" % status 50 | try: 51 | yield 52 | except HTTPError as e: 53 | testcase.assertEqual(e.status_code, status) 54 | else: 55 | testcase.fail(msg) 56 | 57 | 58 | _tables = ( 59 | 'pgcontents.remote_checkpoints', 60 | 'pgcontents.files', 61 | 'pgcontents.directories', 62 | 'pgcontents.users', 63 | ) 64 | unexpected_tables = set(metadata.tables) - set(_tables) 65 | if unexpected_tables: 66 | raise Exception("Unexpected tables in metadata: %s" % unexpected_tables) 67 | 68 | 69 | @nottest 70 | def clear_test_db(): 71 | engine = create_engine(TEST_DB_URL) 72 | with engine.connect() as conn: 73 | for table in map(metadata.tables.__getitem__, _tables): 74 | conn.execute(table.delete()) 75 | 76 | 77 | @nottest 78 | def 
remigrate_test_schema():
79 |     """
80 |     Drop and recreate the test db schema.
81 |     """
82 |     drop_testing_db_tables()
83 |     migrate_testing_db()
84 | 
85 | 
86 | @nottest
87 | def drop_testing_db_tables():
88 |     """
89 |     Drop all tables from the testing db.
90 |     """
91 |     engine = create_engine(TEST_DB_URL)
92 |     conn = engine.connect()
93 |     trans = conn.begin()
94 |     conn.execute('DROP SCHEMA IF EXISTS pgcontents CASCADE')
95 |     conn.execute('DROP TABLE IF EXISTS alembic_version CASCADE')
96 | 
97 |     trans.commit()
98 | 
99 | 
100 | @nottest
101 | def migrate_testing_db(revision='head'):
102 |     """
103 |     Migrate the testing db to the given alembic revision ('head' by default).
104 |     """
105 |     upgrade(TEST_DB_URL, revision)
106 | 
107 | 
108 | @nottest
109 | def test_notebook(name):
110 |     """
111 |     Make a test notebook for the given name.
112 |     """
113 |     nb = new_notebook()
114 |     nb.cells.append(new_code_cell("'code_' + '{}'".format(name)))
115 |     nb.cells.append(new_raw_cell("raw_{}".format(name)))
116 |     nb.cells.append(new_markdown_cell('markdown_{}'.format(name)))
117 |     return nb
118 | 
119 | 
120 | def populate(contents_mgr):
121 |     """
122 |     Populate a test directory with a ContentsManager.
123 |     """
124 |     dirs_nbs = [
125 |         ('', 'inroot.ipynb'),
126 |         ('Directory with spaces in', 'inspace.ipynb'),
127 |         ('unicodé', 'innonascii.ipynb'),
128 |         ('foo', 'a.ipynb'),
129 |         ('foo', 'name with spaces.ipynb'),
130 |         ('foo', 'unicodé.ipynb'),
131 |         ('foo/bar', 'baz.ipynb'),
132 |         ('å b', 'ç d.ipynb'),
133 |     ]
134 | 
135 |     for dirname, nbname in dirs_nbs:
136 |         contents_mgr.save({'type': 'directory'}, path=dirname)
137 |         contents_mgr.save(
138 |             {'type': 'notebook', 'content': test_notebook(nbname)},
139 |             path=api_path_join(dirname, nbname),
140 |         )
141 |     return list(starmap(posixpath.join, dirs_nbs))
142 |
--------------------------------------------------------------------------------
/pgcontents/utils/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/quantopian/pgcontents/51f8febcf6ece4e88b047768b9ce18553162d63c/pgcontents/utils/__init__.py
--------------------------------------------------------------------------------
/pgcontents/utils/ipycompat.py:
--------------------------------------------------------------------------------
1 | """
2 | Utilities for managing compat between notebook versions.
3 | """ 4 | from traitlets.config import Config 5 | from notebook.services.contents.checkpoints import ( 6 | Checkpoints, 7 | GenericCheckpointsMixin, 8 | ) 9 | from notebook.services.contents.filemanager import FileContentsManager 10 | from notebook.services.contents.filecheckpoints import ( 11 | GenericFileCheckpoints 12 | ) 13 | from notebook.services.contents.manager import ContentsManager 14 | from notebook.utils import to_os_path 15 | from nbformat import from_dict, reads, writes 16 | from nbformat.v4.nbbase import ( 17 | new_code_cell, 18 | new_markdown_cell, 19 | new_notebook, 20 | new_raw_cell, 21 | ) 22 | from nbformat.v4.rwbase import strip_transient 23 | from traitlets import ( 24 | Any, 25 | Bool, 26 | Dict, 27 | Instance, 28 | Integer, 29 | HasTraits, 30 | Unicode, 31 | ) 32 | 33 | 34 | __all__ = [ 35 | 'Any', 36 | 'Bool', 37 | 'Checkpoints', 38 | 'Config', 39 | 'ContentsManager', 40 | 'Dict', 41 | 'FileContentsManager', 42 | 'GenericCheckpointsMixin', 43 | 'GenericFileCheckpoints', 44 | 'HasTraits', 45 | 'Instance', 46 | 'Integer', 47 | 'Unicode', 48 | 'from_dict', 49 | 'new_code_cell', 50 | 'new_markdown_cell', 51 | 'new_notebook', 52 | 'new_raw_cell', 53 | 'reads', 54 | 'strip_transient', 55 | 'to_os_path', 56 | 'writes', 57 | ] 58 | -------------------------------------------------------------------------------- /pgcontents/utils/migrate.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for running migrations. 3 | """ 4 | from contextlib import contextmanager 5 | from os.path import join 6 | import subprocess 7 | 8 | from IPython.utils.tempdir import TemporaryDirectory 9 | 10 | from pgcontents.constants import ( 11 | ALEMBIC_INI_TEMPLATE, 12 | ALEMBIC_DIR_LOCATION, 13 | ) 14 | 15 | 16 | @contextmanager 17 | def temp_alembic_ini(alembic_dir_location, sqlalchemy_url): 18 | """ 19 | Temporarily write an alembic.ini file for use with alembic migration 20 | scripts. 21 | """ 22 | with TemporaryDirectory() as tempdir: 23 | alembic_ini_filename = join(tempdir, 'temp_alembic.ini') 24 | with open(alembic_ini_filename, 'w') as f: 25 | f.write( 26 | ALEMBIC_INI_TEMPLATE.format( 27 | alembic_dir_location=alembic_dir_location, 28 | sqlalchemy_url=sqlalchemy_url, 29 | ) 30 | ) 31 | yield alembic_ini_filename 32 | 33 | 34 | def upgrade(db_url, revision): 35 | """ 36 | Upgrade the given database to revision. 37 | """ 38 | with temp_alembic_ini(ALEMBIC_DIR_LOCATION, db_url) as alembic_ini: 39 | subprocess.check_call( 40 | ['alembic', '-c', alembic_ini, 'upgrade', revision] 41 | ) 42 | -------------------------------------------------------------------------------- /pgcontents/utils/sync.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities for synchronizing directories. 3 | """ 4 | from __future__ import ( 5 | print_function, 6 | unicode_literals, 7 | ) 8 | 9 | from ..checkpoints import PostgresCheckpoints 10 | from ..crypto import FallbackCrypto 11 | from ..query import ( 12 | list_users, 13 | reencrypt_user_content, 14 | ) 15 | 16 | 17 | def create_user(db_url, user): 18 | """ 19 | Create a user. 20 | """ 21 | PostgresCheckpoints( 22 | db_url=db_url, 23 | user_id=user, 24 | create_user_on_startup=True, 25 | ) 26 | 27 | 28 | def _separate_dirs_files(models): 29 | """ 30 | Split an iterable of models into a list of file paths and a list of 31 | directory paths. 
32 |     """
33 |     dirs = []
34 |     files = []
35 |     for model in models:
36 |         if model['type'] == 'directory':
37 |             dirs.append(model['path'])
38 |         else:
39 |             files.append(model['path'])
40 |     return dirs, files
41 | 
42 | 
43 | def walk(mgr):
44 |     """
45 |     Like os.walk, but written in terms of the Contents API.
46 | 
47 |     Takes a ContentsManager and returns a generator of tuples of the form:
48 |     (directory name, [subdirectories], [files in directory])
49 |     """
50 |     return walk_dirs(mgr, [''])
51 | 
52 | 
53 | def walk_dirs(mgr, dirs):
54 |     """
55 |     Recursive helper for walk.
56 |     """
57 |     for directory in dirs:
58 |         children = mgr.get(
59 |             directory,
60 |             content=True,
61 |             type='directory',
62 |         )['content']
63 |         subdirs, files = map(sorted, _separate_dirs_files(children))
64 |         yield directory, subdirs, files
65 |         if subdirs:
66 |             for entry in walk_dirs(mgr, subdirs):
67 |                 yield entry
68 | 
69 | 
70 | def walk_files(mgr):
71 |     """
72 |     Iterate over all files visible to ``mgr``.
73 |     """
74 |     for dir_, subdirs, files in walk(mgr):
75 |         for file_ in files:
76 |             yield file_
77 | 
78 | 
79 | def walk_files_with_content(mgr):
80 |     """
81 |     Iterate over the contents of all files visible to ``mgr``.
82 |     """
83 |     for _, _, files in walk(mgr):
84 |         for f in files:
85 |             yield mgr.get(f, content=True)
86 | 
87 | 
88 | def all_user_ids(engine):
89 |     """
90 |     Get a list of user_ids from an engine.
91 |     """
92 |     with engine.begin() as db:
93 |         return [row[0] for row in list_users(db)]
94 | 
95 | 
96 | def reencrypt_all_users(engine,
97 |                         old_crypto_factory,
98 |                         new_crypto_factory,
99 |                         logger):
100 |     """
101 |     Re-encrypt data for all users.
102 | 
103 |     This function is idempotent: running the same re-encryption process more
104 |     than once leaves the database in the same state as running it once.
105 |     Idempotency is achieved by first attempting to decrypt with the new
106 |     crypto and falling back to the old crypto on failure.
107 | 
108 |     An important consequence of this strategy is that **decrypting** a database
109 |     is not supported with this function, because ``NoEncryption.decrypt``
110 |     always succeeds. To decrypt an already-encrypted database, use
111 |     ``unencrypt_all_users`` instead.
112 | 
113 |     It is, however, possible to perform an initial encryption of a database by
114 |     passing a function returning a ``NoEncryption`` as ``old_crypto_factory``.
115 | 
116 |     Parameters
117 |     ----------
118 |     engine : SQLAlchemy.engine
119 |         Engine encapsulating database connections.
120 |     old_crypto_factory : function[str -> Any]
121 |         A function from user_id to an object providing the interface required
122 |         by PostgresContentsManager.crypto. Results of this will be used for
123 |         decryption of existing database content.
124 |     new_crypto_factory : function[str -> Any]
125 |         A function from user_id to an object providing the interface required
126 |         by PostgresContentsManager.crypto. Results of this will be used for
127 |         re-encryption of database content.
128 | 
129 |         This **must not** return instances of ``NoEncryption``. Use
130 |         ``unencrypt_all_users`` if you want to unencrypt a database.
131 |     logger : logging.Logger
132 |         A logger to use during re-encryption.
133 | 
134 |     See Also
135 |     --------
136 |     reencrypt_single_user
137 |     unencrypt_all_users
138 |     """
139 |     logger.info("Beginning re-encryption for all users.")
140 |     for user_id in all_user_ids(engine):
141 |         reencrypt_single_user(
142 |             engine,
143 |             user_id,
144 |             old_crypto=old_crypto_factory(user_id),
145 |             new_crypto=new_crypto_factory(user_id),
146 |             logger=logger,
147 |         )
148 |     logger.info("Finished re-encryption for all users.")
149 | 
150 | 
151 | def reencrypt_single_user(engine, user_id, old_crypto, new_crypto, logger):
152 |     """
153 |     Re-encrypt all files and checkpoints for a single user.
154 |     """
155 |     # Use FallbackCrypto so that a run that halts partway through can safely
156 |     # be restarted: already-re-encrypted content decrypts with new_crypto.
157 |     crypto = FallbackCrypto([new_crypto, old_crypto])
158 | 
159 |     reencrypt_user_content(
160 |         engine=engine,
161 |         user_id=user_id,
162 |         old_decrypt_func=crypto.decrypt,
163 |         new_encrypt_func=crypto.encrypt,
164 |         logger=logger,
165 |     )
166 | 
167 | 
168 | def unencrypt_all_users(engine, old_crypto_factory, logger):
169 |     """
170 |     Unencrypt data for all users.
171 | 
172 |     Parameters
173 |     ----------
174 |     engine : SQLAlchemy.engine
175 |         Engine encapsulating database connections.
176 |     old_crypto_factory : function[str -> Any]
177 |         A function from user_id to an object providing the interface required
178 |         by PostgresContentsManager.crypto. Results of this will be used for
179 |         decryption of existing database content.
180 |     logger : logging.Logger
181 |         A logger to use during unencryption.
182 |     """
183 |     logger.info("Beginning unencryption for all users.")
184 |     for user_id in all_user_ids(engine):
185 |         unencrypt_single_user(
186 |             engine=engine,
187 |             user_id=user_id,
188 |             old_crypto=old_crypto_factory(user_id),
189 |             logger=logger,
190 |         )
191 |     logger.info("Finished unencryption for all users.")
192 | 
193 | 
194 | def unencrypt_single_user(engine, user_id, old_crypto, logger):
195 |     """
196 |     Unencrypt all files and checkpoints for a single user.
197 |     """
198 |     reencrypt_user_content(
199 |         engine=engine,
200 |         user_id=user_id,
201 |         old_decrypt_func=old_crypto.decrypt,
202 |         new_encrypt_func=lambda s: s,  # Write the plaintext back unchanged.
203 |         logger=logger,
204 |     )
205 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | exclude = pgcontents/alembic/versions/*
3 | 
4 | [nosetests]
5 | with-ignore-docstrings=1
6 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from setuptools import setup, find_packages
3 | from os.path import join, dirname, abspath
4 | import sys
5 | 
6 | 
7 | long_description = ''
8 | 
9 | if 'upload' in sys.argv or '--long-description' in sys.argv:
10 |     with open('README.rst') as f:
11 |         long_description = f.read()
12 | 
13 | 
14 | def read_requirements(basename):
15 |     reqs_file = join(dirname(abspath(__file__)), basename)
16 |     with open(reqs_file) as f:
17 |         return [req.strip() for req in f.readlines()]
18 | 
19 | 
20 | def main():
21 |     setup(
22 |         name='pgcontents',
23 |         version='0.6',
24 |         description="A Postgres-backed ContentsManager for IPython/Jupyter.",
25 |         long_description=long_description,
26 |         author="Scott Sanderson",
27 |         author_email="ssanderson@quantopian.com",
28 |         packages=find_packages(include=['pgcontents', 'pgcontents.*']),
29 |         license='Apache 2.0',
30 |         include_package_data=True,
31 |         zip_safe=False,
32 |         url="https://github.com/quantopian/pgcontents",
33 |         classifiers=[
34 |             'Development Status :: 4 - Beta',
35 |             'Framework :: IPython',
36 |             'License :: OSI Approved :: Apache Software License',
37 |             'Natural Language :: English',
38 |             'Operating System :: OS Independent',
39 |             'Programming Language :: Python :: 2.7',
40 |             'Programming Language :: Python :: 3.5',
41 |             'Programming Language :: Python :: 3.6',
42 |             'Programming Language :: Python',
43 |             'Topic :: Database',
44 |         ],
45 |         install_requires=[
46 |             'SQLAlchemy>=1.0.5',
47 |             'alembic>=0.7.6',
48 |             'click>=3.3',
49 |             'cryptography>=1.4',
50 |             'psycopg2>=2.6.1',
51 |             'six>=1.9.0',
52 |             'notebook>=5.0',
53 |         ],
54 |         extras_require={
55 |             'test': [
56 |                 'notebook[test]',
57 |                 'nose',
58 |                 'nose-ignore-docstring',
59 |                 'requests',
60 |                 'mock',
61 |             ],
62 |         },
63 |         scripts=[
64 |             'bin/pgcontents',
65 |         ],
66 |     )
67 | 
68 | 
69 | if __name__ == '__main__':
70 |     main()
71 | 
--------------------------------------------------------------------------------
/tox.ini:
--------------------------------------------------------------------------------
1 | [tox]
2 | envlist=py{27,35,36}-notebook5,py{35,36}-notebook6,notest,flake8
3 | skip_missing_interpreters=True
4 | 
5 | [testenv]
6 | whitelist_externals =
7 |     createdb
8 | 
9 | install_command =
10 |     py{27,35,36}-notebook5: pip install -v -c notebook5_constraints.txt {opts} {packages}
11 |     py{35,36}-notebook6: pip install -v -c notebook6_constraints.txt {opts} {packages}
12 |     notest,flake8: pip install {opts} {packages}
13 | 
14 | deps =
15 |     py{27,35,36}-notebook{5,6}: .[test]
16 |     flake8: flake8
17 |     notest: .
18 | 
19 | commands =
20 |     py{27,35,36}-notebook{5,6}: nosetests pgcontents/tests
21 |     flake8: flake8 pgcontents
22 |     notest: python -c 'import pgcontents'
23 | 
24 | passenv = PGCONTENTS_TEST_DB_URL
25 | 
--------------------------------------------------------------------------------
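
Usage note: the traversal helpers in pgcontents/utils/sync.py follow the os.walk protocol, which is easiest to see in a small driver script. The sketch below is illustrative only: it assumes a PostgresContentsManager configured via its db_url and user_id traits and reuses the connection-string shape from the CI config; the user id and printing are not fixtures shipped with this repo.

from pgcontents.pgmanager import PostgresContentsManager
from pgcontents.utils.sync import walk, walk_files

# Illustrative connection string, shaped like the CI test URL.
DB_URL = 'postgresql://pgcontents_user:pgcontents_pass@localhost/pgcontents_testing'

mgr = PostgresContentsManager(db_url=DB_URL, user_id='alice')

# walk() yields (directory, [subdirectories], [files]) tuples depth-first,
# starting from the user's root directory ''.
for directory, subdirs, files in walk(mgr):
    print(directory or '/', '->', files)

# walk_files() flattens the same traversal into a bare stream of file paths.
all_paths = list(walk_files(mgr))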
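
Similarly, reencrypt_all_users and unencrypt_all_users are driven by per-user crypto factories. Below is a minimal sketch of a first-time encryption run, which the reencrypt_all_users docstring explicitly supports via a NoEncryption old_crypto_factory. It assumes FernetEncryption is the Fernet wrapper defined in pgcontents/crypto.py (sync.py already imports FallbackCrypto and NoEncryption from there), and it uses one shared key for simplicity where a real deployment would derive a key per user_id.

import logging

from cryptography.fernet import Fernet
from sqlalchemy import create_engine

from pgcontents.crypto import FernetEncryption, NoEncryption
from pgcontents.utils.sync import reencrypt_all_users

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('pgcontents.reencrypt')

engine = create_engine(
    'postgresql://pgcontents_user:pgcontents_pass@localhost/pgcontents_testing'
)

# The database starts unencrypted, so every user's "old" crypto is the
# no-op NoEncryption.
def old_crypto_factory(user_id):
    return NoEncryption()

# Shared key for illustration; FernetEncryption wraps a cryptography Fernet.
KEY = Fernet.generate_key()

def new_crypto_factory(user_id):
    return FernetEncryption(Fernet(KEY))

reencrypt_all_users(engine, old_crypto_factory, new_crypto_factory, logger)

Because the helper decrypts with the new crypto first and falls back to the old one, re-running this script after a partial failure is safe: rows that were already re-encrypted are simply rewritten with the same key.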