├── .editorconfig ├── .flake8 ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml └── workflows │ ├── main.yml │ ├── nightly.yml │ ├── publish-to-dockerhub.yml │ └── publish-to-test-pypi.yml ├── .gitignore ├── .vscode └── settings.json ├── Dockerfile ├── LICENCE.md ├── README.md ├── pyproject.toml ├── requirements-dev.txt ├── setup.py ├── src └── gcp_storage_emulator │ ├── __init__.py │ ├── __main__.py │ ├── exceptions.py │ ├── handlers │ ├── __init__.py │ ├── buckets.py │ └── objects.py │ ├── server.py │ ├── settings.py │ └── storage.py └── tests ├── __init__.py ├── test_binary.png ├── test_main.py ├── test_server.py ├── test_storage.py └── test_text.txt /.editorconfig: -------------------------------------------------------------------------------- 1 | # top-most EditorConfig file 2 | root = true 3 | charset = utf-8 4 | 5 | # Unix-style newlines with a newline ending every file 6 | [*] 7 | end_of_line = lf 8 | insert_final_newline = true 9 | trim_trailing_whitespace = true 10 | 11 | [*.py] 12 | charset = utf-8 13 | indent_style = space 14 | indent_size = 4 15 | -------------------------------------------------------------------------------- /.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-complexity = 10 3 | max-line-length = 127 4 | exclude = 5 | .git, 6 | .venv, 7 | __pycache__, 8 | *egg, 9 | build, 10 | dist, 11 | venv 12 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | [A minimal, complete, and reproducible example](https://stackoverflow.com/help/minimal-reproducible-example) to reproduce the behavior. 15 | 16 | **Expected behavior** 17 | A clear and concise description of what you expected to happen. 18 | 19 | **System (please complete the following information):** 20 | - OS version: 21 | - Python version: 22 | - gcp-storage-emulator version: 23 | 24 | **Additional context** 25 | Add any other context about the problem here. 26 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: pip 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | - package-ecosystem: "docker" 9 | directory: "/" 10 | schedule: 11 | interval: "daily" 12 | open-pull-requests-limit: 10 13 | - package-ecosystem: "github-actions" 14 | directory: "/" 15 | schedule: 16 | interval: "daily" 17 | open-pull-requests-limit: 10 18 | -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | pull_request: 8 | branches: 9 | - main 10 | pull_request_review: 11 | types: [submitted] 12 | workflow_dispatch: 13 | 14 | jobs: 15 | lint: 16 | runs-on: ubuntu-22.04 17 | steps: 18 | - uses: actions/checkout@v4 19 | - uses: actions/setup-python@v5 20 | with: 21 | python-version: '3.x' 22 | - uses: psf/black@stable 23 | 24 | build: 25 | runs-on: ubuntu-22.04 26 | strategy: 27 | matrix: 28 | python-version: ['3.8', '3.9', '3.10', '3.11', '3.12', 'pypy-3.9'] 29 | steps: 30 | - uses: actions/checkout@v4 31 | - name: Set up Python ${{ matrix.python-version }} 32 | uses: actions/setup-python@v5 33 | with: 34 | python-version: ${{ matrix.python-version }} 35 | - name: Install dependencies 36 | run: | 37 | python -m pip install --upgrade pip 38 | if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi 39 | - name: Lint with flake8 40 | run: | 41 | # stop the build if there are Python syntax errors or undefined names 42 | flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics 43 | # The GitHub editor is 127 chars wide 44 | flake8 . --count --max-complexity=10 --max-line-length=127 --statistics 45 | - name: Install package 46 | run: pip install -e . 
47 | - name: Test with pytest 48 | run: pytest 49 | - name: Upload coverage to Codecov 50 | uses: codecov/codecov-action@v4 51 | with: 52 | flags: unittests 53 | 54 | docker: 55 | runs-on: ubuntu-22.04 56 | strategy: 57 | matrix: 58 | arch: [linux/amd64, linux/arm64] 59 | steps: 60 | - uses: actions/checkout@v4 61 | - name: Set up QEMU 62 | uses: docker/setup-qemu-action@v3 63 | - name: Set up Docker Buildx 64 | uses: docker/setup-buildx-action@v3 65 | - name: Cache Docker layers 66 | uses: actions/cache@v4 67 | with: 68 | path: /tmp/.buildx-cache 69 | key: ${{ runner.os }}-build-x-${{ matrix.arch }}-${{ github.sha }} 70 | restore-keys: | 71 | ${{ runner.os }}-build-x-${{ matrix.arch }}- 72 | - name: Build 73 | uses: docker/build-push-action@v6.7.0 74 | with: 75 | platforms: ${{ matrix.arch }} 76 | push: false 77 | load: true 78 | tags: oittaa/gcp-storage-emulator:latest 79 | cache-from: type=local,src=/tmp/.buildx-cache 80 | cache-to: type=local,dest=/tmp/.buildx-cache-new 81 | - 82 | # Temp fix 83 | # https://github.com/docker/build-push-action/issues/252 84 | # https://github.com/moby/buildkit/issues/1896 85 | name: Move cache 86 | run: | 87 | rm -rf /tmp/.buildx-cache 88 | mv /tmp/.buildx-cache-new /tmp/.buildx-cache 89 | - name: Docker container up 90 | run: docker run -d --rm -p 8080:8080 --name gcp-storage-emulator oittaa/gcp-storage-emulator 91 | - name: Wait 10 seconds 92 | run: sleep 10 93 | - name: Check running containers 94 | run: docker ps -a 95 | - name: Check the container reachability 96 | run: curl -s --retry 10 --retry-connrefused http://localhost:8080/ 97 | - name: Check Docker logs 98 | run: docker logs gcp-storage-emulator 99 | - name: Docker container down 100 | run: docker stop gcp-storage-emulator 101 | 102 | publish: 103 | if: ${{ github.event_name == 'push' }} 104 | needs: [lint, build, docker] 105 | runs-on: ubuntu-22.04 106 | steps: 107 | - uses: actions/checkout@v4 108 | - name: Set up QEMU 109 | uses: docker/setup-qemu-action@v3 110 | - name: Set up Docker Buildx 111 | uses: docker/setup-buildx-action@v3 112 | - name: Cache Docker layers 113 | uses: actions/cache@v4 114 | with: 115 | path: /tmp/.buildx-cache 116 | key: ${{ runner.os }}-build-x-${{ github.sha }} 117 | restore-keys: | 118 | ${{ runner.os }}-build-x- 119 | - name: Login to DockerHub 120 | uses: docker/login-action@v3.3.0 121 | with: 122 | username: ${{ secrets.DOCKERHUB_USERNAME }} 123 | password: ${{ secrets.DOCKERHUB_TOKEN }} 124 | - name: Build and push 125 | id: docker_build 126 | uses: docker/build-push-action@v6.7.0 127 | with: 128 | platforms: linux/amd64,linux/arm64 129 | push: true 130 | tags: oittaa/gcp-storage-emulator:latest 131 | cache-from: type=local,src=/tmp/.buildx-cache 132 | cache-to: type=local,dest=/tmp/.buildx-cache 133 | - name: Image digest 134 | run: echo ${{ steps.docker_build.outputs.digest }} 135 | -------------------------------------------------------------------------------- /.github/workflows/nightly.yml: -------------------------------------------------------------------------------- 1 | name: Nightly Test 2 | 3 | # Run the nightly tests at at 6 AM UTC 4 | on: 5 | schedule: 6 | - cron: "0 6 * * *" 7 | jobs: 8 | docker: 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up Docker Buildx 13 | uses: docker/setup-buildx-action@v3 14 | - name: Cache Docker layers 15 | uses: actions/cache@v4 16 | with: 17 | path: /tmp/.buildx-cache 18 | key: ${{ runner.os }}-build-x-${{ github.sha }} 19 | restore-keys: | 20 | ${{ runner.os 
}}-build-x- 21 | - name: Build 22 | uses: docker/build-push-action@v6.7.0 23 | with: 24 | push: false 25 | load: true 26 | tags: oittaa/gcp-storage-emulator:latest 27 | cache-from: type=local,src=/tmp/.buildx-cache 28 | cache-to: type=local,dest=/tmp/.buildx-cache 29 | - name: Docker container up 30 | run: docker run -d --rm -p 8080:8080 --name gcp-storage-emulator oittaa/gcp-storage-emulator 31 | - name: Wait 10 seconds 32 | run: sleep 10 33 | - name: Check running containers 34 | run: docker ps -a 35 | - name: Check the container reachability 36 | run: curl -s --retry 10 --retry-connrefused http://localhost:8080/ 37 | - name: Check Docker logs 38 | run: docker logs gcp-storage-emulator 39 | - name: Docker container down 40 | run: docker stop gcp-storage-emulator 41 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-dockerhub.yml: -------------------------------------------------------------------------------- 1 | name: Publish to Docker Hub 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | publish: 9 | runs-on: ubuntu-22.04 10 | steps: 11 | - uses: actions/checkout@v4 12 | - name: Set up QEMU 13 | uses: docker/setup-qemu-action@v3 14 | - name: Set up Docker Buildx 15 | uses: docker/setup-buildx-action@v3 16 | - name: Cache Docker layers 17 | uses: actions/cache@v4 18 | with: 19 | path: /tmp/.buildx-cache 20 | key: ${{ runner.os }}-build-x-${{ github.sha }} 21 | restore-keys: | 22 | ${{ runner.os }}-build-x- 23 | - name: Login to DockerHub 24 | uses: docker/login-action@v3.3.0 25 | with: 26 | username: ${{ secrets.DOCKERHUB_USERNAME }} 27 | password: ${{ secrets.DOCKERHUB_TOKEN }} 28 | - name: Build and push 29 | id: docker_build 30 | uses: docker/build-push-action@v6.7.0 31 | with: 32 | platforms: linux/amd64,linux/arm64 33 | push: true 34 | tags: | 35 | oittaa/gcp-storage-emulator:latest 36 | oittaa/gcp-storage-emulator:${{ github.event.release.tag_name }} 37 | cache-from: type=local,src=/tmp/.buildx-cache 38 | cache-to: type=local,dest=/tmp/.buildx-cache 39 | - name: Image digest 40 | run: echo ${{ steps.docker_build.outputs.digest }} 41 | -------------------------------------------------------------------------------- /.github/workflows/publish-to-test-pypi.yml: -------------------------------------------------------------------------------- 1 | name: Publish Python 🐍 distributions 📦 to PyPI and TestPyPI 2 | 3 | on: 4 | release: 5 | types: [published] 6 | jobs: 7 | build-n-publish: 8 | if: startsWith(github.ref, 'refs/tags') 9 | name: Build and publish Python 🐍 distributions 📦 to PyPI and TestPyPI 10 | runs-on: ubuntu-22.04 11 | steps: 12 | - uses: actions/checkout@v4 13 | - name: Set up Python 3.x 14 | uses: actions/setup-python@v5 15 | with: 16 | python-version: '3.x' 17 | - name: Install pypa/build 18 | run: >- 19 | python -m 20 | pip install 21 | build 22 | --user 23 | - name: Build a binary wheel and a source tarball 24 | run: >- 25 | python -m 26 | build 27 | --sdist 28 | --wheel 29 | --outdir dist/ 30 | . 
31 | - name: Publish distribution 📦 to Test PyPI 32 | uses: pypa/gh-action-pypi-publish@v1.9.0 33 | with: 34 | password: ${{ secrets.TEST_PYPI_API_TOKEN }} 35 | repository-url: https://test.pypi.org/legacy/ 36 | - name: Publish distribution 📦 to PyPI 37 | uses: pypa/gh-action-pypi-publish@v1.9.0 38 | with: 39 | password: ${{ secrets.PYPI_API_TOKEN }} 40 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | 140 | # Visual Studio Code 141 | .vscode/* 142 | !.vscode/settings.json 143 | !.vscode/tasks.json 144 | !.vscode/launch.json 145 | !.vscode/extensions.json 146 | *.code-workspace 147 | 148 | # Local History for Visual Studio Code 149 | .history/ 150 | 151 | .cloudstorage 152 | .idea 153 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.linting.pylintEnabled": false, 3 | "python.linting.flake8Enabled": true, 4 | "python.linting.flake8Args": ["--max-complexity=10", "--max-line-length=127"], 5 | "python.linting.enabled": true 6 | } 7 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.12.5-slim 2 | 3 | # Allow statements and log messages to immediately appear in the Knative logs 4 | ENV PYTHONUNBUFFERED True 5 | ENV APP_HOME /app 6 | ENV PORT 8080 7 | ENV HOST 0.0.0.0 8 | ENV STORAGE_BASE / 9 | ENV STORAGE_DIR storage 10 | 11 | # Python app installation 12 | WORKDIR $APP_HOME 13 | COPY README.md pyproject.toml setup.py ./ 14 | COPY src src/ 15 | RUN pip install . 16 | 17 | ENTRYPOINT ["gcp-storage-emulator"] 18 | CMD ["start"] 19 | -------------------------------------------------------------------------------- /LICENCE.md: -------------------------------------------------------------------------------- 1 | # BSD 3-Clause License 2 | 3 | Copyright (c) Eero Vuojolahti 2021. 4 | Copyright (c) Alessandro Artoni 2020, Potato London Ltd. 2020-, and all contributors. 5 | All rights reserved. 6 | 7 | Redistribution and use in source and binary forms, with or without modification, 8 | are permitted provided that the following conditions are met: 9 | 10 | 1. Redistributions of source code must retain the above copyright notice, 11 | this list of conditions and the following disclaimer. 12 | 13 | 2. Redistributions in binary form must reproduce the above copyright 14 | notice, this list of conditions and the following disclaimer in the 15 | documentation and/or other materials provided with the distribution. 16 | 17 | 3. Neither the name of Djangae nor 18 | the names of its contributors may be used to endorse or promote products 19 | derived from this software without specific prior written permission. 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 22 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 23 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 24 | DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 25 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 26 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 27 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 28 | ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 30 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Local Emulator for Google Cloud Storage 2 | 3 | [![CI](https://github.com/oittaa/gcp-storage-emulator/actions/workflows/main.yml/badge.svg)](https://github.com/oittaa/gcp-storage-emulator/actions/workflows/main.yml) 4 | [![PyPI](https://img.shields.io/pypi/v/gcp-storage-emulator.svg)](https://pypi.org/project/gcp-storage-emulator/) 5 | [![codecov](https://codecov.io/gh/oittaa/gcp-storage-emulator/branch/main/graph/badge.svg?token=GpiSgoXsGL)](https://codecov.io/gh/oittaa/gcp-storage-emulator) 6 | [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) 7 | 8 | Google doesn't (yet) ship an emulator for the Cloud Storage API like they do for 9 | Cloud Datastore. 10 | 11 | This is a stub emulator so you can run your tests and do local development without 12 | having to connect to the production Storage APIs. 13 | 14 | 15 | **THIS IS A WORK IN PROGRESS AND ONLY SUPPORTS A LIMITED SUBSET OF THE API** 16 | 17 | --- 18 | 19 | ## Installation 20 | 21 | `pip install gcp-storage-emulator` 22 | 23 | 24 | ## CLI Usage 25 | 26 | 27 | ### Starting the emulator 28 | Start the emulator with: 29 | 30 | ```bash 31 | gcp-storage-emulator start 32 | ``` 33 | 34 | By default, the server will listen on `http://localhost:9023` and data is stored under `./.cloudstorage`. You can configure the folder using the env variables `STORAGE_BASE` (default `./`) and `STORAGE_DIR` (default `.cloudstorage`). 35 | 36 | If you wish to run the emulator in a testing environment or if you don't want to persist any data, you can use the `--in-memory` parameter. For tests, you might want to consider starting up the server from your code (see the [Python APIs](#python-apis)) 37 | 38 | If you're using the Google client library (e.g. `google-cloud-storage` for Python) then you can set the `STORAGE_EMULATOR_HOST` environment variable to tell the library to connect to your emulator endpoint rather than the standard `https://storage.googleapis.com`, e.g.: 39 | 40 | ```bash 41 | export STORAGE_EMULATOR_HOST=http://localhost:9023 42 | ``` 43 | 44 | 45 | ### Wiping data 46 | 47 | You can wipe the data by running 48 | 49 | ```bash 50 | gcp-storage-emulator wipe 51 | ``` 52 | 53 | You can pass `--keep-buckets` to wipe the data while keeping the buckets. 54 | 55 | #### Example 56 | 57 | Use in-memory storage and automatically create default storage bucket `my-bucket`. 58 | 59 | ```bash 60 | gcp-storage-emulator start --host=localhost --port=9023 --in-memory --default-bucket=my-bucket 61 | ``` 62 | 63 | ## Python APIs 64 | 65 | To start a server from your code you can do 66 | 67 | ```python 68 | from gcp_storage_emulator.server import create_server 69 | 70 | server = create_server("localhost", 9023, in_memory=False) 71 | 72 | server.start() 73 | # ........ 
74 | server.stop() 75 | ``` 76 | 77 | You can wipe the data by calling `server.wipe()` 78 | 79 | This can also be achieved (e.g. during tests) by hitting the `/wipe` HTTP endpoint 80 | 81 | #### Example 82 | 83 | ```python 84 | import os 85 | 86 | from google.cloud import storage 87 | from gcp_storage_emulator.server import create_server 88 | 89 | HOST = "localhost" 90 | PORT = 9023 91 | BUCKET = "test-bucket" 92 | 93 | # default_bucket parameter creates the bucket automatically 94 | server = create_server(HOST, PORT, in_memory=True, default_bucket=BUCKET) 95 | server.start() 96 | 97 | os.environ["STORAGE_EMULATOR_HOST"] = f"http://{HOST}:{PORT}" 98 | client = storage.Client() 99 | 100 | bucket = client.bucket(BUCKET) 101 | blob = bucket.blob("blob1") 102 | blob.upload_from_string("test1") 103 | blob = bucket.blob("blob2") 104 | blob.upload_from_string("test2") 105 | for blob in bucket.list_blobs(): 106 | content = blob.download_as_bytes() 107 | print(f"Blob [{blob.name}]: {content}") 108 | 109 | server.stop() 110 | ``` 111 | 112 | ## Docker 113 | 114 | Pull the Docker image. 115 | 116 | ```bash 117 | docker pull oittaa/gcp-storage-emulator 118 | ``` 119 | 120 | Inside the container instance, the value of the `PORT` environment variable always reflects the port to which requests are sent. It defaults to `8080`. The directory used for the emulated storage is located under `/storage` in the container. In the following example the host's directory `$(pwd)/cloudstorage` will be bound to the emulated storage. 121 | 122 | ```bash 123 | docker run -d \ 124 | -e PORT=9023 \ 125 | -p 9023:9023 \ 126 | --name gcp-storage-emulator \ 127 | -v "$(pwd)/cloudstorage":/storage \ 128 | oittaa/gcp-storage-emulator 129 | ``` 130 | 131 | ```python 132 | import os 133 | 134 | from google.cloud import exceptions, storage 135 | 136 | HOST = "localhost" 137 | PORT = 9023 138 | BUCKET = "test-bucket" 139 | 140 | os.environ["STORAGE_EMULATOR_HOST"] = f"http://{HOST}:{PORT}" 141 | client = storage.Client() 142 | 143 | try: 144 | bucket = client.create_bucket(BUCKET) 145 | except exceptions.Conflict: 146 | bucket = client.bucket(BUCKET) 147 | 148 | blob = bucket.blob("blob1") 149 | blob.upload_from_string("test1") 150 | print(blob.download_as_bytes()) 151 | ``` 152 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "wheel" 5 | ] 6 | 7 | [tool.pytest.ini_options] 8 | addopts = "--cov=src --cov-report=xml --cov-branch" 9 | testpaths = [ 10 | "tests" 11 | ] 12 | -------------------------------------------------------------------------------- /requirements-dev.txt: -------------------------------------------------------------------------------- 1 | flake8 2 | google-cloud-storage 3 | google-crc32c 4 | pytest 5 | pytest-cov 6 | requests 7 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | from setuptools import setup, find_packages 3 | 4 | NAME = "gcp-storage-emulator" 5 | PACKAGES = find_packages("src") 6 | 7 | DESCRIPTION = "A stub emulator for the Google Cloud Storage API" 8 | URL = "https://github.com/oittaa/gcp-storage-emulator" 9 | LONG_DESCRIPTION = open(os.path.join(os.path.dirname(__file__), "README.md")).read() 10 | 11 | AUTHOR = "Eero Vuojolahti" 12 | AUTHOR_EMAIL = "contact@oittaa.com" 
13 | GITHUB_REF = os.environ.get("GITHUB_REF") 14 | PREFIX = "refs/tags/" 15 | 16 | if GITHUB_REF and GITHUB_REF.startswith(PREFIX): 17 | prefix_len = len(PREFIX) 18 | VERSION = GITHUB_REF[prefix_len:] 19 | else: 20 | VERSION = "0.0.0.dev0" 21 | 22 | setup( 23 | name=NAME, 24 | version=VERSION, 25 | description=DESCRIPTION, 26 | long_description=LONG_DESCRIPTION, 27 | long_description_content_type="text/markdown", 28 | url=URL, 29 | author=AUTHOR, 30 | author_email=AUTHOR_EMAIL, 31 | packages=PACKAGES, 32 | package_dir={"": "src"}, 33 | zip_safe=False, 34 | keywords=[ 35 | "Google Cloud Storage", 36 | "Google App Engine", 37 | "Google Cloud Platform", 38 | "GCS", 39 | "GAE", 40 | "GCP", 41 | ], 42 | classifiers=[ 43 | "Development Status :: 3 - Alpha", 44 | "Intended Audience :: Developers", 45 | "License :: OSI Approved :: BSD License", 46 | "Operating System :: OS Independent", 47 | "Programming Language :: Python", 48 | "Programming Language :: Python :: 3", 49 | "Programming Language :: Python :: 3.8", 50 | "Programming Language :: Python :: 3.9", 51 | "Programming Language :: Python :: 3.10", 52 | "Programming Language :: Python :: 3.11", 53 | "Programming Language :: Python :: 3.12", 54 | ], 55 | entry_points={ 56 | "console_scripts": [ 57 | "gcp-storage-emulator=gcp_storage_emulator.__main__:main", 58 | ], 59 | }, 60 | install_requires=[ 61 | "fs", 62 | "google-crc32c", 63 | ], 64 | python_requires=">=3.8", 65 | ) 66 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oittaa/gcp-storage-emulator/0d623b8d2a0a4432a990b50373ee1c5d59370f47/src/gcp_storage_emulator/__init__.py -------------------------------------------------------------------------------- /src/gcp_storage_emulator/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import argparse 4 | import logging 5 | import os 6 | import sys 7 | 8 | from gcp_storage_emulator.handlers.buckets import create_bucket 9 | from gcp_storage_emulator.server import create_server 10 | from gcp_storage_emulator.storage import Storage 11 | 12 | # One after gcloud-task-emulator one 13 | DEFAULT_PORT = int(os.environ.get("PORT", 9023)) 14 | DEFAULT_HOST = os.environ.get("HOST", "localhost") 15 | 16 | 17 | def get_server(host, port, memory=False, default_bucket=None, data_dir=None): 18 | server = create_server(host, port, memory, default_bucket, data_dir=data_dir) 19 | return server 20 | 21 | 22 | def wipe(data_dir=None, keep_buckets=False): 23 | keep_str = " while keeping the buckets" if keep_buckets else "" 24 | print(f"Wiping...{keep_str}") 25 | server = create_server(None, None, False, data_dir=data_dir) 26 | server.wipe(keep_buckets=keep_buckets) 27 | print("Done.") 28 | return 0 29 | 30 | 31 | def prepare_args_parser(): 32 | parser = argparse.ArgumentParser(description="Google Cloud Storage Emulator") 33 | parser.add_argument( 34 | "-d", "--data-dir", default=None, help="directory to use as the storage root" 35 | ) 36 | subparsers = parser.add_subparsers(title="subcommands", dest="subcommand") 37 | 38 | start = subparsers.add_parser("start", help="start the emulator") 39 | start.add_argument( 40 | "--port", type=int, help="the port to run the server on", default=DEFAULT_PORT 41 | ) 42 | start.add_argument( 43 | "-H", "--host", help="the host to run the server on", default=DEFAULT_HOST 44 
| ) 45 | start.add_argument( 46 | "--default-bucket", 47 | help="The default bucket. If provided, bucket will be created automatically", 48 | ) 49 | start.add_argument( 50 | "-q", 51 | "--quiet", 52 | action="store_true", 53 | default=False, 54 | help="only outputs critical level logging", 55 | ) 56 | start.add_argument( 57 | "-M", 58 | "--no-store-on-disk", 59 | "--in-memory", 60 | action="store_true", 61 | default=False, 62 | help="use in-memory storage", 63 | ) 64 | 65 | wipe = subparsers.add_parser("wipe", help="Wipe the local data") 66 | wipe.add_argument( 67 | "--keep-buckets", 68 | action="store_true", 69 | default=False, 70 | help="If provided the data will be wiped but the existing buckets are kept", 71 | ) 72 | 73 | create_bucket = subparsers.add_parser("create_bucket", help="create bucket") 74 | # -n, --name deprecated 75 | create_bucket.add_argument( 76 | "-n", "--name", action="store_true", help=argparse.SUPPRESS 77 | ) 78 | create_bucket.add_argument("name", help="Name of the new bucket") 79 | 80 | return parser, subparsers 81 | 82 | 83 | def main(args=sys.argv[1:], test_mode=False): 84 | parser, subparsers = prepare_args_parser() 85 | args = parser.parse_args(args) 86 | if args.subcommand not in subparsers.choices.keys(): 87 | parser.print_usage() 88 | sys.exit(1) 89 | 90 | if args.subcommand == "wipe": 91 | answer = input( 92 | "This operation will IRREVERSIBLY DELETE all your data. Do you wish to proceed? [y/N] " 93 | ) 94 | if answer.lower() in ("y", "ye", "yes"): 95 | sys.exit(wipe(data_dir=args.data_dir, keep_buckets=args.keep_buckets)) 96 | else: 97 | print("wipe command cancelled") 98 | sys.exit(1) 99 | 100 | if args.subcommand == "create_bucket": 101 | storage = Storage(data_dir=args.data_dir) 102 | create_bucket(args.name, storage) 103 | sys.exit(0) 104 | 105 | root = logging.getLogger("") 106 | stream_handler = logging.StreamHandler() 107 | root.addHandler(stream_handler) 108 | if args.quiet: 109 | root.setLevel(logging.CRITICAL) 110 | else: 111 | root.setLevel(logging.DEBUG) 112 | server = get_server( 113 | args.host, args.port, args.no_store_on_disk, args.default_bucket, args.data_dir 114 | ) 115 | if test_mode: 116 | return server 117 | sys.exit(server.run()) 118 | 119 | 120 | if __name__ == "__main__": 121 | main() 122 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/exceptions.py: -------------------------------------------------------------------------------- 1 | class NotFound(Exception): 2 | pass 3 | 4 | 5 | class Conflict(Exception): 6 | pass 7 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/handlers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oittaa/gcp-storage-emulator/0d623b8d2a0a4432a990b50373ee1c5d59370f47/src/gcp_storage_emulator/handlers/__init__.py -------------------------------------------------------------------------------- /src/gcp_storage_emulator/handlers/buckets.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime, timezone 3 | from http import HTTPStatus 4 | 5 | from gcp_storage_emulator import settings 6 | from gcp_storage_emulator.exceptions import Conflict, NotFound 7 | 8 | logger = logging.getLogger("api.bucket") 9 | 10 | CONFLICT = { 11 | "error": { 12 | "errors": [ 13 | { 14 | "domain": "global", 15 | "reason": "conflict", 16 | "message": "You 
already own this bucket. Please select another name.", 17 | } 18 | ], 19 | "code": 409, 20 | "message": "You already own this bucket. Please select another name.", 21 | } 22 | } 23 | 24 | BAD_REQUEST = { 25 | "error": { 26 | "errors": [ 27 | {"domain": "global", "reason": "invalid", "message": "Empty bucket name"} 28 | ], 29 | "code": 400, 30 | "message": "Empty bucket name", 31 | } 32 | } 33 | 34 | 35 | def _make_bucket_resource(bucket_name): 36 | now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ") 37 | return { 38 | "kind": "storage#bucket", 39 | "id": bucket_name, 40 | "selfLink": "{}/b/{}".format(settings.API_ENDPOINT, bucket_name), 41 | "projectNumber": "1234", 42 | "name": bucket_name, 43 | "timeCreated": now, 44 | "updated": now, 45 | "metageneration": "1", 46 | "iamConfiguration": { 47 | "bucketPolicyOnly": {"enabled": False}, 48 | "uniformBucketLevelAccess": {"enabled": False}, 49 | }, 50 | "location": "US", 51 | "locationType": "multi-region", 52 | "storageClass": "STANDARD", 53 | "etag": "CAE=", 54 | } 55 | 56 | 57 | def get(request, response, storage, *args, **kwargs): 58 | name = request.params.get("bucket_name") 59 | if name and storage.buckets.get(name): 60 | response.json(storage.buckets.get(name)) 61 | else: 62 | response.status = HTTPStatus.NOT_FOUND 63 | 64 | 65 | def ls(request, response, storage, *args, **kwargs): 66 | logger.info("[BUCKETS] List received") 67 | response.json( 68 | { 69 | "kind": "storage#buckets", 70 | "items": list(storage.buckets.values()), 71 | } 72 | ) 73 | 74 | 75 | def create_bucket(name, storage): 76 | if storage.get_bucket(name): 77 | return False 78 | else: 79 | bucket = _make_bucket_resource(name) 80 | storage.create_bucket(name, bucket) 81 | return bucket 82 | 83 | 84 | def insert(request, response, storage, *args, **kwargs): 85 | name = request.data.get("name") 86 | if name: 87 | logger.debug( 88 | "[BUCKETS] Received request to create bucket with name {}".format(name) 89 | ) 90 | bucket = create_bucket(name, storage) 91 | if not bucket: 92 | response.status = HTTPStatus.CONFLICT 93 | response.json(CONFLICT) 94 | else: 95 | bucket = _make_bucket_resource(name) 96 | storage.create_bucket(name, bucket) 97 | response.json(bucket) 98 | else: 99 | response.status = HTTPStatus.BAD_REQUEST 100 | response.json(BAD_REQUEST) 101 | 102 | 103 | def delete(request, response, storage, *args, **kwargs): 104 | name = request.params.get("bucket_name") 105 | if not name: 106 | response.status = HTTPStatus.BAD_REQUEST 107 | return response.json(BAD_REQUEST) 108 | 109 | try: 110 | storage.delete_bucket(name) 111 | except NotFound: 112 | response.status = HTTPStatus.NOT_FOUND 113 | except Conflict: 114 | response.status = HTTPStatus.CONFLICT 115 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/handlers/objects.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | import json 3 | import logging 4 | import re 5 | import secrets 6 | import string 7 | import time 8 | import urllib.parse 9 | from base64 import b64encode 10 | from copy import deepcopy 11 | from datetime import datetime, timezone 12 | from enum import IntEnum 13 | from http import HTTPStatus 14 | 15 | import google_crc32c 16 | 17 | from gcp_storage_emulator.exceptions import Conflict, NotFound 18 | 19 | logger = logging.getLogger("api.object") 20 | 21 | _WRITABLE_FIELDS = ( 22 | "cacheControl", 23 | "contentDisposition", 24 | "contentEncoding", 25 | "contentLanguage", 26 | 
"contentType", 27 | "crc32c", 28 | "customTime", 29 | "md5Hash", 30 | "metadata", 31 | "storageClass", 32 | ) 33 | 34 | _HASH_HEADER = "X-Goog-Hash" 35 | 36 | BAD_REQUEST = { 37 | "error": { 38 | "errors": [{"domain": "global", "reason": "invalid", "message": None}], 39 | "code": 400, 40 | "message": None, 41 | } 42 | } 43 | 44 | NOT_FOUND = { 45 | "error": { 46 | "errors": [{"domain": "global", "reason": "notFound", "message": None}], 47 | "code": 404, 48 | "message": None, 49 | } 50 | } 51 | 52 | 53 | MD5_CHECKSUM_ERROR = 'Provided MD5 hash "{}" doesn\'t match calculated MD5 hash "{}".' 54 | CRC32C_CHECKSUM_ERROR = 'Provided CRC32C "{}" doesn\'t match calculated CRC32C "{}".' 55 | 56 | 57 | class GoogleHTTPStatus(IntEnum): 58 | def __new__(cls, value, phrase, description=""): 59 | obj = int.__new__(cls, value) 60 | obj._value_ = value 61 | 62 | obj.phrase = phrase 63 | obj.description = description 64 | return obj 65 | 66 | RESUME_INCOMPLETE = 308, "Resume Incomplete" 67 | 68 | 69 | def _handle_conflict(response, err): 70 | msg = str(err) 71 | response.status = HTTPStatus.BAD_REQUEST 72 | resp = deepcopy(BAD_REQUEST) 73 | resp["error"]["message"] = msg 74 | resp["error"]["errors"][0]["message"] = msg 75 | response.json(resp) 76 | 77 | 78 | def _crc32c(content): 79 | if isinstance(content, str): 80 | content = content.encode() 81 | val = google_crc32c.Checksum(content) 82 | return b64encode(val.digest()).decode("ascii") 83 | 84 | 85 | def _md5(content): 86 | if isinstance(content, str): 87 | content = content.encode() 88 | return b64encode(hashlib.md5(content).digest()).decode("ascii") 89 | 90 | 91 | def _checksums(content, file_obj): 92 | crc32c_hash = _crc32c(content) 93 | obj_crc32c = file_obj.get("crc32c") 94 | md5_hash = _md5(content) 95 | obj_md5 = file_obj.get("md5Hash") 96 | if not obj_crc32c: 97 | file_obj["crc32c"] = crc32c_hash 98 | else: 99 | if obj_crc32c != crc32c_hash: 100 | raise Conflict(CRC32C_CHECKSUM_ERROR.format(obj_crc32c, crc32c_hash)) 101 | if not obj_md5: 102 | file_obj["md5Hash"] = md5_hash 103 | else: 104 | if obj_md5 != md5_hash: 105 | raise Conflict(MD5_CHECKSUM_ERROR.format(obj_md5, md5_hash)) 106 | if not file_obj.get("etag"): 107 | file_obj["etag"] = md5_hash 108 | return file_obj 109 | 110 | 111 | def _patch_object(obj, metadata): 112 | if metadata: 113 | obj["metageneration"] = str(int(obj["metageneration"]) + 1) 114 | for key in _WRITABLE_FIELDS: 115 | val = metadata.get(key) 116 | if val is not None: 117 | if key == "customTime" and obj.get(key) and obj.get(key) > val: 118 | continue 119 | obj[key] = val 120 | return obj 121 | 122 | 123 | def _make_object_resource( 124 | base_url, bucket_name, object_name, content_type, content_length, metadata=None 125 | ): 126 | time_id = time.time_ns() 127 | now = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%S.%fZ") 128 | 129 | obj = { 130 | "kind": "storage#object", 131 | "id": "{}/{}/{}".format(bucket_name, object_name, time_id), 132 | "selfLink": "/storage/v1/b/{}/o/{}".format(bucket_name, object_name), 133 | "name": object_name, 134 | "bucket": bucket_name, 135 | "generation": str(time_id), 136 | "metageneration": "1", 137 | "contentType": content_type, 138 | "timeCreated": now, 139 | "updated": now, 140 | "storageClass": "STANDARD", 141 | "timeStorageClassUpdated": now, 142 | "size": content_length, 143 | "md5Hash": None, 144 | "mediaLink": "{}/download/storage/v1/b/{}/o/{}?generation={}&alt=media".format( 145 | base_url, 146 | bucket_name, 147 | object_name, 148 | time_id, 149 | ), 150 | "crc32c": None, 
151 | "etag": None, 152 | } 153 | obj = _patch_object(obj, metadata) 154 | return obj 155 | 156 | 157 | def _content_type_from_request(request, default=None): 158 | if "contentEncoding" in request.query: 159 | return request.query["contentEncoding"][0] 160 | return default 161 | 162 | 163 | def _media_upload(request, response, storage): 164 | object_id = request.query["name"][0] 165 | content_type = _content_type_from_request( 166 | request, request.get_header("content-type") 167 | ) 168 | obj = _make_object_resource( 169 | request.base_url, 170 | request.params["bucket_name"], 171 | object_id, 172 | content_type, 173 | str(len(request.data)), 174 | ) 175 | obj = _checksums(request.data, obj) 176 | storage.create_file( 177 | request.params["bucket_name"], 178 | object_id, 179 | request.data, 180 | obj, 181 | ) 182 | 183 | response.json(obj) 184 | 185 | 186 | def _multipart_upload(request, response, storage): 187 | object_id = request.data["meta"].get("name") 188 | # Overrides the object metadata's name value, if any. 189 | if "name" in request.query: 190 | object_id = request.query["name"][0] 191 | content_type = _content_type_from_request(request, request.data["content-type"]) 192 | obj = _make_object_resource( 193 | request.base_url, 194 | request.params["bucket_name"], 195 | object_id, 196 | content_type, 197 | str(len(request.data["content"])), 198 | request.data["meta"], 199 | ) 200 | obj = _checksums(request.data["content"], obj) 201 | storage.create_file( 202 | request.params["bucket_name"], 203 | object_id, 204 | request.data["content"], 205 | obj, 206 | ) 207 | 208 | response.json(obj) 209 | 210 | 211 | def _create_resumable_upload(request, response, storage): 212 | # Workaround for libraries using POST method when they should be using PUT. 213 | if "upload_id" in request.query: 214 | return upload_partial(request, response, storage) 215 | if request.data: 216 | object_id = request.data.get("name") 217 | # Overrides the object metadata's name value, if any. 
218 | if "name" in request.query: 219 | object_id = request.query["name"][0] 220 | content_type = _content_type_from_request( 221 | request, request.get_header("x-upload-content-type", "application/octet-stream") 222 | ) 223 | content_length = request.get_header("x-upload-content-length", None) 224 | obj = _make_object_resource( 225 | request.base_url, 226 | request.params["bucket_name"], 227 | object_id, 228 | content_type, 229 | content_length, 230 | ) 231 | id = storage.create_resumable_upload( 232 | request.params["bucket_name"], 233 | object_id, 234 | obj, 235 | ) 236 | encoded_id = urllib.parse.urlencode( 237 | { 238 | "upload_id": id, 239 | } 240 | ) 241 | response["Location"] = request.full_url + "&{}".format(encoded_id) 242 | 243 | 244 | def _delete(storage, bucket_name, object_id): 245 | try: 246 | storage.delete_file(bucket_name, object_id) 247 | return True 248 | except NotFound: 249 | return False 250 | 251 | 252 | def _patch(storage, bucket_name, object_id, metadata): 253 | try: 254 | obj = storage.get_file_obj(bucket_name, object_id) 255 | obj = _patch_object(obj, metadata) 256 | storage.patch_object(bucket_name, object_id, obj) 257 | return obj 258 | except NotFound: 259 | logger.error( 260 | "Could not patch {}/{}: with {}".format(bucket_name, object_id, metadata) 261 | ) 262 | return None 263 | 264 | 265 | def xml_upload(request, response, storage, *args, **kwargs): 266 | content_type = request.get_header("Content-Type", "application/octet-stream") 267 | obj = _make_object_resource( 268 | request.base_url, 269 | request.params["bucket_name"], 270 | request.params["object_id"], 271 | content_type, 272 | str(len(request.data)), 273 | ) 274 | try: 275 | obj = _checksums(request.data, obj) 276 | storage.create_file( 277 | request.params["bucket_name"], 278 | request.params["object_id"], 279 | request.data, 280 | obj, 281 | ) 282 | 283 | except NotFound: 284 | response.status = HTTPStatus.NOT_FOUND 285 | 286 | 287 | def insert(request, response, storage, *args, **kwargs): 288 | uploadType = request.query.get("uploadType") 289 | 290 | if not uploadType or len(uploadType) == 0: 291 | response.status = HTTPStatus.BAD_REQUEST 292 | return 293 | 294 | uploadType = uploadType[0] 295 | 296 | try: 297 | if uploadType == "media": 298 | return _media_upload(request, response, storage) 299 | 300 | if uploadType == "resumable": 301 | return _create_resumable_upload(request, response, storage) 302 | 303 | if uploadType == "multipart": 304 | return _multipart_upload(request, response, storage) 305 | except NotFound: 306 | response.status = HTTPStatus.NOT_FOUND 307 | except Conflict as err: 308 | _handle_conflict(response, err) 309 | 310 | 311 | def upload_partial(request, response, storage, *args, **kwargs): 312 | """https://cloud.google.com/storage/docs/performing-resumable-uploads""" 313 | upload_id = request.query.get("upload_id")[0] 314 | regex = r"^\s*bytes (?P[0-9]+)-(?P[0-9]+)/(?P[0-9]+)$" 315 | pattern = re.compile(regex) 316 | content_range = request.get_header("Content-Range", "") 317 | match = pattern.fullmatch(content_range) 318 | try: 319 | obj = storage.get_resumable_file_obj(upload_id) 320 | if match: 321 | m_dict = match.groupdict() 322 | total_size = int(m_dict["total_size"]) 323 | data = storage.add_to_resumable_upload(upload_id, request.data, total_size) 324 | if data is None: 325 | response.status = GoogleHTTPStatus.RESUME_INCOMPLETE 326 | response["Range"] = "bytes=0-{}".format(m_dict["end"]) 327 | return 328 | else: 329 | data = request.data or b"" 330 | 331 | obj = 
_checksums(data, obj) 332 | obj["size"] = str(len(data)) 333 | storage.create_file(obj["bucket"], obj["name"], data, obj, upload_id) 334 | response.json(obj) 335 | except NotFound: 336 | response.status = HTTPStatus.NOT_FOUND 337 | except Conflict as err: 338 | _handle_conflict(response, err) 339 | 340 | 341 | def get(request, response, storage, *args, **kwargs): 342 | if request.query.get("alt") and request.query.get("alt")[0] == "media": 343 | return download(request, response, storage) 344 | try: 345 | obj = storage.get_file_obj( 346 | request.params["bucket_name"], request.params["object_id"] 347 | ) 348 | response.json(obj) 349 | except NotFound: 350 | response.status = HTTPStatus.NOT_FOUND 351 | 352 | 353 | def ls(request, response, storage, *args, **kwargs): 354 | bucket_name = request.params["bucket_name"] 355 | prefix = request.query.get("prefix")[0] if request.query.get("prefix") else None 356 | delimiter = ( 357 | request.query.get("delimiter")[0] if request.query.get("delimiter") else None 358 | ) 359 | try: 360 | files, prefixes = storage.get_file_list(bucket_name, prefix, delimiter) 361 | except NotFound: 362 | response.status = HTTPStatus.NOT_FOUND 363 | else: 364 | response.json({"kind": "storage#object", "prefixes": prefixes, "items": files}) 365 | 366 | 367 | def copy(request, response, storage, *args, **kwargs): 368 | try: 369 | obj = storage.get_file_obj( 370 | request.params["bucket_name"], request.params["object_id"] 371 | ) 372 | except NotFound: 373 | response.status = HTTPStatus.NOT_FOUND 374 | return 375 | 376 | dest_obj = _make_object_resource( 377 | request.base_url, 378 | request.params["dest_bucket_name"], 379 | request.params["dest_object_id"], 380 | obj["contentType"], 381 | obj["size"], 382 | obj, 383 | ) 384 | 385 | file = storage.get_file(request.params["bucket_name"], request.params["object_id"]) 386 | try: 387 | dest_obj = _checksums(file, dest_obj) 388 | storage.create_file( 389 | request.params["dest_bucket_name"], 390 | request.params["dest_object_id"], 391 | file, 392 | dest_obj, 393 | ) 394 | response.json(dest_obj) 395 | except NotFound: 396 | response.status = HTTPStatus.NOT_FOUND 397 | except Conflict as err: 398 | _handle_conflict(response, err) 399 | 400 | 401 | def rewrite(request, response, storage, *args, **kwargs): 402 | try: 403 | obj = storage.get_file_obj( 404 | request.params["bucket_name"], request.params["object_id"] 405 | ) 406 | except NotFound: 407 | response.status = HTTPStatus.NOT_FOUND 408 | return 409 | 410 | dest_obj = _make_object_resource( 411 | request.base_url, 412 | request.params["dest_bucket_name"], 413 | request.params["dest_object_id"], 414 | obj["contentType"], 415 | obj["size"], 416 | obj, 417 | ) 418 | 419 | file = storage.get_file(request.params["bucket_name"], request.params["object_id"]) 420 | try: 421 | dest_obj = _checksums(file, dest_obj) 422 | storage.create_file( 423 | request.params["dest_bucket_name"], 424 | request.params["dest_object_id"], 425 | file, 426 | dest_obj, 427 | ) 428 | response.json( 429 | { 430 | "resource": dest_obj, 431 | "written": dest_obj["size"], 432 | "size": dest_obj["size"], 433 | "done": True, 434 | } 435 | ) 436 | except NotFound: 437 | response.status = HTTPStatus.NOT_FOUND 438 | except Conflict as err: 439 | _handle_conflict(response, err) 440 | 441 | 442 | def compose(request, response, storage, *args, **kwargs): 443 | content_type = None 444 | dest_file = b"" 445 | try: 446 | dest_properties = request.data["destination"] 447 | for src_obj in request.data["sourceObjects"]: 
448 | if content_type is None: 449 | temp = storage.get_file_obj( 450 | request.params["bucket_name"], src_obj["name"] 451 | ) 452 | content_type = temp["contentType"] 453 | dest_file += storage.get_file( 454 | request.params["bucket_name"], src_obj["name"] 455 | ) 456 | 457 | except NotFound: 458 | response.status = HTTPStatus.NOT_FOUND 459 | return 460 | 461 | dest_obj = _make_object_resource( 462 | request.base_url, 463 | request.params["bucket_name"], 464 | request.params["object_id"], 465 | content_type, 466 | len(dest_file), 467 | dest_properties, 468 | ) 469 | 470 | try: 471 | dest_obj = _checksums(dest_file, dest_obj) 472 | storage.create_file( 473 | request.params["bucket_name"], 474 | request.params["object_id"], 475 | dest_file, 476 | dest_obj, 477 | ) 478 | response.json(dest_obj) 479 | except NotFound: 480 | response.status = HTTPStatus.NOT_FOUND 481 | except Conflict as err: 482 | _handle_conflict(response, err) 483 | 484 | 485 | def download(request, response, storage, *args, **kwargs): 486 | try: 487 | file = storage.get_file( 488 | request.params["bucket_name"], request.params["object_id"] 489 | ) 490 | obj = storage.get_file_obj( 491 | request.params["bucket_name"], request.params["object_id"] 492 | ) 493 | range = request.get_header("range", None) 494 | if range: 495 | regex = r"^\s*bytes=(?P[0-9]+)-(?P[0-9]*)$" 496 | pattern = re.compile(regex) 497 | match = pattern.fullmatch(range) 498 | if match: 499 | end = orig_len = len(file) 500 | m_dict = match.groupdict() 501 | start = int(m_dict["start"]) 502 | if m_dict["end"]: 503 | end = min(orig_len, int(m_dict["end"]) + 1) 504 | file = file[start:end] 505 | end -= 1 506 | response["Content-Range"] = "bytes {}-{}/{}".format( 507 | start, end, orig_len 508 | ) 509 | response.status = HTTPStatus.PARTIAL_CONTENT 510 | else: 511 | hash_header = "crc32c={},md5={}".format(obj["crc32c"], obj["md5Hash"]) 512 | response[_HASH_HEADER] = hash_header 513 | 514 | if "response-content-disposition" in request.query: 515 | response["Content-Disposition"] = request.query[ 516 | "response-content-disposition" 517 | ][0] 518 | 519 | response.write_file(file, content_type=obj.get("contentType")) 520 | except NotFound: 521 | response.status = HTTPStatus.NOT_FOUND 522 | 523 | 524 | def delete(request, response, storage, *args, **kwargs): 525 | if not _delete(storage, request.params["bucket_name"], request.params["object_id"]): 526 | response.status = HTTPStatus.NOT_FOUND 527 | 528 | 529 | def patch(request, response, storage, *args, **kwargs): 530 | obj = _patch( 531 | storage, 532 | request.params["bucket_name"], 533 | request.params["object_id"], 534 | request.data, 535 | ) 536 | if obj: 537 | response.json(obj) 538 | else: 539 | response.status = HTTPStatus.NOT_FOUND 540 | 541 | 542 | def batch(request, response, storage, *args, **kwargs): 543 | boundary = "batch_" + "".join( 544 | secrets.choice(string.ascii_uppercase + string.ascii_lowercase + string.digits) 545 | for _ in range(32) 546 | ) 547 | response["Content-Type"] = "multipart/mixed; boundary={}".format(boundary) 548 | for item in request.data: 549 | resp_data = None 550 | response.write("--{}\r\nContent-Type: application/http\r\n".format(boundary)) 551 | method = item.get("method") 552 | bucket_name = item.get("bucket_name") 553 | object_id = item.get("object_id") 554 | meta = item.get("meta") 555 | if method == "PATCH": 556 | resp_data = _patch(storage, bucket_name, object_id, meta) 557 | if resp_data: 558 | response.write("HTTP/1.1 200 OK\r\n") 559 | response.write("Content-Type: 
application/json; charset=UTF-8\r\n") 560 | response.write(json.dumps(resp_data)) 561 | response.write("\r\n\r\n") 562 | if method == "DELETE": 563 | if object_id: 564 | resp_data = _delete(storage, bucket_name, object_id) 565 | else: 566 | try: 567 | storage.delete_bucket(bucket_name) 568 | resp_data = True 569 | except (Conflict, NotFound): 570 | pass 571 | if resp_data: 572 | response.write("HTTP/1.1 204 No Content\r\n") 573 | response.write("Content-Type: application/json; charset=UTF-8\r\n") 574 | if not resp_data: 575 | msg = "No such object: {}/{}".format(bucket_name, object_id) 576 | resp_data = deepcopy(NOT_FOUND) 577 | resp_data["error"]["message"] = msg 578 | resp_data["error"]["errors"][0]["message"] = msg 579 | response.write("HTTP/1.1 404 Not Found\r\n") 580 | response.write("Content-Type: application/json; charset=UTF-8\r\n\r\n") 581 | response.write(json.dumps(resp_data)) 582 | response.write("\r\n\r\n") 583 | 584 | response.write("--{}--".format(boundary)) 585 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/server.py: -------------------------------------------------------------------------------- 1 | import gzip 2 | import json 3 | import logging 4 | import re 5 | import threading 6 | import time 7 | import zlib 8 | from email.parser import BytesParser 9 | from functools import partial 10 | from http import HTTPStatus, server 11 | from urllib.parse import parse_qs, unquote, urlparse 12 | 13 | from gcp_storage_emulator import settings 14 | from gcp_storage_emulator.handlers import buckets, objects 15 | from gcp_storage_emulator.storage import Storage 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | GET = "GET" 20 | POST = "POST" 21 | PUT = "PUT" 22 | DELETE = "DELETE" 23 | PATCH = "PATCH" 24 | 25 | 26 | def _wipe_data(req, res, storage): 27 | keep_buckets = bool(req.query.get("keep-buckets")) 28 | logger.debug("Wiping storage") 29 | if keep_buckets: 30 | logger.debug("...while keeping the buckets") 31 | storage.wipe(keep_buckets) 32 | 33 | logger.debug("Storage wiped") 34 | res.write("OK") 35 | 36 | 37 | def _health_check(req, res, storage): 38 | res.write("OK") 39 | 40 | 41 | HANDLERS = ( 42 | (r"^{}/b$".format(settings.API_ENDPOINT), {GET: buckets.ls, POST: buckets.insert}), 43 | ( 44 | r"^{}/b/(?P[-.\w]+)$".format(settings.API_ENDPOINT), 45 | {GET: buckets.get, DELETE: buckets.delete}, 46 | ), 47 | ( 48 | r"^{}/b/(?P[-.\w]+)/o$".format(settings.API_ENDPOINT), 49 | {GET: objects.ls}, 50 | ), 51 | ( 52 | r"^{}/b/(?P[-.\w]+)/o/(?P.*[^/]+)/copyTo/b/".format( 53 | settings.API_ENDPOINT 54 | ) 55 | + r"(?P[-.\w]+)/o/(?P.*[^/]+)$", 56 | {POST: objects.copy}, 57 | ), 58 | ( 59 | r"^{}/b/(?P[-.\w]+)/o/(?P.*[^/]+)/rewriteTo/b/".format( 60 | settings.API_ENDPOINT 61 | ) 62 | + r"(?P[-.\w]+)/o/(?P.*[^/]+)$", 63 | {POST: objects.rewrite}, 64 | ), 65 | ( 66 | r"^{}/b/(?P[-.\w]+)/o/(?P.*[^/]+)/compose$".format( 67 | settings.API_ENDPOINT 68 | ), 69 | {POST: objects.compose}, 70 | ), 71 | ( 72 | r"^{}/b/(?P[-.\w]+)/o/(?P.*[^/]+)$".format( 73 | settings.API_ENDPOINT 74 | ), 75 | {GET: objects.get, DELETE: objects.delete, PATCH: objects.patch}, 76 | ), 77 | # Non-default API endpoints 78 | ( 79 | r"^{}/b/(?P[-.\w]+)/o$".format(settings.UPLOAD_API_ENDPOINT), 80 | {POST: objects.insert, PUT: objects.upload_partial}, 81 | ), 82 | ( 83 | r"^{}/b/(?P[-.\w]+)/o/(?P.*[^/]+)$".format( 84 | settings.DOWNLOAD_API_ENDPOINT 85 | ), 86 | {GET: objects.download}, 87 | ), 88 | ( 89 | r"^{}$".format(settings.BATCH_API_ENDPOINT), 90 | 
{POST: objects.batch}, 91 | ), 92 | # Internal API, not supported by the real GCS 93 | (r"^/$", {GET: _health_check}), # Health check endpoint 94 | (r"^/wipe$", {GET: _wipe_data}), # Wipe all data 95 | # Public file serving, same as object.download and signed URLs 96 | ( 97 | r"^/(?P[-.\w]+)/(?P.*[^/]+)$", 98 | {GET: objects.download, PUT: objects.xml_upload}, 99 | ), 100 | ) 101 | 102 | BATCH_HANDLERS = ( 103 | r"^(?P[\w]+).*{}/b/(?P[-.\w]+)/o/(?P[^\?]+[^/])([\?].*)?$".format( 104 | settings.API_ENDPOINT 105 | ), 106 | r"^(?P[\w]+).*{}/b/(?P[-.\w]+)([\?].*)?$".format( 107 | settings.API_ENDPOINT 108 | ), 109 | r"^Content-Type:\s*(?P[-.\w/]+)$", 110 | ) 111 | 112 | 113 | def _parse_batch_item(item): 114 | parsed_params = {} 115 | content_reached = None 116 | partial_content = "" 117 | current_content = item.get_payload() 118 | for line in current_content.splitlines(): 119 | if not content_reached: 120 | if not line: 121 | content_reached = True 122 | else: 123 | for regex in BATCH_HANDLERS: 124 | pattern = re.compile(regex) 125 | match = pattern.fullmatch(line) 126 | if match: 127 | for k, v in match.groupdict().items(): 128 | parsed_params[k] = unquote(v) 129 | else: 130 | partial_content += line 131 | if partial_content and parsed_params.get("content_type") == "application/json": 132 | parsed_params["meta"] = json.loads(partial_content) 133 | return parsed_params 134 | 135 | 136 | def _read_raw_data(request_handler): 137 | if request_handler.headers["Content-Length"]: 138 | return request_handler.rfile.read( 139 | int(request_handler.headers["Content-Length"]) 140 | ) 141 | 142 | if request_handler.headers["Transfer-Encoding"] == "chunked": 143 | raw_data = b"" 144 | 145 | while True: 146 | line = request_handler.rfile.readline().strip() 147 | chunk_size = int(line, 16) if line else 0 148 | if chunk_size == 0: 149 | break 150 | 151 | raw_data += request_handler.rfile.read(chunk_size) 152 | 153 | request_handler.rfile.readline() 154 | 155 | return raw_data 156 | 157 | return None 158 | 159 | 160 | def _decode_raw_data(raw_data, request_handler): 161 | if not raw_data: 162 | return None 163 | 164 | if request_handler.headers["Content-Encoding"] == "gzip": 165 | return gzip.decompress(raw_data) 166 | 167 | if request_handler.headers["Content-Encoding"] == "deflate": 168 | return zlib.decompress(raw_data) 169 | 170 | return raw_data 171 | 172 | 173 | def _read_data(request_handler, query): 174 | raw_data = _decode_raw_data(_read_raw_data(request_handler), request_handler) 175 | 176 | if not raw_data: 177 | return None 178 | 179 | content_type = request_handler.headers["Content-Type"] or "application/octet-stream" 180 | 181 | if content_type.startswith("application/json") and "upload_id" not in query: 182 | return json.loads(raw_data) 183 | 184 | if content_type.startswith("multipart/"): 185 | parser = BytesParser() 186 | header = bytes("Content-Type:" + content_type + "\r\n", "utf-8") 187 | 188 | msg = parser.parsebytes(header + raw_data) 189 | payload = msg.get_payload() 190 | 191 | if content_type.startswith("multipart/mixed"): 192 | # Batch https://cloud.google.com/storage/docs/json_api/v1/how-tos/batch 193 | rv = list() 194 | for item in payload: 195 | parsed_params = _parse_batch_item(item) 196 | rv.append(parsed_params) 197 | 198 | return rv 199 | 200 | # For multipart upload, google API expect the first item to be a json-encoded 201 | # object, and the second (and only other) part, the file content 202 | return { 203 | "meta": json.loads(payload[0].get_payload()), 204 | "content": 
payload[1].get_payload(decode=True), 205 | "content-type": payload[1].get_content_type(), 206 | } 207 | 208 | return raw_data 209 | 210 | 211 | class Request(object): 212 | def __init__(self, request_handler, method): 213 | super().__init__() 214 | self._path = request_handler.path 215 | self._request_handler = request_handler 216 | self._server_address = request_handler.server.server_address 217 | self._base_url = "http://{}:{}".format( 218 | self._server_address[0], self._server_address[1] 219 | ) 220 | self._full_url = self._base_url + self._path 221 | self._parsed_url = urlparse(self._full_url) 222 | self._query = parse_qs(self._parsed_url.query) 223 | self._methtod = method 224 | self._data = None 225 | self._parsed_params = None 226 | 227 | @property 228 | def path(self): 229 | return self._parsed_url.path 230 | 231 | @property 232 | def base_url(self): 233 | return self._base_url 234 | 235 | @property 236 | def full_url(self): 237 | return self._full_url 238 | 239 | @property 240 | def method(self): 241 | return self._methtod 242 | 243 | @property 244 | def query(self): 245 | return self._query 246 | 247 | @property 248 | def params(self): 249 | if not self._match: 250 | return None 251 | 252 | if not self._parsed_params: 253 | self._parsed_params = {} 254 | for k, v in self._match.groupdict().items(): 255 | self._parsed_params[k] = unquote(v) 256 | return self._parsed_params 257 | 258 | @property 259 | def data(self): 260 | if not self._data: 261 | self._data = _read_data(self._request_handler, self._query) 262 | return self._data 263 | 264 | def get_header(self, key, default=None): 265 | return self._request_handler.headers.get(key, default) 266 | 267 | def set_match(self, match): 268 | self._match = match 269 | 270 | 271 | class Response(object): 272 | def __init__(self, handler): 273 | super().__init__() 274 | self._handler = handler 275 | self.status = HTTPStatus.OK 276 | self._headers = {} 277 | self._content = "" 278 | 279 | def write(self, content): 280 | logger.warning( 281 | "[RESPONSE] Content handled as string, should be handled as stream" 282 | ) 283 | self._content += content 284 | 285 | def write_file(self, content, content_type="application/octet-stream"): 286 | if content_type is not None: 287 | self["Content-type"] = content_type 288 | 289 | self._content = content 290 | 291 | def json(self, obj): 292 | self["Content-type"] = "application/json" 293 | self._content = json.dumps(obj) 294 | 295 | def __setitem__(self, key, value): 296 | self._headers[key] = value 297 | 298 | def __getitem__(self, key): 299 | return self._headers[key] 300 | 301 | def close(self): 302 | self._handler.send_response(self.status.value, self.status.phrase) 303 | for k, v in self._headers.items(): 304 | self._handler.send_header(k, v) 305 | 306 | content = self._content 307 | 308 | if isinstance(self._content, str): 309 | content = self._content.encode("utf-8") 310 | 311 | self._handler.send_header("Content-Length", str(len(content))) 312 | self._handler.end_headers() 313 | self._handler.wfile.write(content) 314 | 315 | 316 | class Router(object): 317 | def __init__(self, request_handler): 318 | super().__init__() 319 | self._request_handler = request_handler 320 | 321 | def handle(self, method): 322 | if self._request_handler.headers["x-http-method-override"]: 323 | method = self._request_handler.headers["x-http-method-override"] 324 | 325 | request = Request(self._request_handler, method) 326 | response = Response(self._request_handler) 327 | 328 | for regex, handlers in HANDLERS: 329 | 
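# Routing: every HANDLERS entry pairs a path regex with a {method: handler} dict.
# The first pattern that fully matches the request path wins, its named groups
# become request.params, and the loop breaks once the handler has run.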
pattern = re.compile(regex) 330 | match = pattern.fullmatch(request.path) 331 | if match: 332 | request.set_match(match) 333 | handler = handlers.get(method) 334 | try: 335 | handler(request, response, self._request_handler.storage) 336 | except Exception as e: 337 | logger.error( 338 | "An error has occurred while running the handler for {} {}".format( 339 | request.method, 340 | request.full_url, 341 | ) 342 | ) 343 | logger.error(e) 344 | raise e 345 | break 346 | else: 347 | logger.error( 348 | "Method not implemented: {} - {}".format(request.method, request.path) 349 | ) 350 | response.status = HTTPStatus.NOT_IMPLEMENTED 351 | 352 | response.close() 353 | 354 | 355 | class RequestHandler(server.BaseHTTPRequestHandler): 356 | def __init__(self, storage, *args, **kwargs): 357 | self.storage = storage 358 | super().__init__(*args, **kwargs) 359 | 360 | def do_GET(self): 361 | router = Router(self) 362 | router.handle(GET) 363 | 364 | def do_POST(self): 365 | router = Router(self) 366 | router.handle(POST) 367 | 368 | def do_DELETE(self): 369 | router = Router(self) 370 | router.handle(DELETE) 371 | 372 | def do_PUT(self): 373 | router = Router(self) 374 | router.handle(PUT) 375 | 376 | def do_PATCH(self): 377 | router = Router(self) 378 | router.handle(PATCH) 379 | 380 | def log_message(self, format, *args): 381 | logger.info(format % args) 382 | 383 | 384 | class APIThread(threading.Thread): 385 | def __init__(self, host, port, storage, *args, **kwargs): 386 | super().__init__(*args, **kwargs) 387 | 388 | self._host = host 389 | self._port = port 390 | self.is_running = threading.Event() 391 | self._httpd = None 392 | self._storage = storage 393 | 394 | def run(self): 395 | self._httpd = server.HTTPServer( 396 | (self._host, self._port), partial(RequestHandler, self._storage) 397 | ) 398 | self.is_running.set() 399 | self._httpd.serve_forever() 400 | 401 | def join(self, timeout=None): 402 | self.is_running.clear() 403 | if self._httpd: 404 | logger.info("[API] Stopping API server") 405 | self._httpd.shutdown() 406 | self._httpd.server_close() 407 | 408 | 409 | class Server(object): 410 | def __init__(self, host, port, in_memory, default_bucket=None, data_dir=None): 411 | self._storage = Storage(use_memory_fs=in_memory, data_dir=data_dir) 412 | if default_bucket: 413 | logger.debug('[SERVER] Creating default bucket "{}"'.format(default_bucket)) 414 | buckets.create_bucket(default_bucket, self._storage) 415 | self._api = APIThread(host, port, self._storage) 416 | 417 | # Context Manager 418 | def __enter__(self): 419 | self.start() 420 | return self 421 | 422 | def __exit__(self, *args): 423 | self.stop() 424 | 425 | def start(self): 426 | self._api.start() 427 | self._api.is_running.wait() # Start the API thread 428 | 429 | def stop(self): 430 | self._api.join(timeout=1) 431 | 432 | def wipe(self, keep_buckets=False): 433 | self._storage.wipe(keep_buckets=keep_buckets) 434 | 435 | def run(self): 436 | try: 437 | self.start() 438 | logger.info("[SERVER] All services started") 439 | 440 | while True: 441 | try: 442 | time.sleep(0.1) 443 | except KeyboardInterrupt: 444 | logger.info("[SERVER] Received keyboard interrupt") 445 | break 446 | 447 | finally: 448 | self.stop() 449 | 450 | 451 | def create_server(host, port, in_memory=False, default_bucket=None, data_dir=None): 452 | logger.info("Starting server at {}:{}".format(host, port)) 453 | return Server( 454 | host, 455 | port, 456 | in_memory=in_memory, 457 | default_bucket=default_bucket, 458 | data_dir=data_dir, 459 | ) 460 | 
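Before moving on to the storage backend, here is a minimal usage sketch tying the server pieces above together. It mirrors the pattern the tests further down use; the port, bucket name and object name are illustrative, and it assumes the google-cloud-storage and requests packages are installed and that port 9023 is free.

```python
import os

import requests

from gcp_storage_emulator.server import create_server

# Run an in-memory emulator on localhost:9023; Server is a context manager,
# so the API thread is started on enter and stopped on exit.
with create_server("localhost", 9023, in_memory=True, default_bucket="demo-bucket"):
    # google-cloud-storage reads STORAGE_EMULATOR_HOST at import time,
    # so set it before importing the library (same trick as in the tests).
    os.environ["STORAGE_EMULATOR_HOST"] = "http://localhost:9023"
    from google.cloud import storage

    client = storage.Client(
        project="[PROJECT]",
        _http=requests.Session(),
        client_options={"api_endpoint": "http://localhost:9023"},
    )
    bucket = client.bucket("demo-bucket")
    bucket.blob("hello.txt").upload_from_string("hello world")

    # Objects are also served over plain HTTP by the public-file handler.
    print(requests.get("http://localhost:9023/demo-bucket/hello.txt").text)

    # Internal endpoints: "/" is a health check, "/wipe" clears all data
    # (pass keep-buckets to keep the buckets themselves).
    requests.get("http://localhost:9023/wipe", params={"keep-buckets": "true"})
```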
-------------------------------------------------------------------------------- /src/gcp_storage_emulator/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | API_ENDPOINT = "/storage/v1" 4 | UPLOAD_API_ENDPOINT = "/upload/storage/v1" 5 | BATCH_API_ENDPOINT = "/batch/storage/v1" 6 | DOWNLOAD_API_ENDPOINT = "/download/storage/v1" 7 | 8 | # pyfilesystem assumes OS fs within CWD as base 9 | STORAGE_BASE = os.path.abspath(os.environ.get("STORAGE_BASE", "./")) 10 | STORAGE_DIR = os.environ.get("STORAGE_DIR", ".cloudstorage") 11 | -------------------------------------------------------------------------------- /src/gcp_storage_emulator/storage.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import json 3 | import logging 4 | import os 5 | from hashlib import sha256 6 | 7 | import fs 8 | from fs.errors import FileExpected, ResourceNotFound 9 | 10 | from gcp_storage_emulator.exceptions import Conflict, NotFound 11 | from gcp_storage_emulator.settings import STORAGE_BASE, STORAGE_DIR 12 | 13 | # Real buckets can't start with an underscore 14 | RESUMABLE_DIR = "_resumable" 15 | 16 | logger = logging.getLogger(__name__) 17 | 18 | 19 | class Storage(object): 20 | def __init__(self, use_memory_fs=False, data_dir=None): 21 | if not data_dir: 22 | data_dir = STORAGE_BASE 23 | if not os.path.isabs(data_dir): 24 | raise ValueError(f"{data_dir!r} must be an absolute path") 25 | 26 | self._data_dir = data_dir 27 | self._use_memory_fs = use_memory_fs 28 | self._pwd = fs.open_fs(self.get_storage_base()) 29 | try: 30 | self._fs = self._pwd.makedir(STORAGE_DIR) 31 | except fs.errors.DirectoryExists: 32 | self._fs = self._pwd.opendir(STORAGE_DIR) 33 | 34 | self._read_config_from_file() 35 | 36 | def _write_config_to_file(self): 37 | data = { 38 | "buckets": self.buckets, 39 | "objects": self.objects, 40 | "resumable": self.resumable, 41 | } 42 | 43 | with self._fs.open(".meta", mode="w") as meta: 44 | json.dump(data, meta, indent=2) 45 | 46 | def _read_config_from_file(self): 47 | try: 48 | with self._fs.open(".meta", mode="r") as meta: 49 | data = json.load(meta) 50 | self.buckets = data.get("buckets") 51 | self.objects = data.get("objects") 52 | self.resumable = data.get("resumable") 53 | except ResourceNotFound: 54 | self.buckets = {} 55 | self.objects = {} 56 | self.resumable = {} 57 | 58 | def _get_or_create_dir(self, bucket_name, file_name): 59 | try: 60 | bucket_dir = self._fs.makedir(bucket_name) 61 | except fs.errors.DirectoryExists: 62 | bucket_dir = self._fs.opendir(bucket_name) 63 | 64 | dir_name = fs.path.dirname(file_name) 65 | return bucket_dir.makedirs(dir_name, recreate=True) 66 | 67 | def get_storage_base(self): 68 | """Returns the pyfilesystem-compatible fs path to the storage 69 | 70 | This is the OSFS if using disk storage, or "mem://" otherwise. 
71 | See https://docs.pyfilesystem.org/en/latest/guide.html#opening-filesystems for more info 72 | 73 | Returns: 74 | string -- The relevant filesystm 75 | """ 76 | 77 | if self._use_memory_fs: 78 | return "mem://" 79 | else: 80 | os.makedirs(self._data_dir, exist_ok=True) 81 | return self._data_dir 82 | 83 | def get_bucket(self, bucket_name): 84 | """Get the bucket resourec object given the bucket name 85 | 86 | Arguments: 87 | bucket_name {str} -- Name of the bucket 88 | 89 | Returns: 90 | dict -- GCS-like Bucket resource 91 | """ 92 | 93 | return self.buckets.get(bucket_name) 94 | 95 | def get_file_list(self, bucket_name, prefix=None, delimiter=None): 96 | """Lists all the blobs in the bucket that begin with the prefix. 97 | 98 | This can be used to list all blobs in a "folder", e.g. "public/". 99 | 100 | The delimiter argument can be used to restrict the results to only the 101 | "files" in the given "folder". Without the delimiter, the entire tree under 102 | the prefix is returned. For example, given these blobs: 103 | 104 | a/1.txt 105 | a/b/2.txt 106 | 107 | If you just specify prefix = 'a', you'll get back: 108 | 109 | a/1.txt 110 | a/b/2.txt 111 | 112 | However, if you specify prefix='a' and delimiter='/', you'll get back: 113 | 114 | a/1.txt 115 | 116 | Additionally, the same request will return blobs.prefixes populated with: 117 | 118 | a/b/ 119 | 120 | Source: https://cloud.google.com/storage/docs/listing-objects#storage-list-objects-python 121 | """ 122 | 123 | if bucket_name not in self.buckets: 124 | raise NotFound 125 | 126 | prefix_len = 0 127 | prefixes = [] 128 | bucket_objects = self.objects.get(bucket_name, {}) 129 | if prefix: 130 | prefix_len = len(prefix) 131 | objs = list( 132 | file_object 133 | for file_name, file_object in bucket_objects.items() 134 | if file_name.startswith(prefix) 135 | and (not delimiter or delimiter not in file_name[prefix_len:]) 136 | ) 137 | else: 138 | objs = list(bucket_objects.values()) 139 | if delimiter: 140 | prefixes = list( 141 | file_name[:prefix_len] 142 | + file_name[prefix_len:].split(delimiter, 1)[0] 143 | + delimiter 144 | for file_name in list(bucket_objects) 145 | if file_name.startswith(prefix or "") 146 | and delimiter in file_name[prefix_len:] 147 | ) 148 | return objs, prefixes 149 | 150 | def create_bucket(self, bucket_name, bucket_obj): 151 | """Create a bucket object representation and save it to the current fs 152 | 153 | Arguments: 154 | bucket_name {str} -- Name of the GCS bucket 155 | bucket_obj {dict} -- GCS-like Bucket resource 156 | 157 | Returns: 158 | [type] -- [description] 159 | """ 160 | 161 | self.buckets[bucket_name] = bucket_obj 162 | self._write_config_to_file() 163 | return bucket_obj 164 | 165 | def create_file(self, bucket_name, file_name, content, file_obj, file_id=None): 166 | """Create a text file given a string content 167 | 168 | Arguments: 169 | bucket_name {str} -- Name of the bucket to save to 170 | file_name {str} -- File name used to store data 171 | content {bytes} -- Content of the file to write 172 | file_obj {dict} -- GCS-like Object resource 173 | file_id {str} -- Resumable file id 174 | 175 | Raises: 176 | NotFound: Raised when the bucket doesn't exist 177 | """ 178 | 179 | if bucket_name not in self.buckets: 180 | raise NotFound 181 | 182 | file_dir = self._get_or_create_dir(bucket_name, file_name) 183 | 184 | base_name = fs.path.basename(file_name) 185 | with file_dir.open(base_name, mode="wb") as file: 186 | file.write(content) 187 | bucket_objects = 
self.objects.get(bucket_name, {}) 188 | bucket_objects[file_name] = file_obj 189 | self.objects[bucket_name] = bucket_objects 190 | if file_id: 191 | self.delete_resumable_file_obj(file_id) 192 | self._delete_file(RESUMABLE_DIR, self.safe_id(file_id)) 193 | self._write_config_to_file() 194 | 195 | def create_resumable_upload(self, bucket_name, file_name, file_obj): 196 | """Initiate the necessary data to support partial upload. 197 | 198 | This doesn't fully support partial upload, but expect the secondary PUT 199 | call to send all the data in one go. 200 | 201 | Basically, we try to comply to the bare minimum to the API described in 202 | https://cloud.google.com/storage/docs/performing-resumable-uploads ignoring 203 | any potential network failures 204 | 205 | Arguments: 206 | bucket_name {string} -- Name of the bucket to save to 207 | file_name {string} -- File name used to store data 208 | file_obj {dict} -- GCS Object resource 209 | 210 | Raises: 211 | NotFound: Raised when the bucket doesn't exist 212 | 213 | Returns: 214 | str -- id of the resumable upload session (`upload_id`) 215 | """ 216 | 217 | if bucket_name not in self.buckets: 218 | raise NotFound 219 | 220 | file_id = "{}:{}:{}".format(bucket_name, file_name, datetime.datetime.now()) 221 | self.resumable[file_id] = file_obj 222 | self._write_config_to_file() 223 | return file_id 224 | 225 | def add_to_resumable_upload(self, file_id, content, total_size): 226 | """Add data to partial resumable download. 227 | 228 | We can't use 'seek' to append since memory store seems to erase 229 | everything in those cases. That's why the previous part is loaded 230 | and rewritten again. 231 | 232 | Arguments: 233 | file_id {str} -- Resumable file id 234 | content {bytes} -- Content of the file to write 235 | total_size {int} -- Total object size 236 | 237 | 238 | Raises: 239 | NotFound: Raised when the object doesn't exist 240 | 241 | Returns: 242 | bytes -- Raw content of the file if completed, None otherwise 243 | """ 244 | safe_id = self.safe_id(file_id) 245 | try: 246 | file_content = self.get_file(RESUMABLE_DIR, safe_id, False) 247 | except NotFound: 248 | file_content = b"" 249 | file_content += content 250 | file_dir = self._get_or_create_dir(RESUMABLE_DIR, safe_id) 251 | with file_dir.open(safe_id, mode="wb") as file: 252 | file.write(file_content) 253 | size = len(file_content) 254 | if size >= total_size: 255 | return file_content[:total_size] 256 | return None 257 | 258 | def get_file_obj(self, bucket_name, file_name): 259 | """Gets the meta information for a file within a bucket 260 | 261 | Arguments: 262 | bucket_name {str} -- Name of the bucket 263 | file_name {str} -- File name 264 | 265 | Raises: 266 | NotFound: Raised when the object doesn't exist 267 | 268 | Returns: 269 | dict -- GCS-like Object resource 270 | """ 271 | 272 | try: 273 | return self.objects[bucket_name][file_name] 274 | except KeyError: 275 | raise NotFound 276 | 277 | def get_resumable_file_obj(self, file_id): 278 | """Gets the meta information for a file within resumables 279 | 280 | Arguments: 281 | file_id {str} -- Resumable file id 282 | 283 | Raises: 284 | NotFound: Raised when the object doesn't exist 285 | 286 | Returns: 287 | dict -- GCS-like Object resource 288 | """ 289 | 290 | try: 291 | return self.resumable[file_id] 292 | except KeyError: 293 | raise NotFound 294 | 295 | def get_file(self, bucket_name, file_name, show_error=True): 296 | """Get the raw data of a file within a bucket 297 | 298 | Arguments: 299 | bucket_name {str} -- Name 
of the bucket 300 | file_name {str} -- File name 301 | show_error {bool} -- Show error if the file is missing 302 | 303 | Raises: 304 | NotFound: Raised when the object doesn't exist 305 | 306 | Returns: 307 | bytes -- Raw content of the file 308 | """ 309 | 310 | try: 311 | bucket_dir = self._fs.opendir(bucket_name) 312 | return bucket_dir.open(file_name, mode="rb").read() 313 | except (FileExpected, ResourceNotFound) as e: 314 | if show_error: 315 | logger.error("Resource not found:") 316 | logger.error(e) 317 | raise NotFound 318 | 319 | def delete_resumable_file_obj(self, file_id): 320 | """Deletes the meta information for a file within resumables 321 | 322 | Arguments: 323 | file_id {str} -- Resumable file id 324 | 325 | Raises: 326 | NotFound: Raised when the object doesn't exist 327 | """ 328 | 329 | try: 330 | del self.resumable[file_id] 331 | except KeyError: 332 | raise NotFound 333 | 334 | def delete_bucket(self, bucket_name): 335 | """Delete a bucket's meta and file 336 | 337 | Arguments: 338 | bucket_name {str} -- GCS bucket name 339 | 340 | Raises: 341 | NotFound: If the bucket doesn't exist 342 | Conflict: If the bucket is not empty or there are pending uploads 343 | """ 344 | bucket_meta = self.buckets.get(bucket_name) 345 | if bucket_meta is None: 346 | raise NotFound("Bucket with name '{}' does not exist".format(bucket_name)) 347 | 348 | bucket_objects = self.objects.get(bucket_name, {}) 349 | 350 | if len(bucket_objects.keys()) != 0: 351 | raise Conflict("Bucket '{}' is not empty".format(bucket_name)) 352 | 353 | resumable_ids = [ 354 | file_id 355 | for (file_id, file_obj) in self.resumable.items() 356 | if file_obj.get("bucket") == bucket_name 357 | ] 358 | 359 | if len(resumable_ids) != 0: 360 | raise Conflict( 361 | "Bucket '{}' has pending upload sessions".format(bucket_name) 362 | ) 363 | 364 | del self.buckets[bucket_name] 365 | 366 | self._delete_dir(bucket_name) 367 | self._write_config_to_file() 368 | 369 | def delete_file(self, bucket_name, file_name): 370 | try: 371 | self.objects[bucket_name][file_name] 372 | except KeyError: 373 | raise NotFound( 374 | "Object with name '{}' does not exist in bucket '{}'".format( 375 | bucket_name, file_name 376 | ) 377 | ) 378 | 379 | del self.objects[bucket_name][file_name] 380 | 381 | self._delete_file(bucket_name, file_name) 382 | self._write_config_to_file() 383 | 384 | def _delete_file(self, bucket_name, file_name): 385 | try: 386 | with self._fs.opendir(bucket_name) as bucket_dir: 387 | bucket_dir.remove(file_name) 388 | except ResourceNotFound: 389 | logger.info("No file to remove '{}/{}'".format(bucket_name, file_name)) 390 | 391 | def _delete_dir(self, path, force=True): 392 | try: 393 | remover = self._fs.removetree if force else self._fs.removedir 394 | remover(path) 395 | except ResourceNotFound: 396 | logger.info("No folder to remove '{}'".format(path)) 397 | 398 | def wipe(self, keep_buckets=False): 399 | existing_buckets = self.buckets 400 | self.buckets = {} 401 | self.objects = {} 402 | self.resumable = {} 403 | 404 | try: 405 | self._fs.remove(".meta") 406 | except ResourceNotFound: 407 | pass 408 | try: 409 | for path in self._fs.listdir("."): 410 | self._fs.removetree(path) 411 | except ResourceNotFound as e: 412 | logger.warning(e) 413 | 414 | if keep_buckets: 415 | for bucket_name, bucket_obj in existing_buckets.items(): 416 | self.create_bucket(bucket_name, bucket_obj) 417 | 418 | def patch_object(self, bucket_name, file_name, file_obj): 419 | """Patch object 420 | 421 | Arguments: 422 | bucket_name 
{str} -- Name of the bucket to save to 423 | file_name {str} -- File name used to store data 424 | file_obj {dict} -- GCS-like Object resource 425 | """ 426 | 427 | bucket_objects = self.objects.get(bucket_name) 428 | if bucket_objects and bucket_objects.get(file_name): 429 | bucket_objects[file_name] = file_obj 430 | self.objects[bucket_name] = bucket_objects 431 | self._write_config_to_file() 432 | 433 | @staticmethod 434 | def safe_id(file_id): 435 | """Safe string from the resumable file_id 436 | 437 | Arguments: 438 | file_id {str} -- Resumable file id 439 | 440 | Returns: 441 | str -- Safe string to use in the file system 442 | """ 443 | return sha256(file_id.encode("utf-8")).hexdigest() 444 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oittaa/gcp-storage-emulator/0d623b8d2a0a4432a990b50373ee1c5d59370f47/tests/__init__.py -------------------------------------------------------------------------------- /tests/test_binary.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/oittaa/gcp-storage-emulator/0d623b8d2a0a4432a990b50373ee1c5d59370f47/tests/test_binary.png -------------------------------------------------------------------------------- /tests/test_main.py: -------------------------------------------------------------------------------- 1 | import os 2 | from unittest import TestCase as BaseTestCase 3 | 4 | import requests 5 | 6 | from gcp_storage_emulator.__main__ import main, wipe 7 | 8 | 9 | def _get_storage_client(http): 10 | """Gets a python storage client""" 11 | os.environ["STORAGE_EMULATOR_HOST"] = "http://localhost:9099" 12 | 13 | # Cloud storage uses environment variables to configure api endpoints for 14 | # file upload - which is read at module import time 15 | from google.cloud import storage 16 | 17 | if os.getenv("DEBUG"): 18 | from http import client as http_client 19 | 20 | http_client.HTTPConnection.debuglevel = 5 21 | return storage.Client( 22 | project="[PROJECT]", 23 | _http=http, 24 | client_options={"api_endpoint": "http://localhost:9099"}, 25 | ) 26 | 27 | 28 | class ServerBaseCase(BaseTestCase): 29 | @classmethod 30 | def setUpClass(cls): 31 | cls._server = main(["start", "--port=9099"], True) 32 | cls._server.start() 33 | 34 | @classmethod 35 | def tearDownClass(cls): 36 | wipe() 37 | cls._server.stop() 38 | 39 | def setUp(self): 40 | self._session = requests.Session() 41 | self._client = _get_storage_client(self._session) 42 | 43 | 44 | class MainHttpEndpointsTest(ServerBaseCase): 45 | """Tests for the HTTP endpoints.""" 46 | 47 | def _url(self, path): 48 | return os.environ["STORAGE_EMULATOR_HOST"] + path 49 | 50 | def test_health_check(self): 51 | url = self._url("/") 52 | response = requests.get(url) 53 | self.assertEqual(response.status_code, 200) 54 | self.assertEqual(response.content, "OK".encode("utf-8")) 55 | 56 | def test_wipe(self): 57 | url = self._url("/wipe") 58 | response = requests.get(url) 59 | self.assertEqual(response.status_code, 200) 60 | self.assertEqual(response.content, "OK".encode("utf-8")) 61 | 62 | def test_download_by_url(self): 63 | """Objects should be downloadable over HTTP from the emulator client.""" 64 | content = "Here is some content" 65 | bucket = self._client.create_bucket("anotherbucket") 66 | blob = bucket.blob("something.txt") 67 | blob.upload_from_string(content) 68 | 69 | url = 
self._url("/anotherbucket/something.txt") 70 | response = requests.get(url) 71 | self.assertEqual(response.status_code, 200) 72 | self.assertEqual(response.content, content.encode("utf-8")) 73 | 74 | def test_path_does_not_exist(self): 75 | url = self._url("/zzzzz-does-not-exist") 76 | response = requests.get(url) 77 | self.assertEqual(response.status_code, 501) 78 | self.assertEqual(response.content, "".encode("utf-8")) 79 | -------------------------------------------------------------------------------- /tests/test_server.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | import os 3 | from io import BytesIO 4 | from tempfile import NamedTemporaryFile 5 | from unittest import TestCase as BaseTestCase 6 | 7 | import fs 8 | import requests 9 | from google.api_core.exceptions import BadRequest, Conflict, NotFound 10 | from google.auth.credentials import AnonymousCredentials, Signing 11 | 12 | from gcp_storage_emulator.server import create_server 13 | from gcp_storage_emulator.settings import STORAGE_BASE, STORAGE_DIR 14 | 15 | 16 | TEST_TEXT = os.path.join(os.path.dirname(os.path.abspath(__file__)), "test_text.txt") 17 | 18 | 19 | class FakeSigningCredentials(Signing, AnonymousCredentials): 20 | def sign_bytes(self, message): 21 | return b"foobar" 22 | 23 | @property 24 | def signer_email(self): 25 | return "foobar@example.tld" 26 | 27 | @property 28 | def signer(self): 29 | pass 30 | 31 | 32 | def _get_storage_client(http): 33 | """Gets a python storage client""" 34 | os.environ["STORAGE_EMULATOR_HOST"] = "http://localhost:9023" 35 | 36 | # Cloud Storage uses environment variables to configure API endpoints for 37 | # file upload - which is read at module import time 38 | from google.cloud import storage 39 | 40 | if os.getenv("DEBUG"): 41 | from http import client as http_client 42 | 43 | http_client.HTTPConnection.debuglevel = 5 44 | return storage.Client( 45 | project="[PROJECT]", 46 | _http=http, 47 | client_options={"api_endpoint": "http://localhost:9023"}, 48 | ) 49 | 50 | 51 | class ServerBaseCase(BaseTestCase): 52 | @classmethod 53 | def setUpClass(cls): 54 | cls._server = create_server("localhost", 9023, in_memory=False) 55 | cls._server.start() 56 | 57 | @classmethod 58 | def tearDownClass(cls): 59 | cls._server.wipe() 60 | cls._server.stop() 61 | 62 | def setUp(self): 63 | self._session = requests.Session() 64 | self._client = _get_storage_client(self._session) 65 | self._server.wipe() 66 | 67 | 68 | class BucketsTests(BaseTestCase): 69 | @classmethod 70 | def setUpClass(cls): 71 | cls._server = create_server("localhost", 9023, in_memory=False) 72 | cls._server.start() 73 | 74 | @classmethod 75 | def tearDownClass(cls): 76 | cls._server.wipe() 77 | cls._server.stop() 78 | 79 | def setUp(self): 80 | self._server.wipe() 81 | self._session = requests.Session() 82 | self._client = _get_storage_client(self._session) 83 | 84 | def test_bucket_creation(self): 85 | bucket = self._client.create_bucket("bucket_name") 86 | self.assertEqual(bucket.project_number, 1234) 87 | 88 | def test_bucket_creation_no_override(self): 89 | self._client.create_bucket("bucket_name") 90 | with self.assertRaises(Conflict): 91 | self._client.create_bucket("bucket_name") 92 | 93 | def test_bucket_list(self): 94 | bucket = self._client.create_bucket("bucket_name") 95 | all_bucket_names = [b.name for b in self._client.list_buckets()] 96 | self.assertIn(bucket.name, all_bucket_names) 97 | 98 | def test_bucket_get_existing(self): 99 | bucket = 
self._client.create_bucket("bucket_name") 100 | fetched_bucket = self._client.get_bucket("bucket_name") 101 | self.assertEqual(fetched_bucket.name, bucket.name) 102 | 103 | def test_bucket_get_existing_with_dot(self): 104 | bucket = self._client.create_bucket("bucket.name") 105 | fetched_bucket = self._client.get_bucket("bucket.name") 106 | self.assertEqual(fetched_bucket.name, bucket.name) 107 | 108 | def test_bucket_get_non_existing(self): 109 | with self.assertRaises(NotFound): 110 | self._client.get_bucket("bucket_name") 111 | 112 | def test_bucket_delete(self): 113 | bucket = self._client.create_bucket("bucket_name") 114 | bucket.delete() 115 | 116 | with self.assertRaises(NotFound): 117 | self._client.get_bucket("bucket_name") 118 | 119 | def test_bucket_delete_removes_file(self): 120 | bucket = self._client.create_bucket("bucket_name") 121 | bucket.delete() 122 | 123 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 124 | self.assertFalse(pwd.exists("bucket_name")) 125 | 126 | def test_bucket_delete_non_existing(self): 127 | # client.bucket doesn't create the actual bucket resource remotely 128 | bucket = self._client.bucket("bucket_name") 129 | with self.assertRaises(NotFound): 130 | bucket.delete() 131 | 132 | def test_bucket_delete_non_empty(self): 133 | bucket = self._client.create_bucket("bucket_name") 134 | blob = bucket.blob("canttouchme.txt") 135 | blob.upload_from_string("This should prevent deletion if not force") 136 | 137 | with self.assertRaises(Conflict): 138 | bucket.delete() 139 | 140 | blob = bucket.get_blob("canttouchme.txt") 141 | self.assertIsNotNone(blob) 142 | 143 | def test_bucket_force_delete(self): 144 | bucket = self._client.create_bucket("bucket_name") 145 | blob = bucket.blob("cantouchme.txt") 146 | blob.upload_from_string("This should prevent deletion if not force") 147 | 148 | bucket.delete(force=True) 149 | 150 | blob = bucket.get_blob("cantouchme.txt") 151 | self.assertIsNone(blob) 152 | 153 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 154 | self.assertFalse(pwd.exists("bucket_name")) 155 | 156 | 157 | class DefaultBucketTests(BaseTestCase): 158 | def tearDown(self): 159 | if self._server: 160 | self._server.wipe() 161 | self._server.stop() 162 | return super().tearDown() 163 | 164 | def test_bucket_created(self): 165 | self._server = create_server( 166 | "localhost", 9023, in_memory=True, default_bucket="example.appspot.com" 167 | ) 168 | self._server.start() 169 | self._session = requests.Session() 170 | self._client = _get_storage_client(self._session) 171 | bucket = self._client.get_bucket("example.appspot.com") 172 | self.assertEqual(bucket.name, "example.appspot.com") 173 | self.assertEqual(bucket.storage_class, "STANDARD") 174 | 175 | 176 | class ObjectsTests(ServerBaseCase): 177 | def test_upload_from_string(self): 178 | content = "this is the content of the file\n" 179 | bucket = self._client.create_bucket("testbucket") 180 | blob = bucket.blob("testblob-name.txt") 181 | blob.upload_from_string(content) 182 | 183 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 184 | read_content = pwd.readtext("testbucket/testblob-name.txt") 185 | self.assertEqual(read_content, content) 186 | 187 | def test_upload_from_text_file(self): 188 | bucket = self._client.create_bucket("testbucket") 189 | blob = bucket.blob("test_text.txt") 190 | with open(TEST_TEXT, "rb") as file: 191 | blob.upload_from_file(file) 192 | 193 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 194 | read_content = 
pwd.readtext("testbucket/test_text.txt") 195 | 196 | with open(TEST_TEXT, "rb") as file: 197 | expected_content = str(file.read(), encoding="utf-8") 198 | self.assertEqual(read_content, expected_content) 199 | 200 | def test_upload_from_bin_file(self): 201 | test_binary = os.path.join( 202 | os.path.dirname(os.path.abspath(__file__)), "test_binary.png" 203 | ) 204 | bucket = self._client.create_bucket("testbucket") 205 | blob = bucket.blob("binary.png") 206 | with open(test_binary, "rb") as file: 207 | blob.upload_from_file(file) 208 | 209 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 210 | read_content = pwd.readbytes("testbucket/binary.png") 211 | 212 | with open(test_binary, "rb") as file: 213 | expected_content = file.read() 214 | self.assertEqual(read_content, expected_content) 215 | 216 | def test_upload_from_bin_file_cr_lf(self): 217 | content = b"\r\rheeeeei\r\n" 218 | test_binary = BytesIO(content) 219 | bucket = self._client.create_bucket("testbucket") 220 | blob = bucket.blob("binary_cr.png") 221 | 222 | blob.upload_from_file(test_binary, size=len(content)) 223 | 224 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 225 | read_content = pwd.readbytes("testbucket/binary_cr.png") 226 | 227 | self.assertEqual(read_content, content) 228 | 229 | def test_upload_from_file_name(self): 230 | test_binary = os.path.join( 231 | os.path.dirname(os.path.abspath(__file__)), "test_binary.png" 232 | ) 233 | file_name = "test_binary.png" 234 | 235 | bucket = self._client.create_bucket("testbucket") 236 | blob = bucket.blob(file_name) 237 | blob.upload_from_filename(test_binary) 238 | blob = bucket.get_blob(file_name) 239 | with NamedTemporaryFile() as temp_file: 240 | blob.download_to_filename(temp_file.name) 241 | with open(test_binary, "rb") as orig_file: 242 | self.assertEqual(temp_file.read(), orig_file.read()) 243 | 244 | def test_upload_from_file(self): 245 | test_binary = os.path.join( 246 | os.path.dirname(os.path.abspath(__file__)), "test_binary.png" 247 | ) 248 | file_name = "test_binary.png" 249 | 250 | bucket = self._client.create_bucket("testbucket") 251 | blob = bucket.blob(file_name) 252 | with open(test_binary, "rb") as filehandle: 253 | blob.upload_from_file(filehandle) 254 | self.assertTrue(blob.id.startswith("testbucket/test_binary.png/")) 255 | 256 | def test_get(self): 257 | file_name = "testblob-name.txt" 258 | content = "this is the content of the file\n" 259 | bucket = self._client.create_bucket("testbucket") 260 | blob = bucket.blob(file_name) 261 | blob.upload_from_string(content) 262 | 263 | blob = bucket.get_blob(file_name) 264 | self.assertEqual(blob.name, file_name) 265 | 266 | def test_get_unicode(self): 267 | file_name = "tmp.ąćęłńóśźż.马铃薯.zip" 268 | content = "this is the content of the file\n" 269 | bucket = self._client.create_bucket("testbucket") 270 | blob = bucket.blob(file_name) 271 | blob.upload_from_string(content) 272 | 273 | blob = bucket.get_blob(file_name) 274 | self.assertEqual(blob.name, file_name) 275 | 276 | def test_get_nonexistent(self): 277 | bucket = self._client.create_bucket("testbucket") 278 | res = bucket.get_blob("idonotexist") 279 | 280 | self.assertIsNone(res) 281 | 282 | blob = bucket.blob("iexist") 283 | blob.upload_from_string("some_fake_content") 284 | res = bucket.get_blob("idonotexist") 285 | 286 | self.assertIsNone(res) 287 | 288 | def test_download_nonexistent(self): 289 | bucket = self._client.create_bucket("testbucket") 290 | blob = bucket.blob("idonotexist") 291 | with 
self.assertRaises(NotFound): 292 | blob.download_as_bytes() 293 | 294 | def test_upload_to_nonexistent_bucket(self): 295 | bucket = self._client.bucket("non-existent-test-bucket") 296 | blob = bucket.blob("idonotexisteither") 297 | with self.assertRaises(NotFound): 298 | blob.upload_from_string("some_content") 299 | 300 | def test_download_as_bytes(self): 301 | content = "The quick brown fox jumps over the lazy dog\n" 302 | bucket = self._client.create_bucket("testbucket") 303 | 304 | blob = bucket.blob("iexist") 305 | blob.upload_from_string(content) 306 | 307 | blob = bucket.get_blob("iexist") 308 | fetched_content = blob.download_as_bytes() 309 | self.assertEqual(fetched_content, content.encode("utf-8")) 310 | 311 | def test_download_range_start(self): 312 | content = b"123456789" 313 | bucket = self._client.create_bucket("testbucket") 314 | 315 | blob = bucket.blob("iexist") 316 | blob.upload_from_string(content) 317 | 318 | blob = bucket.get_blob("iexist") 319 | fetched_content = blob.download_as_bytes(start=2) 320 | self.assertEqual(fetched_content, b"3456789") 321 | 322 | def test_download_range_end(self): 323 | content = b"123456789" 324 | bucket = self._client.create_bucket("testbucket") 325 | 326 | blob = bucket.blob("iexist") 327 | blob.upload_from_string(content) 328 | 329 | blob = bucket.get_blob("iexist") 330 | fetched_content = blob.download_as_bytes(end=4) 331 | self.assertEqual(fetched_content, b"12345") 332 | 333 | def test_download_range_start_end(self): 334 | content = b"123456789" 335 | bucket = self._client.create_bucket("testbucket") 336 | 337 | blob = bucket.blob("iexist") 338 | blob.upload_from_string(content) 339 | 340 | blob = bucket.get_blob("iexist") 341 | fetched_content = blob.download_as_bytes(start=2, end=4) 342 | self.assertEqual(fetched_content, b"345") 343 | 344 | def test_set_content_encoding(self): 345 | content = "The quick brown fox jumps over the lazy dog\n" 346 | bucket = self._client.create_bucket("testbucket") 347 | 348 | blob = bucket.blob("testblob") 349 | blob.content_encoding = "gzip" 350 | blob.upload_from_string(content) 351 | blob.reload() 352 | self.assertEqual(blob.content_encoding, "gzip") 353 | 354 | def test_set_metadata(self): 355 | content = "The quick brown fox jumps over the lazy dog\n" 356 | bucket = self._client.create_bucket("testbucket") 357 | metadata = {"Color": "Pink"} 358 | 359 | blob = bucket.blob("testblob") 360 | blob.metadata = metadata 361 | blob.upload_from_string(content) 362 | blob.reload() 363 | self.assertEqual(blob.metadata, metadata) 364 | 365 | def test_set_custom_time(self): 366 | content = "The quick brown fox jumps over the lazy dog\n" 367 | bucket = self._client.create_bucket("testbucket") 368 | 369 | blob = bucket.blob("customtime") 370 | now = datetime.datetime.now(datetime.timezone.utc) 371 | blob.custom_time = now 372 | blob.upload_from_string(content) 373 | blob.reload() 374 | self.assertEqual(blob.custom_time, now) 375 | 376 | def test_patch_custom_time(self): 377 | content = "The quick brown fox jumps over the lazy dog\n" 378 | now = datetime.datetime.now(datetime.timezone.utc) 379 | bucket = self._client.create_bucket("testbucket") 380 | 381 | blob = bucket.blob("customtime") 382 | blob.upload_from_string(content) 383 | 384 | blob.reload() 385 | self.assertEqual(blob.custom_time, None) 386 | blob.custom_time = now 387 | blob.patch() 388 | blob.reload() 389 | self.assertEqual(blob.custom_time, now) 390 | 391 | def test_patch_custom_time_with_older_datetime(self): 392 | content = "The quick brown fox 
jumps over the lazy dog\n" 393 | newer = datetime.datetime.now(datetime.timezone.utc) 394 | older = datetime.datetime(2014, 11, 5, 20, 34, 37) 395 | bucket = self._client.create_bucket("testbucket") 396 | 397 | blob = bucket.blob("customtime") 398 | blob.upload_from_string(content) 399 | 400 | blob.reload() 401 | self.assertEqual(blob.custom_time, None) 402 | blob.custom_time = newer 403 | blob.patch() 404 | blob.reload() 405 | self.assertEqual(blob.custom_time, newer) 406 | blob.custom_time = older 407 | blob.patch() 408 | blob.reload() 409 | self.assertEqual(blob.custom_time, newer) 410 | 411 | def test_patch_content_encoding(self): 412 | content = "The quick brown fox jumps over the lazy dog\n" 413 | bucket = self._client.create_bucket("testbucket") 414 | 415 | blob = bucket.blob("testblob") 416 | blob.content_encoding = "gzip" 417 | blob.upload_from_string(content) 418 | blob.reload() 419 | metageneration = blob.metageneration 420 | self.assertEqual(blob.content_encoding, "gzip") 421 | blob.content_encoding = "" 422 | blob.patch() 423 | blob.reload() 424 | self.assertNotEqual(blob.metageneration, metageneration) 425 | self.assertEqual(blob.content_encoding, "") 426 | 427 | def test_valid_md5_hash(self): 428 | content = b"test" 429 | md5_hash = "CY9rzUYh03PK3k6DJie09g==" 430 | bucket = self._client.create_bucket("testbucket") 431 | blob = bucket.blob("hashtest") 432 | blob.md5_hash = md5_hash 433 | blob.upload_from_string(content) 434 | download_blob = bucket.get_blob("hashtest") 435 | self.assertEqual(download_blob.download_as_bytes(checksum="md5"), content) 436 | self.assertEqual(download_blob.md5_hash, md5_hash) 437 | 438 | def test_invalid_md5_hash(self): 439 | content = b"Hello World" 440 | bucket = self._client.create_bucket("testbucket") 441 | blob = bucket.blob("hashtest") 442 | blob.md5_hash = "deadbeef" 443 | with self.assertRaises(BadRequest): 444 | blob.upload_from_string(content) 445 | 446 | def test_valid_crc32c_hash(self): 447 | content = b"hello world" 448 | crc32c_hash = "yZRlqg==" 449 | bucket = self._client.create_bucket("testbucket") 450 | blob = bucket.blob("hashtest") 451 | blob.crc32c = crc32c_hash 452 | blob.upload_from_string(content) 453 | download_blob = bucket.get_blob("hashtest") 454 | self.assertEqual(download_blob.download_as_bytes(checksum="crc32c"), content) 455 | self.assertEqual(download_blob.crc32c, crc32c_hash) 456 | 457 | def test_invalid_crc32c_hash(self): 458 | content = b"Hello World" 459 | bucket = self._client.create_bucket("testbucket") 460 | blob = bucket.blob("hashtest") 461 | blob.crc32c = "deadbeef" 462 | with self.assertRaises(BadRequest): 463 | blob.upload_from_string(content) 464 | 465 | def test_download_binary_to_file(self): 466 | test_binary = os.path.join( 467 | os.path.dirname(os.path.abspath(__file__)), "test_binary.png" 468 | ) 469 | bucket = self._client.create_bucket("testbucket") 470 | 471 | blob = bucket.blob("binary.png") 472 | with open(test_binary, "rb") as file: 473 | blob.upload_from_file(file, content_type="image/png") 474 | 475 | blob = bucket.get_blob("binary.png") 476 | fetched_file = BytesIO() 477 | blob.download_to_file(fetched_file) 478 | 479 | with open(test_binary, "rb") as file: 480 | self.assertEqual(fetched_file.getvalue(), file.read()) 481 | 482 | def test_download_text_to_file(self): 483 | bucket = self._client.create_bucket("testbucket") 484 | 485 | blob = bucket.blob("text.txt") 486 | with open(TEST_TEXT, "rb") as file: 487 | blob.upload_from_file(file, content_type="text/plain; charset=utf-8") 488 | 489 
| blob = bucket.get_blob("text.txt") 490 | fetched_file = BytesIO() 491 | blob.download_to_file(fetched_file) 492 | 493 | with open(TEST_TEXT, "rb") as file: 494 | self.assertEqual(fetched_file.getvalue(), file.read()) 495 | 496 | def test_delete_object(self): 497 | bucket = self._client.create_bucket("bucket_name") 498 | blob = bucket.blob("canttouchme.txt") 499 | blob.upload_from_string("File content") 500 | 501 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 502 | self.assertTrue(pwd.exists("bucket_name/canttouchme.txt")) 503 | blob.delete() 504 | 505 | self.assertIsNone(bucket.get_blob("cantouchme.txt")) 506 | self.assertFalse(pwd.exists("bucket_name/canttouchme.txt")) 507 | 508 | def test_delete_nonexistent_object(self): 509 | bucket = self._client.create_bucket("bucket_name") 510 | blob = bucket.blob("this-should-not-exists.txt") 511 | 512 | with self.assertRaises(NotFound): 513 | blob.delete() 514 | 515 | def test_create_within_directory(self): 516 | bucket = self._client.create_bucket("bucket_name") 517 | blob = bucket.blob("this/is/a/nested/file.txt") 518 | blob.upload_from_string("Not even joking!") 519 | 520 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 521 | read_content = pwd.readtext("bucket_name/this/is/a/nested/file.txt") 522 | self.assertEqual(read_content, "Not even joking!") 523 | 524 | def test_create_within_multiple_time_does_not_break(self): 525 | bucket = self._client.create_bucket("bucket_name") 526 | blob = bucket.blob("this/is/a/nested/file.txt") 527 | blob.upload_from_string("Not even joking!") 528 | 529 | bucket.blob("this/is/another/nested/file.txt") 530 | blob.upload_from_string("Yet another one") 531 | 532 | with fs.open_fs(os.path.join(STORAGE_BASE, STORAGE_DIR)) as pwd: 533 | self.assertTrue(pwd.exists("bucket_name/this/is/a/nested/file.txt")) 534 | 535 | def _assert_blob_list(self, expected, actual): 536 | self.assertEqual([b.name for b in expected], [b.name for b in actual]) 537 | 538 | def test_list_blobs_on_nonexistent_bucket(self): 539 | blobs = self._client.list_blobs("bucket_name") 540 | with self.assertRaises(NotFound): 541 | list(blobs) 542 | 543 | def test_list_blobs_on_empty_bucket(self): 544 | bucket = self._client.create_bucket("bucket_name") 545 | blobs = self._client.list_blobs(bucket) 546 | self._assert_blob_list(blobs, []) 547 | 548 | def test_list_blobs_on_entire_bucket(self): 549 | bucket_1 = self._client.create_bucket("bucket_name_1") 550 | bucket_2 = self._client.create_bucket("bucket_name_2") 551 | 552 | blob_1 = bucket_1.blob("a/b.txt") 553 | blob_1.upload_from_string("text") 554 | 555 | blob_2 = bucket_1.blob("c/d.txt") 556 | blob_2.upload_from_string("text") 557 | 558 | blob_3 = bucket_2.blob("a/b.txt") 559 | blob_3.upload_from_string("text") 560 | 561 | blobs = self._client.list_blobs(bucket_1) 562 | self._assert_blob_list(blobs, [blob_1, blob_2]) 563 | 564 | def test_list_blobs_with_prefix(self): 565 | bucket = self._client.create_bucket("bucket_name") 566 | 567 | blob_1 = bucket.blob("a/b.txt") 568 | blob_1.upload_from_string("text") 569 | 570 | blob_2 = bucket.blob("a/b/c.txt") 571 | blob_2.upload_from_string("text") 572 | 573 | blob_3 = bucket.blob("b/c.txt") 574 | blob_3.upload_from_string("text") 575 | 576 | blobs = self._client.list_blobs(bucket, prefix="a") 577 | 578 | self._assert_blob_list(blobs, [blob_1, blob_2]) 579 | 580 | def test_list_blobs_with_prefix_and_delimiter(self): 581 | bucket = self._client.create_bucket("bucket_name") 582 | 583 | blob_1 = bucket.blob("a/b.txt") 584 | 
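# With prefix="a/" and delimiter="/", the listing below should contain only the
# direct children a/b.txt and a/c.txt, while the "subfolder" a/b/ is surfaced
# separately through blobs.prefixes.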
blob_1.upload_from_string("text") 585 | 586 | blob_2 = bucket.blob("a/c.txt") 587 | blob_2.upload_from_string("text") 588 | 589 | blob_3 = bucket.blob("a/b/c.txt") 590 | blob_3.upload_from_string("text") 591 | 592 | blob_4 = bucket.blob("b/c.txt") 593 | blob_4.upload_from_string("text") 594 | 595 | blobs = self._client.list_blobs(bucket, prefix="a/", delimiter="/") 596 | 597 | self._assert_blob_list(blobs, [blob_1, blob_2]) 598 | self.assertEqual(blobs.prefixes, {"a/b/"}) 599 | 600 | def test_bucket_copy_existing(self): 601 | bucket = self._client.create_bucket("bucket_name") 602 | 603 | blob_1 = bucket.blob("a/b.txt") 604 | blob_1.upload_from_string("text") 605 | 606 | blob_2 = bucket.rename_blob(blob_1, "c/d.txt") 607 | 608 | blobs = self._client.list_blobs(bucket) 609 | self._assert_blob_list(blobs, [blob_2]) 610 | 611 | def test_bucket_copy_non_existing(self): 612 | bucket = self._client.create_bucket("bucket_name") 613 | 614 | blob_1 = bucket.blob("a/b.txt") 615 | 616 | with self.assertRaises(NotFound): 617 | bucket.rename_blob(blob_1, "c/d.txt") 618 | 619 | def test_compose_create_new_blob(self): 620 | bucket = self._client.create_bucket("compose_test") 621 | data_1 = b"AAA\n" 622 | source_1 = bucket.blob("source-1") 623 | source_1.upload_from_string(data_1, content_type="text/plain") 624 | 625 | data_2 = b"BBB\n" 626 | source_2 = bucket.blob("source-2") 627 | source_2.upload_from_string(data_2, content_type="text/plain") 628 | 629 | destination = bucket.blob("destination") 630 | destination.content_type = "text/somethingelse" 631 | destination.compose([source_1, source_2]) 632 | 633 | composed = destination.download_as_bytes() 634 | self.assertEqual(composed, data_1 + data_2) 635 | self.assertEqual(destination.content_type, "text/somethingelse") 636 | 637 | def test_compose_wo_content_type_set(self): 638 | bucket = self._client.create_bucket("compose_test") 639 | data_1 = b"AAA\n" 640 | source_1 = bucket.blob("source-1") 641 | source_1.upload_from_string(data_1, content_type="text/plain") 642 | 643 | data_2 = b"BBB\n" 644 | source_2 = bucket.blob("source-2") 645 | source_2.upload_from_string(data_2, content_type="text/plain") 646 | 647 | destination = bucket.blob("destination") 648 | destination.compose([source_1, source_2]) 649 | 650 | composed = destination.download_as_bytes() 651 | self.assertEqual(composed, data_1 + data_2) 652 | self.assertEqual(destination.content_type, "text/plain") 653 | 654 | def test_compose_nonexistent(self): 655 | bucket = self._client.create_bucket("compose_test") 656 | source_1 = bucket.blob("source-1") 657 | source_2 = bucket.blob("source-2") 658 | 659 | destination = bucket.blob("destination") 660 | with self.assertRaises(NotFound): 661 | destination.compose([source_1, source_2]) 662 | 663 | def test_batch_delete_one(self): 664 | content = "this is the content of the file\n" 665 | bucket = self._client.create_bucket("batchbucket") 666 | blob = bucket.blob("testblob-name1.txt") 667 | blob.upload_from_string(content) 668 | with self._client.batch(): 669 | bucket.delete_blob("testblob-name1.txt") 670 | self.assertIsNone(bucket.get_blob("testblob-name1.txt")) 671 | 672 | def test_batch_delete_nonexistent_blob(self): 673 | bucket = self._client.create_bucket("batchbucket") 674 | with self.assertRaises(NotFound): 675 | with self._client.batch(): 676 | bucket.delete_blob("does-not-exist.txt") 677 | 678 | def test_batch_patch_one(self): 679 | now = datetime.datetime.now(datetime.timezone.utc) 680 | content = "this is the content of the file\n" 681 | bucket = 
self._client.create_bucket("batchbucket") 682 | blob = bucket.blob("testblob-name1.txt") 683 | blob.upload_from_string(content) 684 | blob.reload() 685 | self.assertEqual(blob.custom_time, None) 686 | blob.custom_time = now 687 | with self._client.batch(): 688 | blob.patch() 689 | blob = bucket.get_blob("testblob-name1.txt") 690 | self.assertEqual(blob.custom_time, now) 691 | 692 | def test_batch_delete_two(self): 693 | content = "this is the content of the file\n" 694 | bucket = self._client.create_bucket("batchbucket") 695 | blob = bucket.blob("testblob-name1.txt") 696 | blob.upload_from_string(content) 697 | blob = bucket.blob("testblob-name2.txt") 698 | blob.upload_from_string(content) 699 | with self._client.batch(): 700 | bucket.delete_blob("testblob-name1.txt") 701 | bucket.delete_blob("testblob-name2.txt") 702 | self.assertIsNone(bucket.get_blob("testblob-name1.txt")) 703 | self.assertIsNone(bucket.get_blob("testblob-name2.txt")) 704 | 705 | def test_batch_patch_two(self): 706 | now = datetime.datetime.now(datetime.timezone.utc) 707 | content = "this is the content of the file\n" 708 | bucket = self._client.create_bucket("batchbucket") 709 | blob1 = bucket.blob("testblob-name1.txt") 710 | blob1.upload_from_string(content) 711 | blob2 = bucket.blob("testblob-name2.txt") 712 | blob2.upload_from_string(content) 713 | blob1.reload() 714 | blob2.reload() 715 | self.assertEqual(blob1.custom_time, None) 716 | self.assertEqual(blob2.custom_time, None) 717 | blob1.custom_time = now 718 | blob2.custom_time = now 719 | with self._client.batch(): 720 | blob1.patch() 721 | blob2.patch() 722 | blob1 = bucket.get_blob("testblob-name1.txt") 723 | blob2 = bucket.get_blob("testblob-name2.txt") 724 | self.assertEqual(blob1.custom_time, now) 725 | self.assertEqual(blob2.custom_time, now) 726 | 727 | def test_batch_delete_patch(self): 728 | now = datetime.datetime.now(datetime.timezone.utc) 729 | content = "this is the content of the file\n" 730 | bucket = self._client.create_bucket("batchbucket") 731 | blob = bucket.blob("testblob-name1.txt") 732 | blob.upload_from_string(content) 733 | blob = bucket.blob("testblob-name2.txt") 734 | blob.upload_from_string(content) 735 | blob = bucket.blob("testblob-name3.txt") 736 | blob.upload_from_string(content) 737 | self.assertEqual(blob.custom_time, None) 738 | blob.custom_time = now 739 | with self._client.batch(): 740 | bucket.delete_blob("testblob-name1.txt") 741 | bucket.delete_blob("testblob-name2.txt") 742 | blob.patch() 743 | self.assertIsNone(bucket.get_blob("testblob-name1.txt")) 744 | self.assertIsNone(bucket.get_blob("testblob-name2.txt")) 745 | blob = bucket.get_blob("testblob-name3.txt") 746 | self.assertEqual(blob.custom_time, now) 747 | 748 | def test_batch_delete_buckets(self): 749 | bucket1 = self._client.create_bucket("batchbucket1") 750 | bucket2 = self._client.create_bucket("batchbucket2") 751 | with self.assertRaises(NotFound): 752 | with self._client.batch(): 753 | bucket1.delete() 754 | bucket1.delete() 755 | bucket2.delete() 756 | with self.assertRaises(NotFound): 757 | self._client.get_bucket("batchbucket1") 758 | with self.assertRaises(NotFound): 759 | self._client.get_bucket("batchbucket2") 760 | 761 | def test_resumable_upload_small_chunk_size(self): 762 | content = b"a" * 10000000 763 | bucket = self._client.create_bucket("testbucket") 764 | 765 | blob = bucket.blob("resumable-test", chunk_size=256 * 1024) 766 | blob.upload_from_string(content) 767 | 768 | blob = bucket.get_blob("resumable-test") 769 | fetched_content = 
blob.download_as_bytes() 770 | self.assertEqual(len(fetched_content), len(content)) 771 | self.assertEqual(fetched_content, content) 772 | 773 | def test_resumable_upload_large_file(self): 774 | content = b"abcde12345" * 2000000 775 | bucket = self._client.create_bucket("testbucket") 776 | 777 | blob = bucket.blob("resumable-test") 778 | blob.upload_from_string(content) 779 | 780 | blob = bucket.get_blob("resumable-test") 781 | fetched_content = blob.download_as_bytes() 782 | self.assertEqual(len(fetched_content), len(content)) 783 | self.assertEqual(fetched_content, content) 784 | 785 | def test_empty_blob(self): 786 | bucket = self._client.create_bucket("testbucket") 787 | bucket.blob("empty_blob").open("w").close() 788 | 789 | blob = bucket.get_blob("empty_blob") 790 | fetched_content = blob.download_as_bytes() 791 | self.assertEqual(fetched_content, b"") 792 | 793 | def test_signed_url_download(self): 794 | content = b"The quick brown fox jumps over the lazy dog" 795 | bucket = self._client.create_bucket("testbucket") 796 | 797 | blob = bucket.blob("signed-download") 798 | blob.upload_from_string(content, content_type="text/mycustom") 799 | 800 | url = blob.generate_signed_url( 801 | api_access_endpoint="http://localhost:9023", 802 | credentials=FakeSigningCredentials(), 803 | version="v4", 804 | expiration=datetime.timedelta(minutes=15), 805 | method="GET", 806 | ) 807 | 808 | response = requests.get(url) 809 | self.assertEqual(response.status_code, 200) 810 | self.assertEqual(response.content, content) 811 | self.assertEqual(response.headers["content-type"], "text/mycustom") 812 | 813 | def test_signed_url_download_with_content_disposition(self): 814 | content = b"The quick brown fox jumps over the lazy dog" 815 | bucket = self._client.create_bucket("testbucket") 816 | 817 | blob = bucket.blob("signed-download") 818 | blob.upload_from_string(content, content_type="text/mycustom") 819 | 820 | requested_filename = "requested_filename.cst2" 821 | response_disposition = f'attachment; filename="{requested_filename}"' 822 | 823 | url = blob.generate_signed_url( 824 | api_access_endpoint="http://localhost:9023", 825 | credentials=FakeSigningCredentials(), 826 | version="v4", 827 | expiration=datetime.timedelta(minutes=15), 828 | response_disposition=response_disposition, 829 | method="GET", 830 | ) 831 | 832 | response = requests.get(url) 833 | self.assertEqual(response.status_code, 200) 834 | self.assertEqual(response.content, content) 835 | self.assertEqual( 836 | response.headers["content-disposition"], f"{response_disposition}" 837 | ) 838 | self.assertEqual(response.headers["content-type"], "text/mycustom") 839 | 840 | def test_url_generation_for_browser(self): 841 | self.skipTest("Used to test browser functionality with URL, not API.") 842 | os.environ["STORAGE_EMULATOR_HOST"] = "http://localhost:8080" 843 | content = b"The quick brown fox jumps over the lazy dog" 844 | 845 | # Cloud Storage uses environment variables to configure API endpoints for 846 | # file upload - which is read at module import time 847 | from google.cloud import storage 848 | 849 | http = requests.Session() 850 | 851 | client = storage.Client( 852 | project="[PROJECT]", 853 | _http=http, 854 | client_options={"api_endpoint": "http://localhost:8080"}, 855 | ) 856 | 857 | bucket = client.create_bucket("testbucket") 858 | 859 | blob = bucket.blob("signed-download") 860 | blob.upload_from_string(content, content_type="text/html") 861 | 862 | requested_filename = "requested_filename.cst2" 863 | response_disposition 
= f'attachment; filename="{requested_filename}"' 864 | 865 | url = blob.generate_signed_url( 866 | api_access_endpoint="http://localhost:8080", 867 | credentials=FakeSigningCredentials(), 868 | version="v4", 869 | expiration=datetime.timedelta(minutes=15), 870 | response_disposition=response_disposition, 871 | method="GET", 872 | ) 873 | print(url) 874 | 875 | def test_signed_url_upload(self): 876 | bucket = self._client.create_bucket("testbucket") 877 | blob = bucket.blob("signed-upload") 878 | url = blob.generate_signed_url( 879 | api_access_endpoint="http://localhost:9023", 880 | credentials=FakeSigningCredentials(), 881 | version="v4", 882 | expiration=datetime.timedelta(minutes=15), 883 | method="PUT", 884 | ) 885 | with open(TEST_TEXT, "rb") as file: 886 | headers = {"Content-type": "text/plain"} 887 | response = requests.put(url, data=file, headers=headers) 888 | self.assertEqual(response.status_code, 200) 889 | 890 | blob_content = blob.download_as_bytes() 891 | file.seek(0) 892 | self.assertEqual(blob_content, file.read()) 893 | self.assertEqual(blob.content_type, "text/plain") 894 | 895 | def test_signed_url_upload_to_nonexistent_bucket(self): 896 | bucket = self._client.bucket("non-existent-test-bucket") 897 | blob = bucket.blob("idonotexisteither") 898 | url = blob.generate_signed_url( 899 | api_access_endpoint="http://localhost:9023", 900 | credentials=FakeSigningCredentials(), 901 | version="v4", 902 | expiration=datetime.timedelta(minutes=15), 903 | method="PUT", 904 | ) 905 | with open(TEST_TEXT, "rb") as file: 906 | response = requests.put(url, data=file) 907 | self.assertEqual(response.status_code, 404) 908 | 909 | def test_initiate_resumable_upload_without_metadata(self): 910 | url = "http://127.0.0.1:9023/upload/storage/v1/b/test_bucket/o?" 911 | url += "uploadType=resumable&name=test_file" 912 | self._client.create_bucket("test_bucket") 913 | headers = {"Content-type": "application/json"} 914 | response = requests.post(url, headers=headers) 915 | self.assertEqual(response.status_code, 200) 916 | 917 | def test_media_upload_without_metadata(self): 918 | url = "http://127.0.0.1:9023/upload/storage/v1/b/test_bucket/o?" 
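# uploadType=media is a simple upload: the request body carries the raw object
# content, and the object metadata (here just the name) is passed in the query
# string rather than in a separate JSON part.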
919 | url += "uploadType=media&name=test_file&contentEncoding=text%2Fplain" 920 | bucket = self._client.create_bucket("test_bucket") 921 | with open(TEST_TEXT, "rb") as file: 922 | headers = {"Content-type": "text/html"} 923 | response = requests.post(url, data=file, headers=headers) 924 | self.assertEqual(response.status_code, 200) 925 | blob = bucket.blob("test_file") 926 | blob_content = blob.download_as_bytes() 927 | file.seek(0) 928 | self.assertEqual(blob_content, file.read()) 929 | self.assertEqual(blob.content_type, "text/plain") 930 | 931 | def test_upload_from_file_content_type_json(self): 932 | file_name = "test.json" 933 | content = b'[{"a": 1}]' 934 | bucket = self._client.create_bucket("testbucket") 935 | blob = bucket.blob(file_name) 936 | 937 | with NamedTemporaryFile() as temp_file: 938 | temp_file.write(content) 939 | temp_file.flush() 940 | temp_file.seek(0) 941 | blob.upload_from_file(temp_file, content_type="application/json") 942 | 943 | blob = bucket.get_blob(file_name) 944 | self.assertEqual(blob.name, file_name) 945 | self.assertEqual(blob.download_as_bytes(), content) 946 | 947 | 948 | class HttpEndpointsTest(ServerBaseCase): 949 | """Tests for the HTTP endpoints defined by server.HANDLERS.""" 950 | 951 | def _url(self, path): 952 | return os.environ["STORAGE_EMULATOR_HOST"] + path 953 | 954 | def test_download_by_url(self): 955 | """Objects should be downloadable over HTTP from the emulator client.""" 956 | content = "Here is some content" 957 | bucket = self._client.create_bucket("anotherbucket") 958 | blob = bucket.blob("something.txt") 959 | blob.upload_from_string(content) 960 | 961 | url = self._url("/anotherbucket/something.txt") 962 | response = requests.get(url) 963 | self.assertEqual(response.status_code, 200) 964 | self.assertEqual(response.content, content.encode("utf-8")) 965 | 966 | def test_download_by_dl_api_url(self): 967 | """Objects should be downloadable over HTTP from the emulator client.""" 968 | content = "Here is some content 123" 969 | bucket = self._client.create_bucket("bucket") 970 | blob = bucket.blob("something.txt") 971 | blob.upload_from_string(content) 972 | 973 | url = self._url("/download/storage/v1/b/bucket/o/something.txt") 974 | response = requests.get(url) 975 | self.assertEqual(response.status_code, 200) 976 | self.assertEqual(response.content, content.encode("utf-8")) 977 | 978 | def test_download_by_api_media_url(self): 979 | """Objects should be downloadable over HTTP from the emulator client.""" 980 | content = "Here is some content 456" 981 | bucket = self._client.create_bucket("bucket") 982 | blob = bucket.blob("something.txt") 983 | blob.upload_from_string(content) 984 | 985 | url = self._url("/storage/v1/b/bucket/o/something.txt") 986 | response = requests.get(url, params={"alt": "media"}) 987 | self.assertEqual(response.status_code, 200) 988 | self.assertEqual(response.content, content.encode("utf-8")) 989 | 990 | def test_download_file_within_folder(self): 991 | """Cloud Storage allows folders within buckets, so the download URL should allow for this.""" 992 | content = "Here is some content" 993 | bucket = self._client.create_bucket("yetanotherbucket") 994 | blob = bucket.blob("folder/contain~ing/something~v-1.0.α.txt") 995 | blob.upload_from_string(content) 996 | 997 | url = self._url("/yetanotherbucket/folder/contain~ing/something~v-1.0.α.txt") 998 | response = requests.get(url) 999 | self.assertEqual(response.status_code, 200) 1000 | self.assertEqual(response.content, content.encode("utf-8")) 1001 | 1002 | def 
test_wipe(self): 1003 | """The wipe endpoint should delete all stored data""" 1004 | storage_path = os.path.join(STORAGE_BASE, STORAGE_DIR) 1005 | content = "Here is some content" 1006 | bucket = self._client.create_bucket("anotherbucket1") 1007 | blob = bucket.blob("something.txt") 1008 | blob.upload_from_string(content) 1009 | 1010 | url = self._url("/wipe") 1011 | response = requests.get(url) 1012 | self.assertEqual(response.status_code, 200) 1013 | self.assertEqual(len(os.listdir(storage_path)), 0) 1014 | 1015 | def test_wipe_keep_buckets(self): 1016 | """The wipe endpoint should delete the data but keep the root buckets""" 1017 | blob_path = "something.txt" 1018 | bucket_name = "anewone" 1019 | content = "Here is some content" 1020 | bucket = self._client.create_bucket(bucket_name) 1021 | blob = bucket.blob(blob_path) 1022 | blob.upload_from_string(content) 1023 | 1024 | url = self._url("/wipe?keep-buckets=true") 1025 | response = requests.get(url) 1026 | self.assertEqual(response.status_code, 200) 1027 | 1028 | fetched_bucket = self._client.get_bucket(bucket_name) 1029 | self.assertEqual(fetched_bucket.name, bucket.name) 1030 | with self.assertRaises(NotFound): 1031 | fetched_bucket.blob(blob_path).download_as_text() 1032 | -------------------------------------------------------------------------------- /tests/test_storage.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from unittest import TestCase as BaseTestCase 4 | 5 | from gcp_storage_emulator.exceptions import NotFound 6 | from gcp_storage_emulator.settings import STORAGE_BASE, STORAGE_DIR 7 | from gcp_storage_emulator.storage import Storage 8 | 9 | 10 | def _get_meta_path(): 11 | return os.path.join(os.getcwd(), STORAGE_BASE, STORAGE_DIR, ".meta") 12 | 13 | 14 | class StorageOSFSTests(BaseTestCase): 15 | def setUp(self): 16 | self.storage = Storage() 17 | self.storage.wipe() 18 | self.storage.create_bucket("a_bucket_name", {}) 19 | 20 | def tearDown(self): 21 | self.storage.wipe() 22 | 23 | def test_get_bucket_reads_from_meta(self): 24 | meta_path = _get_meta_path() 25 | buckets = {"key": "a"} 26 | 27 | with open(meta_path, "w") as file: 28 | json.dump( 29 | { 30 | "buckets": buckets, 31 | }, 32 | file, 33 | ) 34 | 35 | # Force a re-read from the file; this is usually done in the constructor 36 | self.storage._read_config_from_file() 37 | self.assertEqual(self.storage.get_bucket("key"), "a") 38 | 39 | def test_get_file_obj_reads_from_meta(self): 40 | meta_path = _get_meta_path() 41 | objects = {"key": {"inner_key": "a"}} 42 | 43 | with open(meta_path, "w") as file: 44 | json.dump( 45 | { 46 | "objects": objects, 47 | }, 48 | file, 49 | ) 50 | 51 | # Force a re-read from the file; this is usually done in the constructor 52 | self.storage._read_config_from_file() 53 | self.assertEqual(self.storage.get_file_obj("key", "inner_key"), "a") 54 | 55 | def test_get_file_obj_not_found(self): 56 | with self.assertRaises(NotFound): 57 | self.storage.get_file_obj("a_bucket", "a_file") 58 | 59 | self.storage.create_bucket("a_bucket", {}) 60 | with self.assertRaises(NotFound): 61 | self.storage.get_file_obj("a_bucket", "a_file") 62 | 63 | def test_get_file_not_found(self): 64 | with self.assertRaises(NotFound): 65 | self.storage.get_file("a_bucket", "a_file") 66 | 67 | self.storage.create_bucket("a_bucket", {}) 68 | with self.assertRaises(NotFound): 69 | self.storage.get_file("a_bucket", "a_file") 70 | 71 | def test_create_bucket_stores_meta(self): 72 | bucket_obj = {"key": "val"} 73 |
self.storage.create_bucket("a_bucket", bucket_obj) 74 | 75 | meta_path = _get_meta_path() 76 | with open(meta_path, "r") as file: 77 | meta = json.load(file) 78 | self.assertEqual(meta["buckets"]["a_bucket"], bucket_obj) 79 | 80 | def test_create_file_stores_content(self): 81 | test_file = os.path.join( 82 | os.getcwd(), STORAGE_BASE, STORAGE_DIR, "a_bucket_name", "file_name.txt" 83 | ) 84 | content = "Łukas is a great developer".encode("utf8") 85 | file_obj = {} 86 | self.storage.create_file("a_bucket_name", "file_name.txt", content, file_obj) 87 | 88 | with open(test_file, "rb") as file: 89 | read_content = file.read() 90 | self.assertEqual(read_content, content) 91 | 92 | def test_create_file_stores_meta(self): 93 | content = "Łukas is a great developer".encode("utf8") 94 | file_obj = {"key": "val"} 95 | self.storage.create_file("a_bucket_name", "file_name.txt", content, file_obj) 96 | meta_path = _get_meta_path() 97 | with open(meta_path, "r") as file: 98 | meta = json.load(file) 99 | self.assertEqual( 100 | meta["objects"]["a_bucket_name"]["file_name.txt"], file_obj 101 | ) 102 | 103 | def test_create_resumable_upload_stores_meta(self): 104 | file_obj = {"key": "val"} 105 | file_id = self.storage.create_resumable_upload( 106 | "a_bucket_name", "file_name.png", file_obj 107 | ) 108 | meta_path = _get_meta_path() 109 | with open(meta_path, "r") as file: 110 | meta = json.load(file) 111 | self.assertEqual(meta["resumable"][file_id], file_obj) 112 | 113 | def test_file_ids_dont_clash(self): 114 | file_obj = {"key": "val"} 115 | file_id_1 = self.storage.create_resumable_upload( 116 | "a_bucket_name", "file_name.png", file_obj 117 | ) 118 | file_id_2 = self.storage.create_resumable_upload( 119 | "a_bucket_name", "file_name.png", file_obj 120 | ) 121 | self.assertNotEqual(file_id_1, file_id_2) 122 | 123 | def test_create_file_for_resumable_upload(self): 124 | test_file = os.path.join( 125 | os.getcwd(), STORAGE_BASE, STORAGE_DIR, "a_bucket_name", "file_name.png" 126 | ) 127 | content = b"Randy is also a great developer" 128 | file_obj = {"bucket": "a_bucket_name", "name": "file_name.png"} 129 | file_id = self.storage.create_resumable_upload( 130 | "a_bucket_name", "file_name.png", file_obj 131 | ) 132 | self.assertEqual(self.storage.get_resumable_file_obj(file_id), file_obj) 133 | self.storage.create_file( 134 | file_obj["bucket"], file_obj["name"], content, file_obj, file_id 135 | ) 136 | 137 | with open(test_file, "rb") as file: 138 | read_content = file.read() 139 | self.assertEqual(read_content, content) 140 | 141 | with open(_get_meta_path(), "r") as file: 142 | meta = json.load(file) 143 | self.assertEqual( 144 | meta["objects"]["a_bucket_name"]["file_name.png"], file_obj 145 | ) 146 | self.assertEqual(meta["resumable"], {}) 147 | 148 | def test_delete_bucket_stores_meta(self): 149 | bucket_obj = {"key": "val"} 150 | self.storage.create_bucket("a_bucket", bucket_obj) 151 | 152 | self.storage.delete_bucket("a_bucket") 153 | 154 | meta_path = _get_meta_path() 155 | with open(meta_path, "r") as file: 156 | meta = json.load(file) 157 | self.assertIsNone(meta["buckets"].get("a_bucket")) 158 | 159 | def test_wipe(self): 160 | bucket_a_obj = {"key_a": "val_a"} 161 | bucket_b_obj = {"key_b": "valb_"} 162 | self.storage.create_bucket("bucket_a", bucket_a_obj) 163 | self.storage.create_bucket("bucket_b", bucket_b_obj) 164 | self.storage.wipe() 165 | meta_path = _get_meta_path() 166 | self.assertFalse(os.path.isfile(meta_path)) 167 | 168 | def test_wipe_keep_buckets(self): 169 | bucket_a_obj = 
{"key_a": "val_a"} 170 | bucket_b_obj = {"key_b": "valb_"} 171 | self.storage.create_bucket("bucket_a", bucket_a_obj) 172 | self.storage.create_bucket("bucket_b", bucket_b_obj) 173 | self.storage.wipe(keep_buckets=True) 174 | meta_path = _get_meta_path() 175 | 176 | with open(meta_path, "r") as file: 177 | meta = json.load(file) 178 | self.assertEqual(meta["buckets"]["bucket_a"], bucket_a_obj) 179 | self.assertEqual(meta["buckets"]["bucket_b"], bucket_b_obj) 180 | self.assertEqual(meta["objects"], {}) 181 | self.assertEqual(meta["resumable"], {}) 182 | 183 | def test_without_absolute_path(self): 184 | with self.assertRaises(ValueError): 185 | _ = Storage(data_dir="test") 186 | -------------------------------------------------------------------------------- /tests/test_text.txt: -------------------------------------------------------------------------------- 1 | Lorem ipsum dolor sit, amet consectetur adipisicing elit. Ab eveniet quidem placeat, ea cumque quaerat voluptatem dolores totam delectus facilis ut dicta minima aspernatur architecto dignissimos consequatur, rem hic deserunt quasi sit alias mollitia repudiandae assumenda! Soluta vitae aperiam cupiditate ratione distinctio nam optio. Atque rem aliquid, unde mollitia vel voluptates provident quos voluptatem voluptate voluptas ipsum quaerat et autem architecto cumque enim itaque? Dolore natus possimus consectetur pariatur cum incidunt unde inventore, facilis magni provident excepturi cumque distinctio cupiditate aperiam fugiat facere odit consequuntur ea quibusdam voluptatum. Deserunt dolorem eaque sapiente? Praesentium, aperiam consequatur culpa vero nostrum earum officia. 2 | This is Łukäsz weirdness. 3 | --------------------------------------------------------------------------------