├── gcsfs ├── cli │ ├── __init__.py │ └── gcsfuse.py ├── tests │ ├── __init__.py │ ├── derived │ │ ├── __init__.py │ │ ├── gcsfs_test.py │ │ └── gcsfs_fixtures.py │ ├── fake-secret.json │ ├── settings.py │ ├── test_credentials.py │ ├── test_zb_hns_utils.py │ ├── test_inventory_report_listing.py │ ├── utils.py │ ├── test_manyopens.py │ ├── test_fuse.py │ ├── fake-service-account-credentials.json │ ├── test_init.py │ ├── test_mapping.py │ ├── test_core_versioned.py │ ├── test_retry.py │ ├── test_checkers.py │ ├── conftest.py │ └── test_extended_gcsfs.py ├── dask_link.py ├── mapping.py ├── zb_hns_utils.py ├── __init__.py ├── zonal_file.py ├── checkers.py ├── retry.py ├── extended_gcsfs.py ├── credentials.py └── _version.py ├── .gitattributes ├── docs ├── source │ ├── _static │ │ └── custom.css │ ├── api.rst │ ├── fuse.rst │ ├── developer.rst │ ├── code-of-conduct.rst │ ├── index.rst │ ├── changelog.rst │ └── conf.py ├── environment.yml ├── make.bat └── Makefile ├── .isort.cfg ├── .coveragerc ├── requirements.txt ├── MANIFEST.in ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── environment_gcsfs.yaml ├── README.md ├── .pre-commit-config.yaml ├── setup.cfg ├── setup.py ├── LICENSE.txt ├── .gitignore ├── .github └── workflows │ └── ci.yml └── cloudbuild └── e2e-tests-cloudbuild.yaml /gcsfs/cli/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gcsfs/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /gcsfs/tests/derived/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | gcsfs/_version.py export-subst 2 | -------------------------------------------------------------------------------- /gcsfs/dask_link.py: -------------------------------------------------------------------------------- 1 | def register(): 2 | """ 3 | Backward compatibility 4 | """ 5 | pass 6 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .classifier:before { 2 | font-style: normal; 3 | margin: 0.5em; 4 | content: ":"; 5 | } 6 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | profile = black 3 | known_third_party = aiohttp,click,decorator,fsspec,fuse,google,google_auth_oauthlib,pytest,requests,setuptools 4 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: s3fs 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python= 3.10 6 | - docutils<0.17 7 | - sphinx 8 | - sphinx_rtd_theme 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include = 3 | gcsfs/* 4 | 5 | omit = 6 | gcsfs/tests/test* 7 | 8 | [report] 9 | show_missing = True 10 | 11 | [html] 12 | directory = 
coverage_html_report 13 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp!=4.0.0a0, !=4.0.0a1 2 | decorator>4.1.2 3 | fsspec==2025.12.0 4 | google-auth>=1.2 5 | google-auth-oauthlib 6 | google-cloud-storage 7 | google-cloud-storage-control 8 | requests 9 | -------------------------------------------------------------------------------- /gcsfs/mapping.py: -------------------------------------------------------------------------------- 1 | from .core import GCSFileSystem 2 | 3 | 4 | def GCSMap(root, gcs=None, check=False, create=False): 5 | """For backward compatibility""" 6 | gcs = gcs or GCSFileSystem.current() 7 | return gcs.get_mapper(root, check=check, create=create) 8 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include gcsfs *.py 2 | recursive-include docs *.rst 3 | 4 | include setup.py 5 | include README.rst 6 | include LICENSE.txt 7 | include MANIFEST.in 8 | include requirements.txt 9 | 10 | prune docs/_build 11 | include versioneer.py 12 | include gcsfs/_version.py 13 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: miniconda3-4.7 7 | 8 | conda: 9 | environment: docs/environment.yml 10 | 11 | python: 12 | install: 13 | - method: pip 14 | path: . 15 | 16 | sphinx: 17 | configuration: docs/source/conf.py 18 | fail_on_warning: true 19 | -------------------------------------------------------------------------------- /gcsfs/tests/fake-secret.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "service_account", 3 | "private_key_id": "NOT A SECRET", 4 | "private_key": "ALSO NOT A SECRET", 5 | "client_email": "fake-name@fake-project.iam.gserviceaccount.com", 6 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 7 | "token_uri": "https://oauth2.googleapis.com/token" 8 | } 9 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | gcsfs is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. 2 | 3 | ## Project specific notes 4 | 5 | For testing remote API calls this project uses [VCR](https://vcrpy.readthedocs.io/en/latest/). See the docs for more information https://gcsfs.readthedocs.io/en/latest/developer.html. 
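As a concrete starting point, the test settings in `gcsfs/tests/settings.py` read bucket and project names from environment variables, so the suite can also be pointed at real GCS rather than the bundled emulator. The snippet below is only an illustrative sketch (not an official workflow); the variable names come from `docs/source/developer.rst`, and the project and bucket values are placeholders.

```python
# Illustrative sketch: run the test suite against real GCS instead of the emulator.
# The environment variable names are documented in docs/source/developer.rst;
# the project and bucket values below are placeholders, not real resources.
import os

import pytest

os.environ["STORAGE_EMULATOR_HOST"] = "https://storage.googleapis.com"
os.environ["GCSFS_TEST_PROJECT"] = "my-project"           # placeholder
os.environ["GCSFS_TEST_BUCKET"] = "my-gcsfs-test-bucket"  # placeholder

# Assumes default Google credentials are already configured for this environment.
pytest.main(["-vv", "gcsfs/tests"])
```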
6 | -------------------------------------------------------------------------------- /gcsfs/tests/derived/gcsfs_test.py: -------------------------------------------------------------------------------- 1 | import fsspec.tests.abstract as abstract 2 | 3 | from gcsfs.tests.derived.gcsfs_fixtures import GcsfsFixtures 4 | 5 | 6 | class TestGcsfsCopy(abstract.AbstractCopyTests, GcsfsFixtures): 7 | pass 8 | 9 | 10 | class TestGcsfsGet(abstract.AbstractGetTests, GcsfsFixtures): 11 | pass 12 | 13 | 14 | class TestGcsfsPut(abstract.AbstractPutTests, GcsfsFixtures): 15 | pass 16 | -------------------------------------------------------------------------------- /gcsfs/zb_hns_utils.py: -------------------------------------------------------------------------------- 1 | from io import BytesIO 2 | 3 | 4 | async def download_range(offset, length, mrd): 5 | """ 6 | Downloads a byte range from the file asynchronously. 7 | """ 8 | # If length = 0, mrd returns till end of file, so handle that case here 9 | if length == 0: 10 | return b"" 11 | buffer = BytesIO() 12 | await mrd.download_ranges([(offset, length, buffer)]) 13 | return buffer.getvalue() 14 | -------------------------------------------------------------------------------- /environment_gcsfs.yaml: -------------------------------------------------------------------------------- 1 | name: gcsfs_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python==3.11 6 | - aiohttp 7 | - crcmod 8 | - decorator 9 | - fsspec 10 | - google-api-core 11 | - google-api-python-client 12 | - google-auth 13 | - google-auth-oauthlib 14 | - google-cloud-core 15 | - google-cloud-storage 16 | - grpcio 17 | - pytest 18 | - pytest-timeout 19 | - pytest-asyncio 20 | - pytest-subtests 21 | - requests 22 | - ujson 23 | - pip: 24 | - git+https://github.com/fsspec/filesystem_spec 25 | -------------------------------------------------------------------------------- /gcsfs/tests/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test") 4 | TEST_VERSIONED_BUCKET = os.getenv("GCSFS_TEST_VERSIONED_BUCKET", "gcsfs_test_versioned") 5 | TEST_ZONAL_BUCKET = os.getenv("GCSFS_ZONAL_TEST_BUCKET", "gcsfs_zonal_test") 6 | TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project") 7 | TEST_REQUESTER_PAYS_BUCKET = f"{TEST_BUCKET}_req_pay" 8 | TEST_KMS_KEY = os.getenv( 9 | "GCSFS_TEST_KMS_KEY", 10 | f"projects/{TEST_PROJECT}/locations/us/keyRings/gcsfs_test/cryptKeys/gcsfs_test_key", 11 | ) 12 | -------------------------------------------------------------------------------- /gcsfs/tests/test_credentials.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from gcsfs import GCSFileSystem 4 | from gcsfs.credentials import GoogleCredentials 5 | from gcsfs.retry import HttpError 6 | 7 | 8 | def test_googlecredentials_none(): 9 | credentials = GoogleCredentials(project="myproject", token=None, access="read_only") 10 | headers = {} 11 | credentials.apply(headers) 12 | 13 | 14 | @pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100]) 15 | def test_credentials_from_raw_token(token): 16 | with pytest.raises(HttpError, match="Invalid Credentials"): 17 | fs = GCSFileSystem(project="myproject", token=token) 18 | fs.ls("/") 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | gcsfs 2 | 
===== 3 | 4 | [|Build Status|](https://github.com/fsspec/gcsfs/actions) 5 | [|Docs|](https://gcsfs.readthedocs.io/en/latest/?badge=latest) 6 | 7 | Pythonic file-system for Google Cloud Storage for fsspec. 8 | 9 | 10 | Support 11 | ------- 12 | 13 | Work on this repository is supported in part by: 14 | 15 | "Anaconda, Inc. - Advancing AI through open source." 16 | 17 | anaconda logo 18 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: versioneer.py 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v6.0.0 7 | hooks: 8 | - id: end-of-file-fixer 9 | - id: requirements-txt-fixer 10 | - id: trailing-whitespace 11 | - repo: https://github.com/psf/black-pre-commit-mirror 12 | rev: 25.11.0 13 | hooks: 14 | - id: black 15 | args: 16 | - --target-version=py310 17 | - repo: https://github.com/pycqa/flake8 18 | rev: 7.3.0 19 | hooks: 20 | - id: flake8 21 | - repo: https://github.com/asottile/seed-isort-config 22 | rev: v2.2.0 23 | hooks: 24 | - id: seed-isort-config 25 | - repo: https://github.com/pre-commit/mirrors-isort 26 | rev: v5.10.1 27 | hooks: 28 | - id: isort 29 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [versioneer] 2 | VCS = git 3 | style = pep440 4 | versionfile_source = gcsfs/_version.py 5 | versionfile_build = gcsfs/_version.py 6 | tag_prefix = 7 | 8 | [flake8] 9 | exclude = versioneer.py,docs/source/conf.py 10 | ignore = 11 | # Extra space in brackets 12 | E20, 13 | # Multiple spaces around "," 14 | E231,E241, 15 | # Comments 16 | E26, 17 | # Import formatting 18 | E4, 19 | # Comparing types instead of isinstance 20 | E721, 21 | # Assigning lambda expression 22 | E731, 23 | # Ambiguous variable names 24 | E741, 25 | # line break before binary operator 26 | W503, 27 | # line break after binary operator 28 | W504, 29 | # redefinition of unused 'loop' from line 10 30 | F811, 31 | max-line-length = 120 32 | 33 | [tool:pytest] 34 | addopts = 35 | --color=yes --timeout=600 36 | log_cli = false 37 | log_cli_level = DEBUG 38 | -------------------------------------------------------------------------------- /gcsfs/tests/test_zb_hns_utils.py: -------------------------------------------------------------------------------- 1 | from unittest import mock 2 | 3 | import pytest 4 | 5 | from gcsfs import zb_hns_utils 6 | 7 | 8 | @pytest.mark.asyncio 9 | async def test_download_range(): 10 | """ 11 | Tests that download_range calls mrd.download_ranges with the correct 12 | parameters and returns the data written to the buffer. 
13 | """ 14 | offset = 10 15 | length = 20 16 | mock_mrd = mock.AsyncMock() 17 | expected_data = b"test data from download" 18 | 19 | # Simulate the download_ranges method writing data to the buffer 20 | async def mock_download_ranges(ranges): 21 | _offset, _length, buffer = ranges[0] 22 | buffer.write(expected_data) 23 | 24 | mock_mrd.download_ranges.side_effect = mock_download_ranges 25 | 26 | result = await zb_hns_utils.download_range(offset, length, mock_mrd) 27 | 28 | mock_mrd.download_ranges.assert_called_once_with([(offset, length, mock.ANY)]) 29 | assert result == expected_data 30 | -------------------------------------------------------------------------------- /gcsfs/tests/test_inventory_report_listing.py: -------------------------------------------------------------------------------- 1 | import gcsfs.checkers 2 | import gcsfs.tests.settings 3 | from gcsfs.inventory_report import InventoryReport 4 | 5 | TEST_BUCKET = gcsfs.tests.settings.TEST_BUCKET 6 | 7 | 8 | # Basic integration test to ensure listing returns the correct result. 9 | def test_ls_base(monkeypatch, gcs): 10 | # First get results from original listing. 11 | items = gcs.ls(TEST_BUCKET) 12 | 13 | async def mock_fetch_snapshot(*args, **kwargs): 14 | return [{"name": item} for item in items], [] 15 | 16 | # Patch the fetch_snapshot method with the replacement. 17 | monkeypatch.setattr(InventoryReport, "fetch_snapshot", mock_fetch_snapshot) 18 | 19 | inventory_report_info = { 20 | "location": "location", 21 | "id": "id", 22 | "use_snapshot_listing": False, 23 | } 24 | 25 | # Then get results from listing with inventory report. 26 | actual_items = gcs.ls(TEST_BUCKET, inventory_report_info=inventory_report_info) 27 | 28 | # Check equality. 29 | assert actual_items == items 30 | -------------------------------------------------------------------------------- /gcsfs/__init__.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | 4 | from ._version import get_versions 5 | 6 | logger = logging.getLogger(__name__) 7 | __version__ = get_versions()["version"] 8 | del get_versions 9 | from .core import GCSFileSystem 10 | from .mapping import GCSMap 11 | 12 | if os.getenv("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false").lower() in ("true", "1"): 13 | try: 14 | from .extended_gcsfs import ExtendedGcsFileSystem as GCSFileSystem 15 | 16 | logger.info( 17 | "gcsfs experimental features enabled via GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT." 18 | ) 19 | except ImportError as e: 20 | logger.warning( 21 | f"GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT is set, but failed to import experimental features: {e}" 22 | ) 23 | # Fallback to core GCSFileSystem, do not register here 24 | 25 | # TODO: GCSMap still refers to the original GCSFileSystem. This will be 26 | # addressed in a future update. 27 | __all__ = ["GCSFileSystem", "GCSMap"] 28 | 29 | from . 
import _version 30 | 31 | __version__ = _version.get_versions()["version"] 32 | -------------------------------------------------------------------------------- /gcsfs/tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from contextlib import contextmanager 5 | 6 | 7 | @contextmanager 8 | def ignoring(*exceptions): 9 | try: 10 | yield 11 | except exceptions: 12 | pass 13 | 14 | 15 | @contextmanager 16 | def tempdir(dir=None): 17 | dirname = tempfile.mkdtemp(dir=dir) 18 | shutil.rmtree(dirname, ignore_errors=True) 19 | 20 | try: 21 | yield dirname 22 | finally: 23 | if os.path.exists(dirname): 24 | shutil.rmtree(dirname, ignore_errors=True) 25 | 26 | 27 | @contextmanager 28 | def tmpfile(extension="", dir=None): 29 | extension = "." + extension.lstrip(".") 30 | handle, filename = tempfile.mkstemp(extension, dir=dir) 31 | os.close(handle) 32 | os.remove(filename) 33 | 34 | try: 35 | yield filename 36 | finally: 37 | if os.path.exists(filename): 38 | if os.path.isdir(filename): 39 | shutil.rmtree(filename) 40 | else: 41 | with ignoring(OSError): 42 | os.remove(filename) 43 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. currentmodule:: gcsfs.core 5 | 6 | .. autosummary:: 7 | GCSFileSystem 8 | GCSFileSystem.cat 9 | GCSFileSystem.du 10 | GCSFileSystem.exists 11 | GCSFileSystem.get 12 | GCSFileSystem.glob 13 | GCSFileSystem.info 14 | GCSFileSystem.ls 15 | GCSFileSystem.mkdir 16 | GCSFileSystem.mv 17 | GCSFileSystem.open 18 | GCSFileSystem.put 19 | GCSFileSystem.read_block 20 | GCSFileSystem.rm 21 | GCSFileSystem.tail 22 | GCSFileSystem.touch 23 | GCSFileSystem.get_mapper 24 | 25 | .. autosummary:: 26 | GCSFile 27 | GCSFile.close 28 | GCSFile.flush 29 | GCSFile.info 30 | GCSFile.read 31 | GCSFile.seek 32 | GCSFile.tell 33 | GCSFile.write 34 | 35 | .. currentmodule:: gcsfs.mapping 36 | 37 | .. currentmodule:: gcsfs.core 38 | 39 | .. autoclass:: GCSFileSystem 40 | :members: 41 | :inherited-members: 42 | 43 | .. autoclass:: GCSFile 44 | :members: 45 | :inherited-members: 46 | 47 | .. currentmodule:: gcsfs.mapping 48 | 49 | .. raw:: html 50 | 51 | 53 | -------------------------------------------------------------------------------- /gcsfs/tests/test_manyopens.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test helper to open the same file many times. 3 | 4 | This is not a python unit test, but rather a standalone program that will open 5 | a file repeatedly, to check whether a cloud storage transient error can 6 | defeat gcsfs. This is to be run against real GCS, since we cannot capture 7 | HTTP exceptions with VCR. 8 | 9 | Ideally you should see nothing, just the attempt count go up until we're done. 10 | """ 11 | 12 | import sys 13 | 14 | import gcsfs 15 | 16 | 17 | def run(): 18 | if len(sys.argv) != 4: 19 | print( 20 | "usage: python -m gcsfs.tests.test_manyopens " 21 | ' ' 22 | ) 23 | return 24 | project = sys.argv[1] 25 | credentials = sys.argv[2] 26 | file = sys.argv[3] 27 | print("project: " + project) 28 | for i in range(2000): 29 | # Issue #12 only reproduces if I re-create the fs object every time. 
30 | fs = gcsfs.GCSFileSystem(project=project, token=credentials) 31 | print("attempt %s" % i) 32 | with fs.open(file, "rb") as o: 33 | o.readline() 34 | 35 | 36 | if __name__ == "__main__": 37 | run() 38 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | import versioneer 6 | 7 | setup( 8 | name="gcsfs", 9 | version=versioneer.get_version(), 10 | cmdclass=versioneer.get_cmdclass(), 11 | description="Convenient Filesystem interface over GCS", 12 | url="https://github.com/fsspec/gcsfs", 13 | maintainer="Martin Durant", 14 | maintainer_email="mdurant@anaconda.com", 15 | license="BSD", 16 | classifiers=[ 17 | "Development Status :: 4 - Beta", 18 | "Intended Audience :: Developers", 19 | "License :: OSI Approved :: BSD License", 20 | "Operating System :: OS Independent", 21 | "Programming Language :: Python :: 3.10", 22 | "Programming Language :: Python :: 3.11", 23 | "Programming Language :: Python :: 3.12", 24 | "Programming Language :: Python :: 3.13", 25 | "Programming Language :: Python :: 3.14", 26 | ], 27 | keywords=["google-cloud-storage", "gcloud", "file-system"], 28 | packages=["gcsfs", "gcsfs.cli"], 29 | install_requires=[open("requirements.txt").read().strip().split("\n")], 30 | extras_require={"gcsfuse": ["fusepy"], "crc": ["crcmod"]}, 31 | python_requires=">=3.10", 32 | long_description_content_type="text/markdown", 33 | long_description=open("README.md").read(), 34 | zip_safe=False, 35 | ) 36 | -------------------------------------------------------------------------------- /gcsfs/tests/derived/gcsfs_fixtures.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import pytest 4 | from fsspec.tests.abstract import AbstractFixtures 5 | 6 | from gcsfs.core import GCSFileSystem 7 | from gcsfs.tests.conftest import _cleanup_gcs, allfiles 8 | from gcsfs.tests.settings import TEST_BUCKET 9 | 10 | 11 | class GcsfsFixtures(AbstractFixtures): 12 | @pytest.fixture(scope="class") 13 | def fs(self, gcs_factory, buckets_to_delete): 14 | GCSFileSystem.clear_instance_cache() 15 | gcs = gcs_factory() 16 | try: # ensure we're empty. 17 | # Create the bucket if it doesn't exist, otherwise clean it. 18 | if not gcs.exists(TEST_BUCKET): 19 | buckets_to_delete.add(TEST_BUCKET) 20 | gcs.mkdir(TEST_BUCKET) 21 | else: 22 | try: 23 | gcs.rm(gcs.find(TEST_BUCKET)) 24 | except Exception as e: 25 | logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}") 26 | 27 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}) 28 | gcs.invalidate_cache() 29 | yield gcs 30 | finally: 31 | _cleanup_gcs(gcs) 32 | 33 | @pytest.fixture 34 | def fs_path(self): 35 | return TEST_BUCKET 36 | 37 | @pytest.fixture 38 | def supports_empty_directories(self): 39 | return False 40 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2014-2018, Anaconda, Inc. and contributors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 
11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # dask 2 | dask-worker-space/ 3 | 4 | # private notebooks 5 | private/ 6 | 7 | # Pyenv stuff 8 | .python-version 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | pip-wheel-metadata/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | junit/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # DotEnv configuration 71 | .env 72 | 73 | # Database 74 | *.db 75 | *.rdb 76 | 77 | # Pycharm 78 | .idea 79 | 80 | # VS Code 81 | .vscode/ 82 | 83 | # Spyder 84 | .spyproject/ 85 | 86 | # Jupyter NB Checkpoints 87 | .ipynb_checkpoints/ 88 | 89 | # exclude data from source control by default 90 | /data/ 91 | 92 | # Mac OS-specific storage files 93 | .DS_Store 94 | 95 | # vim 96 | *.swp 97 | *.swo 98 | 99 | # Mypy cache 100 | .mypy_cache/ 101 | 102 | #Pytest cache 103 | .pytest_cache/ 104 | 105 | libs/*.whl 106 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | defaults: 6 | run: 7 | shell: bash -l -eo pipefail {0} 8 | 9 | jobs: 10 | test: 11 | name: Python ${{ matrix.python-version }} 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 30 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] 18 | 19 | steps: 20 | - name: Checkout source 21 | uses: actions/checkout@v5 22 | 23 | - name: Setup conda 24 | uses: conda-incubator/setup-miniconda@v3 25 | with: 26 | environment-file: environment_gcsfs.yaml 27 | python-version: ${{ matrix.PY }} 28 | activate-environment: gcsfs_test 29 | 30 | - name: Conda info 31 | run: | 32 | conda list 33 | conda --version 34 | 35 | - name: install 36 | run: | 37 | pip install -e . 38 | - name: Run Standard Tests 39 | run: | 40 | export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json 41 | pytest -vv -s \ 42 | --log-format="%(asctime)s %(levelname)s %(message)s" \ 43 | --log-date-format="%H:%M:%S" \ 44 | gcsfs/ 45 | - name: Run Tests with experimental support 46 | run: | 47 | export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json 48 | export GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT="true" 49 | pytest -vv -s \ 50 | --log-format="%(asctime)s %(levelname)s %(message)s" \ 51 | --log-date-format="%H:%M:%S" \ 52 | gcsfs/ 53 | 54 | lint: 55 | name: lint 56 | runs-on: ubuntu-latest 57 | steps: 58 | - uses: actions/checkout@v5 59 | - uses: actions/setup-python@v6 60 | with: 61 | python-version: "3.11" 62 | - uses: pre-commit/action@v3.0.1 63 | -------------------------------------------------------------------------------- /gcsfs/zonal_file.py: -------------------------------------------------------------------------------- 1 | from fsspec import asyn 2 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( 3 | AsyncMultiRangeDownloader, 4 | ) 5 | 6 | from gcsfs.core import GCSFile 7 | 8 | 9 | class ZonalFile(GCSFile): 10 | """ 11 | ZonalFile is subclass of GCSFile and handles data operations from 12 | Zonal buckets only using a high-performance gRPC path. 13 | """ 14 | 15 | def __init__(self, *args, **kwargs): 16 | """ 17 | Initializes the ZonalFile object. 
18 | """ 19 | super().__init__(*args, **kwargs) 20 | self.mrd = None 21 | if "r" in self.mode: 22 | self.mrd = asyn.sync( 23 | self.gcsfs.loop, self._init_mrd, self.bucket, self.key, self.generation 24 | ) 25 | else: 26 | raise NotImplementedError( 27 | "Only read operations are currently supported for Zonal buckets." 28 | ) 29 | 30 | async def _init_mrd(self, bucket_name, object_name, generation=None): 31 | """ 32 | Initializes the AsyncMultiRangeDownloader. 33 | """ 34 | return await AsyncMultiRangeDownloader.create_mrd( 35 | self.gcsfs.grpc_client, bucket_name, object_name, generation 36 | ) 37 | 38 | def _fetch_range(self, start, end): 39 | """ 40 | Overrides the default _fetch_range to implement the gRPC read path. 41 | 42 | """ 43 | try: 44 | return self.gcsfs.cat_file(self.path, start=start, end=end, mrd=self.mrd) 45 | except RuntimeError as e: 46 | if "not satisfiable" in str(e): 47 | return b"" 48 | raise 49 | 50 | def close(self): 51 | """ 52 | Closes the ZonalFile and the underlying AsyncMultiRangeDownloader. 53 | """ 54 | if self.mrd: 55 | asyn.sync(self.gcsfs.loop, self.mrd.close) 56 | super().close() 57 | -------------------------------------------------------------------------------- /gcsfs/tests/test_fuse.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import tempfile 4 | import threading 5 | import time 6 | from functools import partial 7 | 8 | import pytest 9 | 10 | from gcsfs.tests.settings import TEST_BUCKET 11 | 12 | 13 | @pytest.fixture 14 | def fsspec_fuse_run(): 15 | """Fixture catches other errors on fuse import.""" 16 | try: 17 | _fuse = pytest.importorskip("fuse") # noqa 18 | 19 | from fsspec.fuse import run as _fsspec_fuse_run 20 | 21 | return _fsspec_fuse_run 22 | except Exception as error: 23 | logging.debug("Error importing fuse: %s", error) 24 | pytest.skip("Error importing fuse.") 25 | 26 | 27 | @pytest.mark.xfail(reason="Failing test not previously tested.") 28 | @pytest.mark.timeout(180) 29 | def test_fuse(gcs, fsspec_fuse_run): 30 | mountpath = tempfile.mkdtemp() 31 | _run = partial(fsspec_fuse_run, gcs, TEST_BUCKET + "/", mountpath) 32 | th = threading.Thread(target=_run) 33 | th.daemon = True 34 | th.start() 35 | 36 | time.sleep(5) 37 | timeout = 20 38 | n = 40 39 | for i in range(n): 40 | logging.debug(f"Attempt # {i + 1} / {n} to create lock file.") 41 | try: 42 | open(os.path.join(mountpath, "lock"), "w").close() 43 | os.remove(os.path.join(mountpath, "lock")) 44 | break 45 | except Exception as error: # noqa: E722 46 | logging.debug("Error: %s", error) 47 | time.sleep(0.5) 48 | timeout -= 0.5 49 | assert timeout > 0 50 | else: 51 | raise AssertionError(f"Attempted lock file failed after {n} attempts.") 52 | 53 | with open(os.path.join(mountpath, "hello"), "w") as f: 54 | # NB this is in TEXT mode 55 | f.write("hello") 56 | files = os.listdir(mountpath) 57 | assert "hello" in files 58 | with open(os.path.join(mountpath, "hello")) as f: 59 | # NB this is in TEXT mode 60 | assert f.read() == "hello" 61 | -------------------------------------------------------------------------------- /gcsfs/tests/fake-service-account-credentials.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "service_account", 3 | "project_id": "gcsfs", 4 | "private_key_id": "84e3fd6d7101ec632e7348e8940b2aca71133e71", 5 | "private_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAJWz1KlBu2jRE\nlUahHKuJes34hj4pr8ADhgejpAguBBrubXVvSro7aSSbvyDC/GIcyDQ8Q33YK/kT\nufQvCez7iIACbtP53o6WjcrIAP+l8z9RUL9so+sBCaVRZzh74+cEMfWIbc3ACBB5\nU2BPBWQFtr3Qtbe8TUJ+liNcLb8I2JznfydHvl9cn0/50HeOB99Xho5JAY75aE0Y\nT+/aMTFlr/kUbekLRRi4pyE+uOA/ei5RmfwzqO366YLMtEC2DaHwTqSuxBWnbtTW\nu/OvYpmPHazd6own2zJLQ0Elnm5WC/d9YmxhHi/8pJFkkbVf/2CYWEBbmBI3ZOx3\n/nHQwcIPAgMBAAECggEAUztC/dYE/me10WmKLTrykTxpYTihT8RqG/ygbYGd63Tq\nx5IRlxJbJmYOrgp2IhBaXZZZjis8JXoyzBk2TXPyvChuLt+cIfYGdO/ZwZYxJ0z9\nhfdA3EoK/6mSe3cHcB8SEG6lqaHKyN6VaEC2DLTMlW8JvREiFEaxQY0+puzH/ge4\n2EypCP4pvlveH78EIIipPgWcJYGpv0bv8KErECuVHRjJv6vZqUjQdcIi73mCz/5u\nnQqLY8j9lOuCr9vBis7DZIyY2tn4vfqcqxfH9wuIFXnzIQW6Wyg0+bBQydHg1kJ2\nFOszfkBVxZ6LpcHGB4CV4c5z7Me2cMReXQz6VsyoLQKBgQD9v92rHZYDBy4/vGxx\nbpfUkAlcCGW8GXu+qsdmyhZdjSdjDLY6lav+6UoHIJgmnA7LsKPFgnEDrdn78KBb\n3wno3VHfozL5kF887q9hC/+UurwScCKIw5QkmWtsStVgjr6wPmAu6rspMz5xNjaa\nSU4YzlNcbBUUXUawhXytWPR+OwKBgQDB2bDCD00R2yfYFdjAKapqenOtMvrnihUi\nW9Se7Yizme7s25fDxF5CBPpOdKPU2EZUlqBC/5182oMUP/xYUOHJkuUhbYcvU0qr\n+BQewLwr6rs+O1QPTh/6e70SUFR+YJLaAHkDc6fvcdjtl+Zx/p02Zj+UiW3/D4Jj\nc0EqVr4qPQKBgQCbJx3a6xQ2dcWJoySLlxuvFQMkCt5pzQsk4jdaWmaifRSAM92Y\npLut+ecRxJRDx1gko7T/p2qC3WJT8iWbBx2ADRNqstcQUX5qO2dw5202+5bTj00O\nYsfKOSS96mPdzmo6SWl2RoB6CKM9hfCNFhVyhXXjJRMeiIoYlQZO1/1m0QKBgCzz\nat6FJ8z1MdcUsc9VmhPY00wdXzsjtOTjwHkeAa4MCvBXt2iI94Z9mwFoYLkxcZWZ\n3A3NMlrKXMzsTXq5PrI8Yu+Oc2OQ/+bCvv+ml7vjUYoLveFSr22pFd3STNWFVWhB\n5c3cGtwWXUQzDhfu/8umiCXMfHpBwW2IQ1srBCvNAoGATcC3oCFBC/HdGxdeJC5C\n59EoFvKdZsAdc2I5GS/DtZ1Wo9sXqubCaiUDz+4yty+ssHIZ1ikFr8rWfL6KFEs2\niTe+kgM/9FLFtftf1WDpbfIOumbz/6CiGLqsGNlO3ZaU0kYJ041SZ8RleTOYa0zO\noSTLwBo3vje+aflytEwS8SI=\n-----END PRIVATE KEY-----", 6 | "client_email": "fake@gscfs.iam.gserviceaccount.com", 7 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 8 | "token_uri": "https://oauth2.googleapis.com/token" 9 | } 10 | -------------------------------------------------------------------------------- /docs/source/fuse.rst: -------------------------------------------------------------------------------- 1 | GCSFS and FUSE 2 | ============== 3 | 4 | Warning, this functionality is **experimental**. 5 | 6 | FUSE_ is a mechanism to mount user-level filesystems in unix-like 7 | systems (linux, osx, etc.). GCSFS is able to use FUSE to present remote 8 | data/keys as if they were a directory on your local file-system. This 9 | allows for standard shell command manipulation, and loading of data 10 | by libraries that can only handle local file-paths (e.g., netCDF/HDF5). 11 | 12 | .. _FUSE: https://github.com/libfuse/libfuse 13 | 14 | Requirements 15 | ------------- 16 | 17 | In addition to a standard installation of GCSFS, you also need: 18 | 19 | - libfuse as a system install. The way to install this will depend 20 | on your OS. Examples include ``sudo apt-get install fuse``, 21 | ``sudo yum install fuse`` and download from osxfuse_. 22 | 23 | - fusepy_, which can be installed via conda or pip 24 | 25 | - pandas, which can also be installed via conda or pip (this library is 26 | used only for its timestring parsing). 27 | 28 | .. _osxfuse: https://osxfuse.github.io/ 29 | .. _fusepy: https://github.com/fusepy/fusepy 30 | 31 | Usage 32 | ----- 33 | 34 | FUSE functionality is available via the ``fsspec.fuse`` module. See the 35 | docstrings for further details. 36 | 37 | .. code-block:: python 38 | 39 | gcs = gcsfs.GCSFileSystem(..) 
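# the fsspec FUSE helper imported below mounts the bucket at the local path; with foreground=True the call blocks until the filesystem is unmounted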
40 | from fsspec.fuse import run 41 | run(gcs, "bucket/path", "local/path", foreground=True, threads=False) 42 | 43 | Caveats 44 | ------- 45 | 46 | This functionality is experimental. The command usage may change, and you should 47 | expect exceptions. 48 | 49 | Furthermore: 50 | 51 | - although mutation operations tentatively work, you should not at the moment 52 | depend on gcsfuse as a reliable system that won't lose your data. 53 | 54 | - permissions on GCS are complicated, so all files will be shown as fully-open 55 | 0o777, regardless of state. If a read fails, you likely don't have the right 56 | permissions. 57 | 58 | .. raw:: html 59 | 60 | 62 | -------------------------------------------------------------------------------- /gcsfs/cli/gcsfuse.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | from fuse import FUSE 5 | 6 | from gcsfs.gcsfuse import GCSFS 7 | 8 | 9 | @click.command() 10 | @click.argument("bucket", type=str, required=True) 11 | @click.argument("mount_point", type=str, required=True) 12 | @click.option( 13 | "--token", 14 | type=str, 15 | required=False, 16 | default=None, 17 | help="Token to use for authentication", 18 | ) 19 | @click.option( 20 | "--project-id", type=str, required=False, default="", help="Billing Project ID" 21 | ) 22 | @click.option( 23 | "--foreground/--background", 24 | default=True, 25 | help="Run in the foreground or as a background process", 26 | ) 27 | @click.option( 28 | "--threads/--no-threads", default=True, help="Whether to run with threads" 29 | ) 30 | @click.option( 31 | "--cache_files", type=int, default=10, help="Number of open files to cache" 32 | ) 33 | @click.option( 34 | "-v", 35 | "--verbose", 36 | count=True, 37 | help="Set logging level. '-v' for 'gcsfuse' logging."
38 | "'-v -v' for complete debug logging.", 39 | ) 40 | def main( 41 | bucket, mount_point, token, project_id, foreground, threads, cache_files, verbose 42 | ): 43 | """Mount a Google Cloud Storage (GCS) bucket to a local directory""" 44 | 45 | if verbose == 1: 46 | logging.basicConfig(level=logging.INFO) 47 | logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG) 48 | if verbose > 1: 49 | logging.basicConfig(level=logging.DEBUG) 50 | 51 | fmt = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s" 52 | if verbose == 1: 53 | logging.basicConfig(level=logging.INFO, format=fmt) 54 | logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG) 55 | if verbose > 1: 56 | logging.basicConfig(level=logging.DEBUG, format=fmt) 57 | 58 | print(f"Mounting bucket {bucket} to directory {mount_point}") 59 | print("foreground:", foreground, ", nothreads:", not threads) 60 | FUSE( 61 | GCSFS(bucket, token=token, project=project_id, nfiles=cache_files), 62 | mount_point, 63 | nothreads=not threads, 64 | foreground=foreground, 65 | ) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /gcsfs/tests/test_init.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | 4 | 5 | class TestConditionalImport: 6 | def setup_method(self, method): 7 | """Setup for each test method.""" 8 | self.original_env = os.environ.get("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT") 9 | 10 | # Snapshot original gcsfs modules 11 | self.original_modules = { 12 | name: mod for name, mod in sys.modules.items() if name.startswith("gcsfs") 13 | } 14 | 15 | # Unload gcsfs modules to force re-import during the test 16 | modules_to_remove = list(self.original_modules.keys()) 17 | for name in modules_to_remove: 18 | if name in sys.modules: 19 | del sys.modules[name] 20 | 21 | def teardown_method(self, method): 22 | """Teardown after each test method.""" 23 | # Reset environment variable to its original state 24 | if self.original_env is not None: 25 | os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = self.original_env 26 | elif "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" in os.environ: 27 | del os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] 28 | 29 | # Clear any gcsfs modules loaded/modified during this test 30 | modules_to_remove = [name for name in sys.modules if name.startswith("gcsfs")] 31 | for name in modules_to_remove: 32 | if name in sys.modules: 33 | del sys.modules[name] 34 | 35 | # Restore the original gcsfs modules from the snapshot to avoid side effect 36 | # affecting other tests 37 | sys.modules.update(self.original_modules) 38 | 39 | def test_experimental_env_unset(self): 40 | """ 41 | Tests gcsfs.GCSFileSystem is core.GCSFileSystem when 42 | GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT is NOT set. 43 | """ 44 | if "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" in os.environ: 45 | del os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] 46 | 47 | import gcsfs 48 | 49 | assert ( 50 | gcsfs.GCSFileSystem is gcsfs.core.GCSFileSystem 51 | ), "Should be core.GCSFileSystem" 52 | assert not hasattr( 53 | gcsfs, "ExtendedGcsFileSystem" 54 | ), "ExtendedGcsFileSystem should not be imported directly on gcsfs" 55 | 56 | def test_experimental_env_set(self): 57 | """ 58 | Tests gcsfs.GCSFileSystem is extended_gcsfs.ExtendedGcsFileSystem when 59 | GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT IS set. 
60 | """ 61 | os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = "true" 62 | 63 | import gcsfs 64 | 65 | assert ( 66 | gcsfs.GCSFileSystem is gcsfs.extended_gcsfs.ExtendedGcsFileSystem 67 | ), "Should be ExtendedGcsFileSystem" 68 | -------------------------------------------------------------------------------- /docs/source/developer.rst: -------------------------------------------------------------------------------- 1 | For Developers 2 | ============== 3 | 4 | We welcome contributions to gcsfs! 5 | 6 | Please file issues and requests on github_ and we welcome pull requests. 7 | 8 | .. _github: https://github.com/fsspec/gcsfs/issues 9 | 10 | Testing 11 | ------- 12 | 13 | The testing framework supports using your own GCS-compliant endpoint by 14 | setting the "STORAGE_EMULATOR_HOST" environment variable. If this is 15 | not set, then an emulator will be spun up using ``docker`` and 16 | `fake-gcs-server`_. This emulator has almost all the functionality of 17 | real GCS. A small number of tests run differently or are skipped. 18 | 19 | If you want to actually test against real GCS, then you should set 20 | STORAGE_EMULATOR_HOST to "https://storage.googleapis.com" and also 21 | provide appropriate GCSFS_TEST_BUCKET, GCSFS_TEST_VERSIONED_BUCKET 22 | (to use for tests that target GCS object versioning; this bucket must have versioning enabled), 23 | GCSFS_ZONAL_TEST_BUCKET (to use for testing Rapid Storage features) and GCSFS_TEST_PROJECT, 24 | as well as setting your default Google credentials (or providing them via the fsspec config). 25 | 26 | When running tests against a real GCS endpoint, you have two options for test buckets: 27 | 28 | - **Provide existing buckets**: If you specify buckets that already exist, the 29 | test suite will manage objects *within* them (creating, modifying, and deleting 30 | objects as needed). The buckets themselves will **not** be deleted upon completion. 31 | **Warning**: The test suite will clear the contents of the bucket at the beginning and end of the 32 | test run, so be sure to use a bucket that does not contain important data. 33 | - **Let the tests create buckets**: If you specify bucket names that do not exist, 34 | the test suite will create them for the test run and automatically delete them 35 | during final cleanup. 36 | 37 | End-to-end Testing CI Pipeline 38 | ------------------------------ 39 | 40 | We have a Cloud Build pipeline for end-to-end tests which includes tests on zonal 41 | and regional buckets. When a pull request is created for the ``main`` branch, 42 | there will be an ``end-to-end-tests-trigger`` check in the GitHub checks section. 43 | 44 | The pipeline's behavior depends on the author of the pull request: 45 | 46 | - If the PR is created by an owner or a collaborator, the pipeline will be 47 | triggered immediately. 48 | - If the PR is from an external contributor, an owner or collaborator must add 49 | the comment ``/gcbrun`` to the PR to trigger the pipeline; 50 | until then, the pipeline remains in a failed state. 51 | 52 | The pipeline will also be triggered when a new commit is added to the PR. For 53 | external contributors, a new ``/gcbrun`` comment is required from an owner or 54 | collaborator after the new commit. The pipeline can also be manually 55 | re-triggered by adding a ``/gcbrun`` comment or by using the re-run option in the GitHub UI. 56 | 57 | The logs from the test run are available in the "details" section of the Checks 58 | tab in the pull request. 59 | 60 | ..
_fake-gcs-server: https://github.com/fsouza/fake-gcs-server 61 | 62 | .. raw:: html 63 | 64 | 66 | -------------------------------------------------------------------------------- /gcsfs/tests/test_mapping.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from gcsfs.tests.settings import TEST_BUCKET 4 | 5 | MAPPING_ROOT = TEST_BUCKET + "/mapping" 6 | 7 | 8 | def test_api(): 9 | import gcsfs 10 | 11 | assert "GCSMap" in dir(gcsfs) 12 | assert "mapping" in dir(gcsfs) 13 | 14 | 15 | def test_map_simple(gcs): 16 | d = gcs.get_mapper(MAPPING_ROOT) 17 | assert not d 18 | 19 | assert list(d) == list(d.keys()) == [] 20 | assert list(d.values()) == [] 21 | assert list(d.items()) == [] 22 | 23 | 24 | def test_map_default_gcsfilesystem(gcs): 25 | d = gcs.get_mapper(MAPPING_ROOT) 26 | assert d.fs is gcs 27 | 28 | 29 | def test_map_errors(gcs): 30 | d = gcs.get_mapper(MAPPING_ROOT) 31 | with pytest.raises(KeyError): 32 | d["nonexistent"] 33 | try: 34 | gcs.get_mapper("does-not-exist") 35 | except Exception as e: 36 | assert "does-not-exist" in str(e) 37 | 38 | 39 | def test_map_with_data(gcs): 40 | d = gcs.get_mapper(MAPPING_ROOT) 41 | d["x"] = b"123" 42 | assert list(d) == list(d.keys()) == ["x"] 43 | assert list(d.values()) == [b"123"] 44 | assert list(d.items()) == [("x", b"123")] 45 | assert d["x"] == b"123" 46 | assert bool(d) 47 | 48 | assert gcs.find(MAPPING_ROOT) == [TEST_BUCKET + "/mapping/x"] 49 | d["x"] = b"000" 50 | assert d["x"] == b"000" 51 | 52 | d["y"] = b"456" 53 | assert d["y"] == b"456" 54 | assert set(d) == {"x", "y"} 55 | 56 | d.clear() 57 | assert list(d) == [] 58 | 59 | 60 | def test_map_clear_empty(gcs): 61 | d = gcs.get_mapper(MAPPING_ROOT) 62 | d.clear() 63 | assert list(d) == [] 64 | d["1"] = b"1" 65 | assert list(d) == ["1"] or list(d) == ["1"] 66 | d.clear() 67 | assert list(d) == [] 68 | 69 | 70 | def test_map_pickle(gcs): 71 | d = gcs.get_mapper(MAPPING_ROOT) 72 | d["x"] = b"1" 73 | assert d["x"] == b"1" 74 | 75 | import pickle 76 | 77 | d2 = pickle.loads(pickle.dumps(d)) 78 | 79 | assert d2["x"] == b"1" 80 | 81 | 82 | def test_map_array(gcs): 83 | from array import array 84 | 85 | d = gcs.get_mapper(MAPPING_ROOT) 86 | d["x"] = array("B", [65] * 1000) 87 | 88 | assert d["x"] == b"A" * 1000 89 | 90 | 91 | def test_map_bytearray(gcs): 92 | d = gcs.get_mapper(MAPPING_ROOT) 93 | d["x"] = bytearray(b"123") 94 | 95 | assert d["x"] == b"123" 96 | 97 | 98 | def test_new_bucket(gcs): 99 | new_bucket = TEST_BUCKET + "new-bucket" 100 | try: 101 | gcs.rmdir(new_bucket) 102 | except: # noqa: E722 103 | pass 104 | with pytest.raises(Exception) as e: 105 | d = gcs.get_mapper(new_bucket, check=True) 106 | assert "create=True" in str(e.value) 107 | 108 | try: 109 | d = gcs.get_mapper(new_bucket, create=True) 110 | assert not d 111 | 112 | d = gcs.get_mapper(new_bucket + "/new-directory") 113 | assert not d 114 | finally: 115 | gcs.rmdir(new_bucket) 116 | 117 | 118 | def test_map_pickle(gcs): 119 | import pickle 120 | 121 | d = gcs.get_mapper(MAPPING_ROOT) 122 | d["x"] = b"1234567890" 123 | 124 | b = pickle.dumps(d) 125 | assert b"1234567890" not in b 126 | 127 | e = pickle.loads(b) 128 | 129 | assert dict(e) == {"x": b"1234567890"} 130 | -------------------------------------------------------------------------------- /gcsfs/checkers.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from base64 import b64encode 3 | from hashlib import md5 4 | 5 | from .retry import 
ChecksumError 6 | 7 | try: 8 | import crcmod 9 | except ImportError: 10 | crcmod = None 11 | 12 | 13 | class ConsistencyChecker: 14 | def __init__(self): 15 | pass 16 | 17 | def update(self, data: bytes): 18 | pass 19 | 20 | def validate_json_response(self, gcs_object): 21 | pass 22 | 23 | def validate_headers(self, headers): 24 | pass 25 | 26 | def validate_http_response(self, r): 27 | pass 28 | 29 | 30 | class MD5Checker(ConsistencyChecker): 31 | def __init__(self): 32 | self.md = md5() 33 | 34 | def update(self, data): 35 | self.md.update(data) 36 | 37 | def validate_json_response(self, gcs_object): 38 | mdback = gcs_object["md5Hash"] 39 | if b64encode(self.md.digest()) != mdback.encode(): 40 | raise ChecksumError("MD5 checksum failed") 41 | 42 | def validate_headers(self, headers): 43 | if headers is not None and "X-Goog-Hash" in headers: 44 | 45 | dig = [ 46 | bit.split("=")[1] 47 | for bit in headers["X-Goog-Hash"].split(",") 48 | if bit and bit.strip().startswith("md5=") 49 | ] 50 | if dig: 51 | if b64encode(self.md.digest()).decode().rstrip("=") != dig[0]: 52 | raise ChecksumError("Checksum failure") 53 | else: 54 | raise NotImplementedError( 55 | "No md5 checksum available to do consistency check. GCS does " 56 | "not provide md5 sums for composite objects." 57 | ) 58 | 59 | def validate_http_response(self, r): 60 | return self.validate_headers(r.headers) 61 | 62 | 63 | class SizeChecker(ConsistencyChecker): 64 | def __init__(self): 65 | self.size = 0 66 | 67 | def update(self, data: bytes): 68 | self.size += len(data) 69 | 70 | def validate_json_response(self, gcs_object): 71 | assert int(gcs_object["size"]) == self.size, "Size mismatch" 72 | 73 | def validate_http_response(self, r): 74 | assert r.content_length == self.size 75 | 76 | 77 | class Crc32cChecker(ConsistencyChecker): 78 | def __init__(self): 79 | self.crc32c = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 80 | 81 | def update(self, data: bytes): 82 | self.crc32c.update(data) 83 | 84 | def validate_json_response(self, gcs_object): 85 | # docs for gcs_object: https://cloud.google.com/storage/docs/json_api/v1/objects 86 | digest = self.crc32c.digest() 87 | digest_b64 = base64.b64encode(digest).decode() 88 | expected = gcs_object["crc32c"] 89 | 90 | if digest_b64 != expected: 91 | raise ChecksumError(f'Expected "{expected}". Got "{digest_b64}"') 92 | 93 | def validate_headers(self, headers): 94 | if headers is not None: 95 | hasher = headers.get("X-Goog-Hash", "") 96 | crc = [h.split("=", 1)[1] for h in hasher.split(",") if "crc32c" in h] 97 | if not crc: 98 | raise NotImplementedError("No crc32c checksum was provided by google!") 99 | if crc[0] != b64encode(self.crc32c.digest()).decode(): 100 | raise ChecksumError() 101 | 102 | def validate_http_response(self, r): 103 | return self.validate_headers(r.headers) 104 | 105 | 106 | def get_consistency_checker(consistency: str | None) -> ConsistencyChecker: 107 | if consistency == "size": 108 | return SizeChecker() 109 | elif consistency == "md5": 110 | return MD5Checker() 111 | elif consistency == "crc32c": 112 | if crcmod is None: 113 | raise ImportError( 114 | "The python package `crcmod` is required for `consistency='crc32c'`. 
" 115 | "This can be installed with `pip install gcsfs[crc]`" 116 | ) 117 | else: 118 | return Crc32cChecker() 119 | else: 120 | return ConsistencyChecker() 121 | -------------------------------------------------------------------------------- /gcsfs/tests/test_core_versioned.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import posixpath 4 | 5 | import pytest 6 | from google.cloud import storage 7 | 8 | from gcsfs import GCSFileSystem 9 | from gcsfs.tests.settings import TEST_VERSIONED_BUCKET 10 | 11 | a = TEST_VERSIONED_BUCKET + "/tmp/test/a" 12 | b = TEST_VERSIONED_BUCKET + "/tmp/test/b" 13 | 14 | # Flag to track if the bucket was created by this test run. 15 | _VERSIONED_BUCKET_CREATED_BY_TESTS = False 16 | 17 | 18 | def is_versioning_enabled(): 19 | """ 20 | Helper function to check if the test bucket has versioning enabled. 21 | Returns a tuple of (bool, reason_string). 22 | """ 23 | # Don't skip when using an emulator, as we create the versioned bucket ourselves. 24 | global _VERSIONED_BUCKET_CREATED_BY_TESTS 25 | if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com": 26 | return True, "" 27 | try: 28 | gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project")) 29 | if not gcs.exists(TEST_VERSIONED_BUCKET): 30 | logging.info( 31 | f"Creating versioned bucket for tests: {TEST_VERSIONED_BUCKET}" 32 | ) 33 | gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True) 34 | _VERSIONED_BUCKET_CREATED_BY_TESTS = True 35 | 36 | client = storage.Client( 37 | credentials=gcs.credentials.credentials, project=gcs.project 38 | ) 39 | bucket = client.get_bucket(TEST_VERSIONED_BUCKET) 40 | if bucket.versioning_enabled: 41 | return True, "" 42 | return ( 43 | False, 44 | f"Bucket '{TEST_VERSIONED_BUCKET}' does not have versioning enabled.", 45 | ) 46 | except Exception as e: 47 | return ( 48 | False, 49 | f"Could not verify versioning status for bucket '{TEST_VERSIONED_BUCKET}': {e}", 50 | ) 51 | 52 | 53 | pytestmark = pytest.mark.skipif( 54 | not is_versioning_enabled()[0], reason=is_versioning_enabled()[1] 55 | ) 56 | 57 | 58 | def test_info_versioned(gcs_versioned): 59 | with gcs_versioned.open(a, "wb") as wo: 60 | wo.write(b"v1") 61 | v1 = gcs_versioned.info(a)["generation"] 62 | assert v1 is not None 63 | with gcs_versioned.open(a, "wb") as wo: 64 | wo.write(b"v2") 65 | v2 = gcs_versioned.info(a)["generation"] 66 | assert v2 is not None and v1 != v2 67 | assert gcs_versioned.info(f"{a}#{v1}")["generation"] == v1 68 | assert gcs_versioned.info(f"{a}?generation={v2}")["generation"] == v2 69 | 70 | 71 | def test_cat_versioned(gcs_versioned): 72 | with gcs_versioned.open(b, "wb") as wo: 73 | wo.write(b"v1") 74 | v1 = gcs_versioned.info(b)["generation"] 75 | assert v1 is not None 76 | with gcs_versioned.open(b, "wb") as wo: 77 | wo.write(b"v2") 78 | assert gcs_versioned.cat(f"{b}#{v1}") == b"v1" 79 | 80 | 81 | def test_cp_versioned(gcs_versioned): 82 | with gcs_versioned.open(a, "wb") as wo: 83 | wo.write(b"v1") 84 | v1 = gcs_versioned.info(a)["generation"] 85 | assert v1 is not None 86 | with gcs_versioned.open(a, "wb") as wo: 87 | wo.write(b"v2") 88 | gcs_versioned.cp_file(f"{a}#{v1}", b) 89 | assert gcs_versioned.cat(b) == b"v1" 90 | 91 | 92 | def test_ls_versioned(gcs_versioned): 93 | with gcs_versioned.open(b, "wb") as wo: 94 | wo.write(b"v1") 95 | v1 = gcs_versioned.info(b)["generation"] 96 | with gcs_versioned.open(b, "wb") as wo: 97 | wo.write(b"v2") 98 | v2 = 
gcs_versioned.info(b)["generation"] 99 | dpath = posixpath.dirname(b) 100 | versions = {f"{b}#{v1}", f"{b}#{v2}"} 101 | assert versions == set(gcs_versioned.ls(dpath, versions=True)) 102 | assert versions == { 103 | entry["name"] for entry in gcs_versioned.ls(dpath, detail=True, versions=True) 104 | } 105 | assert gcs_versioned.ls(TEST_VERSIONED_BUCKET, versions=True) == [ 106 | f"{TEST_VERSIONED_BUCKET}/tmp" 107 | ] 108 | 109 | 110 | def test_find_versioned(gcs_versioned): 111 | with gcs_versioned.open(a, "wb") as wo: 112 | wo.write(b"v1") 113 | v1 = gcs_versioned.info(a)["generation"] 114 | with gcs_versioned.open(a, "wb") as wo: 115 | wo.write(b"v2") 116 | v2 = gcs_versioned.info(a)["generation"] 117 | versions = {f"{a}#{v1}", f"{a}#{v2}"} 118 | assert versions == set(gcs_versioned.find(a, versions=True)) 119 | assert versions == set(gcs_versioned.find(a, detail=True, versions=True)) 120 | 121 | 122 | def test_write_captures_generation(gcs_versioned): 123 | with gcs_versioned.open(a, "wb") as wo: 124 | wo.write(b"test content") 125 | assert wo.generation is not None 126 | assert wo.generation == gcs_versioned.info(a)["generation"] 127 | 128 | 129 | def test_write_captures_generation_multipart(gcs_versioned): 130 | with gcs_versioned.open(b, "wb") as wo: 131 | wo.write(b"first chunk") 132 | wo.flush() 133 | wo.write(b"second chunk") 134 | assert wo.generation is not None 135 | assert wo.generation == gcs_versioned.info(b)["generation"] 136 | -------------------------------------------------------------------------------- /gcsfs/tests/test_retry.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | import pickle 4 | from concurrent.futures import ProcessPoolExecutor 5 | 6 | import pytest 7 | import requests 8 | from requests.exceptions import ProxyError 9 | 10 | from gcsfs.retry import HttpError, is_retriable, validate_response 11 | from gcsfs.tests.settings import TEST_BUCKET 12 | from gcsfs.tests.utils import tmpfile 13 | 14 | 15 | def test_tempfile(): 16 | with tmpfile() as fn: 17 | with open(fn, "w"): 18 | pass 19 | assert os.path.exists(fn) 20 | assert not os.path.exists(fn) 21 | 22 | 23 | def test_retriable_exception(): 24 | e = requests.exceptions.Timeout() 25 | assert is_retriable(e) 26 | e = ValueError 27 | assert not is_retriable(e) 28 | 29 | e = HttpError({"message": "", "code": 500}) 30 | assert is_retriable(e) 31 | 32 | e = HttpError({"message": "", "code": "500"}) 33 | assert is_retriable(e) 34 | 35 | e = HttpError({"message": "", "code": 400}) 36 | assert not is_retriable(e) 37 | 38 | e = HttpError({"code": "429"}) 39 | assert is_retriable(e) 40 | 41 | e = ProxyError() 42 | assert is_retriable(e) 43 | 44 | 45 | def test_pickle_serialization(): 46 | expected = HttpError({"message": "", "code": 400}) 47 | 48 | # Serialize/Deserialize 49 | serialized = pickle.dumps(expected) 50 | actual = pickle.loads(serialized) 51 | 52 | is_same_type = type(expected) is type(actual) 53 | is_same_args = expected.args == actual.args 54 | 55 | assert is_same_type and is_same_args 56 | 57 | 58 | def conditional_exception(process_id): 59 | # Raise only on second process (id=1) 60 | if process_id == 1: 61 | raise HttpError({"message": "", "code": 400}) 62 | 63 | 64 | def test_multiprocessing_error_handling(): 65 | # Ensure spawn context to avoid forking issues 66 | ctx = multiprocessing.get_context("spawn") 67 | 68 | # Run on two processes 69 | with ProcessPoolExecutor(2, mp_context=ctx) as p: 70 | results = 
p.map(conditional_exception, range(2)) 71 | 72 | with pytest.raises(HttpError): 73 | _ = [result for result in results] 74 | 75 | 76 | def test_validate_response(): 77 | validate_response(200, None, "/path") 78 | 79 | # HttpError with no JSON body 80 | with pytest.raises(HttpError) as e: 81 | validate_response(503, b"", "/path") 82 | assert e.value.code == 503 83 | assert e.value.message == ", 503" 84 | 85 | # HttpError with JSON body 86 | j = '{"error": {"code": 503, "message": "Service Unavailable"}}' 87 | with pytest.raises(HttpError) as e: 88 | validate_response(503, j, "/path") 89 | assert e.value.code == 503 90 | assert e.value.message == "Service Unavailable, 503" 91 | 92 | # 403 93 | j = '{"error": {"message": "Not ok"}}' 94 | with pytest.raises(IOError, match="Forbidden: /path\nNot ok"): 95 | validate_response(403, j, "/path") 96 | 97 | # 404 98 | with pytest.raises(FileNotFoundError): 99 | validate_response(404, b"", "/path") 100 | 101 | # 502 102 | with pytest.raises(ProxyError): 103 | validate_response(502, b"", "/path") 104 | 105 | 106 | def test_validate_response_error_is_string(): 107 | # HttpError with JSON body 108 | j = '{"error": "Too Many Requests"}' 109 | with pytest.raises(HttpError) as e: 110 | validate_response(429, j, "/path") 111 | assert e.value.code == 429 112 | assert e.value.message == "Too Many Requests, 429" 113 | 114 | 115 | @pytest.mark.parametrize( 116 | ["file_path", "validate_get_error", "validate_list_error", "expected_error"], 117 | [ 118 | ( 119 | "/missing", 120 | FileNotFoundError, 121 | None, 122 | FileNotFoundError, 123 | ), # Not called 124 | ( 125 | "/missing", 126 | OSError("Forbidden"), 127 | FileNotFoundError, 128 | FileNotFoundError, 129 | ), 130 | ( 131 | "/2014-01-01.csv", 132 | None, 133 | None, 134 | None, 135 | ), 136 | ( 137 | "/2014-01-01.csv", 138 | OSError("Forbidden"), 139 | None, 140 | None, 141 | ), 142 | ], 143 | ids=[ 144 | "missing_with_get_perms", 145 | "missing_with_list_perms", 146 | "existing_with_get_perms", 147 | "existing_with_list_perms", 148 | ], 149 | ) 150 | def test_metadata_read_permissions( 151 | file_path, validate_get_error, validate_list_error, expected_error, gcs 152 | ): 153 | def _validate_response(self, status, content, path): 154 | if path.endswith(f"/o{file_path}") and validate_get_error is not None: 155 | raise validate_get_error 156 | if path.endswith("/o/") and validate_list_error is not None: 157 | raise validate_list_error 158 | validate_response(status, content, path) 159 | 160 | if expected_error is None: 161 | gcs.ls(TEST_BUCKET + file_path) 162 | gcs.info(TEST_BUCKET + file_path) 163 | assert gcs.exists(TEST_BUCKET + file_path) 164 | else: 165 | with pytest.raises(expected_error): 166 | gcs.ls(TEST_BUCKET + file_path) 167 | with pytest.raises(expected_error): 168 | gcs.info(TEST_BUCKET + file_path) 169 | assert gcs.exists(TEST_BUCKET + file_path) is False 170 | -------------------------------------------------------------------------------- /docs/source/code-of-conduct.rst: -------------------------------------------------------------------------------- 1 | Code of Conduct 2 | =============== 3 | 4 | All participants in the fsspec community are expected to adhere to a Code of Conduct. 
5 | 6 | As contributors and maintainers of this project, and in the interest of 7 | fostering an open and welcoming community, we pledge to respect all people who 8 | contribute through reporting issues, posting feature requests, updating 9 | documentation, submitting pull requests or patches, and other activities. 10 | 11 | We are committed to making participation in this project a harassment-free 12 | experience for everyone, treating everyone as unique humans deserving of 13 | respect. 14 | 15 | Examples of unacceptable behaviour by participants include: 16 | 17 | - The use of sexualized language or imagery 18 | - Personal attacks 19 | - Trolling or insulting/derogatory comments 20 | - Public or private harassment 21 | - Publishing others' private information, such as physical or electronic 22 | addresses, without explicit permission 23 | - Other unethical or unprofessional conduct 24 | 25 | Project maintainers have the right and responsibility to remove, edit, or 26 | reject comments, commits, code, wiki edits, issues, and other contributions 27 | that are not aligned to this Code of Conduct, or to ban temporarily or 28 | permanently any contributor for other behaviours that they deem inappropriate, 29 | threatening, offensive, or harmful. 30 | 31 | By adopting this Code of Conduct, project maintainers commit themselves 32 | to fairly and consistently applying these principles to every aspect of 33 | managing this project. Project maintainers who do not follow or enforce 34 | the Code of Conduct may be permanently removed from the project team. 35 | 36 | This code of conduct applies both within project spaces and in public 37 | spaces when an individual is representing the project or its community. 38 | 39 | If you feel the code of conduct has been violated, please report the 40 | incident to the fsspec core team. 41 | 42 | Reporting 43 | --------- 44 | 45 | If you believe someone is violating the Code of Conduct, we ask that you report it 46 | to the Project by emailing community@anaconda.com. All reports will be kept 47 | confidential. In some cases we may determine that a public statement will need 48 | to be made. If that's the case, the identities of all victims and reporters 49 | will remain confidential unless those individuals instruct us otherwise. 50 | If you believe anyone is in physical danger, please notify appropriate law 51 | enforcement first. 52 | 53 | In your report please include: 54 | 55 | - Your contact info 56 | - Names (real, nicknames, or pseudonyms) of any individuals involved. 57 | If there were other witnesses besides you, please try to include them as well. 58 | - When and where the incident occurred. Please be as specific as possible. 59 | - Your account of what occurred. If there is a publicly available record 60 | please include a link. 61 | - Any extra context you believe existed for the incident. 62 | - If you believe this incident is ongoing. 63 | - If you believe any member of the core team has a conflict of interest 64 | in adjudicating the incident. 65 | - What, if any, corrective response you believe would be appropriate. 66 | - Any other information you believe we should have. 67 | 68 | Core team members are obligated to maintain confidentiality with regard 69 | to the reporter and details of an incident. 70 | 71 | What happens next? 72 | ~~~~~~~~~~~~~~~~~~ 73 | 74 | You will receive an email acknowledging receipt of your complaint. 75 | The core team will immediately meet to review the incident and determine: 76 | 77 | - What happened. 
78 | - Whether this event constitutes a code of conduct violation. 79 | - Who the bad actor was. 80 | - Whether this is an ongoing situation, or if there is a threat to anyone's 81 | physical safety. 82 | - If this is determined to be an ongoing incident or a threat to physical safety, 83 | the working group's immediate priority will be to protect everyone involved. 84 | 85 | If a member of the core team is one of the named parties, they will not be 86 | included in any discussions, and will not be provided with any confidential 87 | details from the reporter. 88 | 89 | If anyone on the core team believes they have a conflict of interest in 90 | adjudicating on a reported issue, they will inform the other core team 91 | members, and exempt themselves from any discussion about the issue. 92 | Following this declaration, they will not be provided with any confidential 93 | details from the reporter. 94 | 95 | Once the working group has a complete account of the events, they will make a 96 | decision as to how to respond. Responses may include: 97 | 98 | - Nothing (if we determine no violation occurred). 99 | - A private reprimand from the working group to the individual(s) involved. 100 | - A public reprimand. 101 | - An imposed vacation. 102 | - A permanent or temporary ban from some or all spaces (GitHub repositories, etc.) 103 | - A request for a public or private apology. 104 | 105 | We'll respond within one week to the person who filed the report with either a 106 | resolution or an explanation of why the situation is not yet resolved. 107 | 108 | Once we've determined our final action, we'll contact the original reporter 109 | to let them know what action (if any) we'll be taking. We'll take into account 110 | feedback from the reporter on the appropriateness of our response, but we 111 | don't guarantee we'll act on it. 112 | 113 | Acknowledgement 114 | --------------- 115 | 116 | This CoC is modified from the one by `BeeWare`_, which in turn refers to 117 | the `Contributor Covenant`_ and the `Django`_ project. 118 | 119 | .. _BeeWare: https://beeware.org/community/behavior/code-of-conduct/ 120 | .. _Contributor Covenant: https://www.contributor-covenant.org/version/1/3/0/code-of-conduct/ 121 | .. _Django: https://www.djangoproject.com/conduct/reporting/ 122 | 123 | .. raw:: html 124 | 125 | 127 | -------------------------------------------------------------------------------- /gcsfs/retry.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import logging 4 | import random 5 | 6 | import aiohttp.client_exceptions 7 | import google.auth.exceptions 8 | import requests.exceptions 9 | from decorator import decorator 10 | 11 | logger = logging.getLogger("gcsfs") 12 | 13 | 14 | class HttpError(Exception): 15 | """Holds the message and code from cloud errors.""" 16 | 17 | def __init__(self, error_response=None): 18 | # Save error_response for potential pickle. 
19 | self._error_response = error_response 20 | if error_response: 21 | self.code = error_response.get("code", None) 22 | self.message = error_response.get("message", "") 23 | if self.code: 24 | if isinstance(self.message, bytes): 25 | self.message += (", %s" % self.code).encode() 26 | else: 27 | self.message += ", %s" % self.code 28 | else: 29 | self.message = "" 30 | self.code = None 31 | # Call the base class constructor with the parameters it needs 32 | super().__init__(self.message) 33 | 34 | def __reduce__(self): 35 | """This makes the Exception pickleable.""" 36 | 37 | # This is basically deconstructing the HttpError when pickled. 38 | return HttpError, (self._error_response,) 39 | 40 | 41 | class ChecksumError(Exception): 42 | """Raised when the md5 hash of the content does not match the header.""" 43 | 44 | pass 45 | 46 | 47 | RETRIABLE_EXCEPTIONS = ( 48 | requests.exceptions.ChunkedEncodingError, 49 | requests.exceptions.ConnectionError, 50 | requests.exceptions.ReadTimeout, 51 | requests.exceptions.Timeout, 52 | requests.exceptions.ProxyError, 53 | requests.exceptions.SSLError, 54 | requests.exceptions.ContentDecodingError, 55 | google.auth.exceptions.RefreshError, 56 | aiohttp.client_exceptions.ClientError, 57 | ChecksumError, 58 | ) 59 | 60 | 61 | errs = list(range(500, 505)) + [ 62 | # Request Timeout 63 | 408, 64 | # Too Many Requests 65 | 429, 66 | ] 67 | errs = set(errs + [str(e) for e in errs]) 68 | 69 | 70 | def is_retriable(exception): 71 | """Returns True if this exception is retriable.""" 72 | 73 | if isinstance(exception, HttpError): 74 | # Add 401 to retriable errors when it's an auth expiration issue 75 | if exception.code == 401 and "Invalid Credentials" in str(exception.message): 76 | return True 77 | return exception.code in errs 78 | 79 | return isinstance(exception, RETRIABLE_EXCEPTIONS) 80 | 81 | 82 | def validate_response(status, content, path, args=None): 83 | """ 84 | Check the requests object r, raise error if it's not ok. 85 | 86 | Parameters 87 | ---------- 88 | r: requests response object 89 | path: associated URL path, for error messages 90 | """ 91 | if status >= 400 and status != 499: 92 | # 499 is special "upload was cancelled" status 93 | if args: 94 | from .core import quote 95 | 96 | path = path.format(*[quote(p) for p in args]) 97 | if status == 404: 98 | raise FileNotFoundError(path) 99 | 100 | error = None 101 | if hasattr(content, "decode"): 102 | content = content.decode() 103 | try: 104 | error = json.loads(content)["error"] 105 | # Sometimes the error message is a string. 
106 | if isinstance(error, str): 107 | msg = error 108 | else: 109 | msg = error["message"] 110 | except json.decoder.JSONDecodeError: 111 | msg = content 112 | 113 | if status == 403: 114 | raise OSError(f"Forbidden: {path}\n{msg}") 115 | elif status == 412: 116 | raise FileExistsError(path) 117 | elif status == 502: 118 | raise requests.exceptions.ProxyError() 119 | elif "invalid" in str(msg): 120 | raise ValueError(f"Bad Request: {path}\n{msg}") 121 | elif error and not isinstance(error, str): 122 | raise HttpError(error) 123 | elif status: 124 | raise HttpError({"code": status, "message": msg}) # text-like 125 | else: 126 | raise RuntimeError(msg) 127 | 128 | 129 | @decorator 130 | async def retry_request(func, retries=6, *args, **kwargs): 131 | for retry in range(retries): 132 | try: 133 | if retry > 0: 134 | await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32)) 135 | return await func(*args, **kwargs) 136 | except ( 137 | HttpError, 138 | requests.exceptions.RequestException, 139 | google.auth.exceptions.GoogleAuthError, 140 | ChecksumError, 141 | aiohttp.client_exceptions.ClientError, 142 | ) as e: 143 | if ( 144 | isinstance(e, HttpError) 145 | and e.code == 400 146 | and "requester pays" in e.message 147 | ): 148 | msg = ( 149 | "Bucket is requester pays. " 150 | "Set `requester_pays=True` when creating the GCSFileSystem." 151 | ) 152 | raise ValueError(msg) from e 153 | # Special test for 404 to avoid retrying the request 154 | if ( 155 | isinstance(e, aiohttp.client_exceptions.ClientResponseError) 156 | and e.status == 404 157 | ): 158 | logger.debug("Request returned 404, no retries.") 159 | raise e 160 | if isinstance(e, HttpError) and e.code == 404: 161 | logger.debug("Request returned 404, no retries.") 162 | raise e 163 | if retry == retries - 1: 164 | logger.exception(f"{func.__name__} out of retries on exception: {e}") 165 | raise e 166 | if is_retriable(e): 167 | logger.debug(f"{func.__name__} retrying after exception: {e}") 168 | continue 169 | logger.exception(f"{func.__name__} non-retriable exception: {e}") 170 | raise e 171 | -------------------------------------------------------------------------------- /gcsfs/tests/test_checkers.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from hashlib import md5 3 | 4 | import pytest 5 | 6 | from gcsfs.checkers import Crc32cChecker, MD5Checker, SizeChecker, crcmod 7 | from gcsfs.retry import ChecksumError 8 | 9 | 10 | def google_response_from_data(expected_data: bytes, actual_data=None): 11 | actual_data = actual_data or expected_data 12 | checksum = md5(actual_data) 13 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 14 | if crcmod is not None: 15 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 16 | checksum.update(actual_data) 17 | crc = base64.b64encode(checksum.digest()).decode() 18 | 19 | class response: 20 | content_length = len(actual_data) 21 | headers = {"X-Goog-Hash": f"md5={checksum_b64}"} 22 | if crcmod is not None: 23 | headers["X-Goog-Hash"] += f", crc32c={crc}" 24 | 25 | return response 26 | 27 | 28 | def google_response_from_data_with_reverse_header_order( 29 | expected_data: bytes, actual_data=None 30 | ): 31 | actual_data = actual_data or expected_data 32 | checksum = md5(actual_data) 33 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 34 | if crcmod is not None: 35 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 36 | checksum.update(actual_data) 37 | crc = 
base64.b64encode(checksum.digest()).decode() 38 | 39 | class response: 40 | content_length = len(actual_data) 41 | headers = {} 42 | if crcmod is not None: 43 | headers["X-Goog-Hash"] = f"crc32c={crc}, md5={checksum_b64}" 44 | else: 45 | headers["X-Goog-Hash"] = f"md5={checksum_b64}" 46 | 47 | return response 48 | 49 | 50 | def google_json_response_from_data(expected_data: bytes, actual_data=None): 51 | actual_data = actual_data or expected_data 52 | checksum = md5(actual_data) 53 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 54 | 55 | response = {"md5Hash": checksum_b64, "size": len(actual_data)} 56 | 57 | # some manual checksums verified using gsutil ls -L 58 | # also can add using https://crccalc.com/ 59 | # be careful about newlines 60 | crc32c_points = { 61 | b"hello world\n": "8P9ykg==", 62 | b"different checksum": "DoesntMatter==", 63 | } 64 | 65 | try: 66 | response["crc32c"] = crc32c_points[actual_data] 67 | except KeyError: 68 | pass 69 | 70 | return response 71 | 72 | 73 | params = [ 74 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 75 | (MD5Checker(), b"hello world", b"hello world", ()), 76 | ] 77 | 78 | if crcmod is not None: 79 | params.append( 80 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)) 81 | ) 82 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 83 | 84 | 85 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 86 | def test_validate_headers(checker, data, actual_data, raises): 87 | response = google_response_from_data(actual_data) 88 | checker.update(data) 89 | 90 | if raises: 91 | with pytest.raises(raises): 92 | checker.validate_headers(response.headers) 93 | else: 94 | checker.validate_headers(response.headers) 95 | 96 | 97 | params = [ 98 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 99 | (MD5Checker(), b"hello world", b"hello world", ()), 100 | ] 101 | 102 | if crcmod is not None: 103 | params.append( 104 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)) 105 | ) 106 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 107 | 108 | 109 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 110 | def test_validate_headers_with_reverse_order(checker, data, actual_data, raises): 111 | response = google_response_from_data_with_reverse_header_order(actual_data) 112 | checker.update(data) 113 | 114 | if raises: 115 | with pytest.raises(raises): 116 | checker.validate_headers(response.headers) 117 | else: 118 | checker.validate_headers(response.headers) 119 | 120 | 121 | params = [ 122 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 123 | (MD5Checker(), b"hello world", b"hello world", ()), 124 | (SizeChecker(), b"hello world", b"hello world", ()), 125 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)), 126 | ] 127 | 128 | if crcmod is not None: 129 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 130 | params.append( 131 | (Crc32cChecker(), b"hello world", b"different size", (ChecksumError,)) 132 | ) 133 | 134 | 135 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 136 | def test_checker_validate_http_response(checker, data, actual_data, raises): 137 | response = google_response_from_data(data, actual_data=actual_data) 138 | checker.update(data) 139 | if raises: 140 | with pytest.raises(raises): 141 | checker.validate_http_response(response) 142 | else: 143 | 
checker.validate_http_response(response) 144 | 145 | 146 | params = [ 147 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 148 | (MD5Checker(), b"hello world", b"hello world", ()), 149 | (SizeChecker(), b"hello world", b"hello world", ()), 150 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)), 151 | ] 152 | if crcmod is not None: 153 | params.extend( 154 | [ 155 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)), 156 | (Crc32cChecker(), b"hello world\n", b"hello world\n", ()), 157 | ] 158 | ) 159 | 160 | 161 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 162 | def test_checker_validate_json_response(checker, data, actual_data, raises): 163 | response = google_json_response_from_data(data, actual_data=actual_data) 164 | checker.update(data) 165 | if raises: 166 | with pytest.raises(raises): 167 | checker.validate_json_response(response) 168 | else: 169 | checker.validate_json_response(response) 170 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | GCSFS 2 | ===== 3 | 4 | A pythonic file-system interface to `Google Cloud Storage`_. 5 | 6 | Please file issues and requests on github_ and we welcome pull requests. 7 | 8 | .. _github: https://github.com/fsspec/gcsfs/issues 9 | 10 | 11 | This package depends on fsspec_, and inherits many useful behaviours from there, 12 | including integration with Dask, and the facility for key-value dict-like 13 | objects of the type used by zarr. 14 | 15 | .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ 16 | 17 | Installation 18 | ------------ 19 | 20 | The GCSFS library can be installed using ``conda``: 21 | 22 | .. code-block:: bash 23 | 24 | conda install -c conda-forge gcsfs 25 | 26 | or ``pip``: 27 | 28 | .. code-block:: bash 29 | 30 | pip install gcsfs 31 | 32 | or by cloning the repository: 33 | 34 | .. code-block:: bash 35 | 36 | git clone https://github.com/fsspec/gcsfs/ 37 | cd gcsfs/ 38 | pip install . 39 | 40 | Examples 41 | -------- 42 | 43 | Locate and read a file: 44 | 45 | .. code-block:: python 46 | 47 | >>> import gcsfs 48 | >>> fs = gcsfs.GCSFileSystem(project='my-google-project') 49 | >>> fs.ls('my-bucket') 50 | ['my-file.txt'] 51 | >>> with fs.open('my-bucket/my-file.txt', 'rb') as f: 52 | ... print(f.read()) 53 | b'Hello, world' 54 | 55 | (see also :meth:`~gcsfs.core.GCSFileSystem.walk` and :meth:`~gcsfs.core.GCSFileSystem.glob`) 56 | 57 | Read with delimited blocks: 58 | 59 | .. code-block:: python 60 | 61 | >>> fs.read_block(path, offset=1000, length=10, delimiter=b'\n') 62 | b'A whole line of text\n' 63 | 64 | Write with blocked caching: 65 | 66 | .. code-block:: python 67 | 68 | >>> with fs.open('mybucket/new-file', 'wb') as f: 69 | ... f.write(2*2**20 * b'a') 70 | ... f.write(2*2**20 * b'a') # data is flushed and file closed 71 | >>> fs.du('mybucket/new-file') 72 | {'mybucket/new-file': 4194304} 73 | 74 | Because GCSFS faithfully copies the Python file interface it can be used 75 | smoothly with other projects that consume the file interface like ``gzip`` or 76 | ``pandas``. 77 | 78 | .. code-block:: python 79 | 80 | >>> with fs.open('mybucket/my-file.csv.gz', 'rb') as f: 81 | ... g = gzip.GzipFile(fileobj=f) # Decompress data with gzip 82 | ... 
df = pd.read_csv(g) # Read CSV file with Pandas 83 | 84 | Credentials 85 | ----------- 86 | 87 | Several modes of authentication are supported: 88 | 89 | - if ``token=None`` (default), GCSFS will attempt to use your default gcloud 90 | credentials, or attempt to get credentials from the Google metadata 91 | service, or fall back to anonymous access. This will work for most 92 | users without further action. Note that the default project may also 93 | be found, but it is often best to supply this anyway (only affects bucket- 94 | level operations). 95 | 96 | - if ``token='cloud'``, we assume we are running within Google (compute 97 | or container engine) and fetch the credentials automatically from the 98 | metadata service. 99 | 100 | - if ``token=dict(...)`` or ``token=<filepath>``, you may supply a token 101 | generated by the gcloud_ utility. This can be: 102 | 103 | - a python dictionary 104 | 105 | - the path to a file containing the JSON returned by logging in with the 106 | gcloud CLI tool (e.g., 107 | ``~/.config/gcloud/application_default_credentials.json`` or 108 | ``~/.config/gcloud/legacy_credentials/<account email>/adc.json``) 110 | 111 | - the path to a service account key 112 | 113 | - a google.auth.credentials.Credentials_ object 114 | 115 | Note that ``~`` will not be automatically expanded to the user home 116 | directory, and must be manually expanded with a utility like 117 | ``os.path.expanduser()``. 118 | 119 | - you can also generate tokens via OAuth2 in the browser using ``token='browser'``, 120 | which gcsfs then caches in a special file, ~/.gcs_tokens, and can subsequently be accessed with ``token='cache'``. 121 | 122 | - anonymous-only access can be selected using ``token='anon'``, e.g. to access 123 | public resources such as 'anaconda-public-data'. 124 | 125 | .. _google.auth.credentials.Credentials: https://google-auth.readthedocs.io/en/master/reference/google.auth.credentials.html#google.auth.credentials.Credentials 126 | 127 | The acquired session tokens are *not* preserved when serializing the instances, so 128 | it is safe to pass them to worker processes on other machines if using in a 129 | distributed computation context. If credentials are given by a file path, however, 130 | then this file must exist on every machine. 131 | 132 | 
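As a quick illustration of the options above (the project name and key-file path here are placeholders, not real values), the ``token`` argument is simply passed to the constructor:

.. code-block:: python

    >>> import gcsfs
    >>> # anonymous access to public data
    >>> fs = gcsfs.GCSFileSystem(token='anon')
    >>> # a service account key file; remember the path must exist on every machine
    >>> fs = gcsfs.GCSFileSystem(project='my-google-project',
    ...                          token='/path/to/service-account-key.json')
    >>> # the same credentials supplied as an already-loaded dictionary
    >>> import json
    >>> with open('/path/to/service-account-key.json') as f:
    ...     token = json.load(f)
    >>> fs = gcsfs.GCSFileSystem(project='my-google-project', token=token)
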
133 | Integration 134 | ----------- 135 | 136 | The libraries ``intake``, ``pandas`` and ``dask`` accept URLs with the prefix 137 | "gcs://", and will use gcsfs to complete the IO operation in question. The 138 | IO functions take an argument ``storage_options``, which will be passed 139 | to ``GCSFileSystem``, for example: 140 | 141 | .. code-block:: python 142 | 143 | df = pd.read_excel("gcs://bucket/path/file.xls", 144 | storage_options={"token": "anon"}) 145 | 146 | This gives you the chance to pass any credentials or other necessary 147 | arguments needed by gcsfs. 148 | 149 | 150 | Async 151 | ----- 152 | 153 | ``gcsfs`` is implemented using ``aiohttp``, and offers async functionality. 154 | A number of methods of ``GCSFileSystem`` are ``async``, and for each of these, 155 | there is also a synchronous version with the same name and lack of a "_" 156 | prefix. 157 | 158 | If you wish to call ``gcsfs`` from async code, then you should pass 159 | ``asynchronous=True, loop=loop`` to the constructor (the latter is optional, 160 | if you wish to use both async and sync methods). You must also explicitly 161 | await the client creation before making any GCS call. 162 | 163 | .. code-block:: python 164 | 165 | async def run_program(): 166 | gcs = GCSFileSystem(asynchronous=True) 167 | print(await gcs._ls("")) 168 | 169 | asyncio.run(run_program()) # or call from your async code 170 | 171 | Concurrent async operations are also used internally for bulk operations 172 | such as ``pipe/cat``, ``get/put``, ``cp/mv/rm``. The async calls are 173 | hidden behind a synchronisation layer, so are designed to be called 174 | from normal code. If you are *not* 175 | using async-style programming, you do not need to know about how this 176 | works, but you might find the implementation interesting. 177 | 178 | For every synchronous function there is an asynchronous one prefixed by ``_``, but 179 | the ``open`` operation does not support async operation. If you need to open 180 | a file asynchronously, it is better to asynchronously download it to a 181 | temporary location and work with it from there. 182 | 183 | Proxy 184 | ----- 185 | 186 | ``gcsfs`` uses ``aiohttp`` for calls to the storage api, which by default 187 | ignores ``HTTP_PROXY/HTTPS_PROXY`` environment variables. To read 188 | proxy settings from the environment, provide ``session_kwargs`` as follows: 189 | 190 | .. code-block:: python 191 | 192 | fs = GCSFileSystem(project='my-google-project', session_kwargs={'trust_env': True}) 193 | 194 | For further reference check `aiohttp proxy support`_. 195 | 196 | .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support 197 | 198 | 199 | Contents 200 | ======== 201 | 202 | .. toctree:: 203 | api 204 | developer 205 | fuse 206 | changelog 207 | code-of-conduct 208 | :maxdepth: 2 209 | 210 | 211 | .. _Google Cloud Storage: https://cloud.google.com/storage/docs/ 212 | 213 | .. _gcloud: https://cloud.google.com/sdk/docs/ 214 | 215 | .. _dask: http://dask.pydata.org/en/latest/remote-data-services.html 216 | 217 | .. _zarr: http://zarr.readthedocs.io/en/latest/tutorial.html#storage-alternatives 218 | 219 | Indices and tables 220 | ================== 221 | 222 | * :ref:`genindex` 223 | * :ref:`modindex` 224 | * :ref:`search` 225 | 226 | 227 | These docs pages collect anonymous tracking data using goatcounter, and the 228 | dashboard is available to the public: https://gcsfs.goatcounter.com/ . 229 | 230 | .. raw:: html 231 | 232 | 234 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. 
latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\S3Fs.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\S3Fs.ghc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 
244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /cloudbuild/e2e-tests-cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | substitutions: 2 | _REGION: "us-central1" 3 | _ZONE: "us-central1-a" 4 | _SHORT_BUILD_ID: ${BUILD_ID:0:8} 5 | 6 | steps: 7 | # Step 0: Generate a persistent SSH key for this build run. 8 | # This prevents gcloud from adding a new key to the OS Login profile on every ssh/scp command. 9 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 10 | id: "generate-ssh-key" 11 | entrypoint: "bash" 12 | args: 13 | - "-c" 14 | - | 15 | mkdir -p /workspace/.ssh 16 | # Generate the SSH key 17 | ssh-keygen -t rsa -f /workspace/.ssh/google_compute_engine -N '' -C gcb 18 | # Save the public key content to a file for the cleanup step 19 | cat /workspace/.ssh/google_compute_engine.pub > /workspace/gcb_ssh_key.pub 20 | waitFor: ["-"] 21 | 22 | # Step 1: Create a unique standard GCS bucket for the test run. 23 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 24 | id: "create-standard-bucket" 25 | entrypoint: "gcloud" 26 | args: 27 | - "storage" 28 | - "buckets" 29 | - "create" 30 | - "gs://gcsfs-test-standard-${_SHORT_BUILD_ID}" 31 | - "--project=${PROJECT_ID}" 32 | - "--location=${_REGION}" 33 | waitFor: ["-"] 34 | 35 | # Step 2: Create a unique versioned GCS bucket for the test run. 36 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 37 | id: "create-versioned-bucket" 38 | entrypoint: "gcloud" 39 | args: 40 | - "storage" 41 | - "buckets" 42 | - "create" 43 | - "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}" 44 | - "--project=${PROJECT_ID}" 45 | - "--location=${_REGION}" 46 | waitFor: ["-"] 47 | 48 | # Step 2a: Enable versioning on the versioned bucket. 49 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 50 | id: "enable-bucket-versioning" 51 | entrypoint: "gcloud" 52 | args: 53 | - "storage" 54 | - "buckets" 55 | - "update" 56 | - "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}" 57 | - "--versioning" 58 | waitFor: 59 | - "create-versioned-bucket" 60 | 61 | # Step 3: Create a GCE VM to run the tests. 62 | # The VM is created in the same zone as the buckets to test rapid storage features. 63 | # It's given the 'cloud-platform' scope to allow it to access GCS and other services. 64 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 65 | id: "create-vm" 66 | entrypoint: "gcloud" 67 | args: 68 | - "compute" 69 | - "instances" 70 | - "create" 71 | - "gcsfs-test-vm-${_SHORT_BUILD_ID}" 72 | - "--project=${PROJECT_ID}" 73 | - "--zone=${_ZONE}" 74 | - "--machine-type=e2-medium" 75 | - "--image-family=debian-13" 76 | - "--image-project=debian-cloud" 77 | - "--service-account=${_ZONAL_VM_SERVICE_ACCOUNT}" 78 | - "--scopes=https://www.googleapis.com/auth/cloud-platform" # Full access to project APIs 79 | - "--metadata=enable-oslogin=TRUE" 80 | waitFor: ["-"] 81 | 82 | # Step 4: Run the integration tests inside the newly created VM. 83 | # This step uses 'gcloud compute ssh' to execute a remote script. 
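# In outline (see the step body below): it waits up to 10 attempts for SSH on the new VM
# to become reachable, copies the build workspace onto the VM with `gcloud compute scp`,
# and then runs a remote script that creates a virtualenv, installs the test dependencies,
# exports the bucket names created above as environment variables, and invokes pytest twice
# (the standard suite, then the Zonal/extended tests).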
84 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 85 | id: "run-tests-on-vm" 86 | entrypoint: "bash" 87 | args: 88 | - "-c" 89 | - | 90 | set -e 91 | # Wait for the VM to be fully initialized and SSH to be ready. 92 | for i in {1..10}; do 93 | if gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="echo VM is ready"; then 94 | break # Break if SSH is successful 95 | fi 96 | echo "Waiting for VM to become available... (attempt $$i/10)" 97 | sleep 15 98 | done 99 | 100 | # Copy the source code from the Cloud Build workspace to the VM's home directory, using the generated key. 101 | gcloud compute scp --recurse . gcsfs-test-vm-${_SHORT_BUILD_ID}:~ --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine 102 | 103 | # Script to be executed on the VM. 104 | # This script installs dependencies, sets environment variables, and runs pytest. 105 | VM_SCRIPT=" 106 | set -e 107 | echo '--- Installing dependencies on VM ---' 108 | sudo apt-get update > /dev/null 109 | sudo apt-get install -y python3-pip python3-venv fuse fuse3 libfuse2 > /dev/null 110 | 111 | echo '--- Installing Python and dependencies on VM ---' 112 | python3 -m venv env 113 | source env/bin/activate 114 | 115 | pip install --upgrade pip > /dev/null 116 | # Install testing libraries explicitly, as they are not in setup.py 117 | pip install pytest pytest-timeout pytest-subtests pytest-asyncio fusepy google-cloud-storage > /dev/null 118 | pip install -e . > /dev/null 119 | 120 | echo '--- Preparing test environment on VM ---' 121 | export GCSFS_TEST_BUCKET='gcsfs-test-standard-${_SHORT_BUILD_ID}' 122 | export GCSFS_TEST_VERSIONED_BUCKET='gcsfs-test-versioned-${_SHORT_BUILD_ID}' 123 | export GCSFS_ZONAL_TEST_BUCKET='${_GCSFS_ZONAL_TEST_BUCKET}' 124 | 125 | export STORAGE_EMULATOR_HOST=https://storage.googleapis.com 126 | export GCSFS_TEST_PROJECT=${PROJECT_ID} 127 | export GCSFS_TEST_KMS_KEY=projects/${PROJECT_ID}/locations/${_REGION}/keyRings/${_GCSFS_KEY_RING_NAME}/cryptoKeys/${_GCSFS_KEY_NAME} 128 | 129 | echo '--- Running standard tests on VM ---' 130 | pytest -vv -s \ 131 | --log-format='%(asctime)s %(levelname)s %(message)s' \ 132 | --log-date-format='%H:%M:%S' \ 133 | --color=no \ 134 | gcsfs/ \ 135 | --deselect gcsfs/tests/test_core.py::test_sign 136 | 137 | echo '--- Running Zonal tests on VM ---' 138 | export GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT="true" 139 | pytest -vv -s \ 140 | --log-format='%(asctime)s %(levelname)s %(message)s' \ 141 | --log-date-format='%H:%M:%S' \ 142 | --color=no \ 143 | gcsfs/tests/test_extended_gcsfs.py 144 | " 145 | 146 | # Execute the script on the VM via SSH. 147 | gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="$$VM_SCRIPT" 148 | waitFor: 149 | - "create-vm" 150 | - "create-standard-bucket" 151 | - "enable-bucket-versioning" 152 | - "generate-ssh-key" 153 | 154 | # --- Cleanup Steps --- 155 | 156 | # Step 5: Clean up the SSH key from the OS Login profile. 157 | # This step is crucial to prevent key accumulation. 
158 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 159 | id: "cleanup-ssh-key" 160 | entrypoint: "bash" 161 | args: 162 | - "-c" 163 | - | 164 | echo "--- Removing SSH key from OS Login profile to prevent accumulation ---" 165 | gcloud compute os-login ssh-keys remove \ 166 | --key-file=/workspace/gcb_ssh_key.pub || true 167 | waitFor: 168 | - "run-tests-on-vm" 169 | 170 | # Step 6: Delete the GCE VM. 171 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 172 | id: "delete-vm" 173 | entrypoint: "gcloud" 174 | args: 175 | - "compute" 176 | - "instances" 177 | - "delete" 178 | - "gcsfs-test-vm-${_SHORT_BUILD_ID}" 179 | - "--zone=${_ZONE}" 180 | - "--quiet" 181 | waitFor: 182 | - "cleanup-ssh-key" 183 | 184 | # Step 7: Delete the standard GCS bucket. 185 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 186 | id: "delete-standard-bucket" 187 | entrypoint: "gcloud" 188 | args: 189 | [ 190 | "storage", 191 | "rm", 192 | "--recursive", 193 | "gs://gcsfs-test-standard-${_SHORT_BUILD_ID}", 194 | ] 195 | waitFor: 196 | - "run-tests-on-vm" 197 | 198 | # Step 8: Delete the versioned GCS bucket. 199 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk" 200 | id: "delete-versioned-bucket" 201 | entrypoint: "gcloud" 202 | args: 203 | [ 204 | "storage", 205 | "rm", 206 | "--recursive", 207 | "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}", 208 | ] 209 | waitFor: 210 | - "run-tests-on-vm" 211 | 212 | timeout: "3600s" # 60 minutes 213 | 214 | options: 215 | logging: CLOUD_LOGGING_ONLY 216 | pool: 217 | name: "projects/${PROJECT_ID}/locations/us-central1/workerPools/cloud-build-worker-pool" 218 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GCSFs.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GCSFs.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/GCSFs" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GCSFs" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Note: in some releases, there are no changes, because we always guarantee 5 | releasing in step with fsspec. 6 | 7 | 2025.12.0 8 | --------- 9 | 10 | * Fix CI when run against real GCS buckets (#71) 11 | * Run extended tests when env var is set (#712) 12 | * Support py3.14 and drop 3.9 (#709) 13 | * Introduce ExtendedGcsFileSystem for Zonal Bucket gRPC Read Path (#707) 14 | 15 | 2025.10.0 16 | --------- 17 | 18 | * fix info() performance regression (#705) 19 | * add CoC (#703) 20 | * mkdir should not create bucket by default (#701) 21 | * add anonymous tracker to docs (#700) 22 | 23 | 2025.9.0 24 | -------- 25 | 26 | * fix slow ls iterations (#697) 27 | * Ensure right error type for get() on nonexistent (#695) 28 | 29 | 2025.7.0 30 | -------- 31 | 32 | * acknowledge Anaconda support (#691) 33 | * less refreshing for CI (#690) 34 | 35 | 2025.5.1 36 | -------- 37 | 38 | * Fix token timezone comparison (#683, 688) 39 | 40 | 2025.5.0 41 | -------- 42 | 43 | * Avoid deprecated utcnow (#680) 44 | * Add support for specifying Cloud KMS keys when creating files (#679) 45 | * Yet another fix for isdir (#676) 46 | * Create warning for appending mode 'a' operations (#675) 47 | * add userProject to batch deletion query (#673) 48 | 49 | 2025.3.2 50 | -------- 51 | 52 | no changes 53 | 54 | 2025.3.1 55 | -------- 56 | 57 | * Fix find with path not ending with "/" (#668) 58 | * remove "beta" note from doc (#666) 59 | * don't check expiry of creds that don't expire (#665) 60 | 61 | 2025.3.0 62 | -------- 63 | 64 | * Improvements for credentials refresh under high load (#658) 65 | 66 | 2025.2.0 67 | -------- 68 | 69 | * guess upload file MIME types (#655) 70 | * better shutdown cleanup (#657) 71 | 72 | 2024.12.0 73 | --------- 74 | 75 | * Exclusive write (#651) 76 | * Avoid IndexError on integer seconds (#649) 77 | * note on non-posixness (#648) 78 | * handle cache_timeout=0 (#646) 79 | 80 | 2024.10.0 81 | --------- 82 | 83 | * Remove race condition in credentials (#643) 84 | * fix md5 hash 
order logic (#640) 85 | 86 | 2024.9.0 87 | -------- 88 | 89 | * In case error in a pure string (#631) 90 | 91 | 2024.6.1 92 | -------- 93 | 94 | no changes 95 | 96 | 2024.6.0 97 | -------- 98 | 99 | * Add seek(0) to request data to prevent issues on retries (#624) 100 | 101 | 2024.5.0 102 | -------- 103 | 104 | * swap order of "gcs", "gs" protocols (#620) 105 | * fix get_file for relative lpath (#618) 106 | 107 | 2024.3.1 108 | -------- 109 | 110 | * fix expiration= for sign() (#613) 111 | * do populate dircache in ls() (#612) 112 | * allow passing extra options to mkdir (#610) 113 | * credentials docs (#609) 114 | * retry in bulk rm (#608) 115 | * clean up loop on close (#606) 116 | 117 | 2024.2.0 118 | -------- 119 | 120 | * doc for passing tokens (#603) 121 | 122 | 2023.12.2 123 | --------- 124 | 125 | no changes 126 | 127 | 2023.12.1 128 | --------- 129 | 130 | no changes 131 | 132 | 2023.12.0 133 | --------- 134 | 135 | * use same version when paginating list (#591) 136 | * fix double asterisk glob test (#589) 137 | 138 | 2023.10.0 139 | --------- 140 | 141 | * Fix for transactions of small files (#586) 142 | 143 | 2023.9.2 144 | -------- 145 | 146 | * CI updates (#582) 147 | 148 | 2023.9.1 149 | -------- 150 | 151 | * small fixes following #573 (#578) 152 | 153 | 2023.9.0 154 | -------- 155 | 156 | * bulk operations edge cases (#576, 572) 157 | * inventory report based file listing (#573) 158 | * pickle HttpError (#571) 159 | * avoid warnings (#569) 160 | * maxdepth in find() (#566) 161 | * invalidate dircache (#564) 162 | * standard metadata field names (#563) 163 | * performance of building cache in find() (#561) 164 | 165 | 166 | 2023.6.0 167 | -------- 168 | 169 | * allow raw/session token for auth (#554) 170 | * fix listings_expiry_time kwargs (#551) 171 | * allow setting fixed metadata on put/pipe (#550) 172 | 173 | 2023.5.0 174 | -------- 175 | 176 | * Allow emulator host without protocol (#548) 177 | * Prevent upload retry from closing the file being sent (#540) 178 | 179 | 2023.4.0 180 | -------- 181 | 182 | No changes 183 | 184 | 2023.3.0 185 | -------- 186 | 187 | * Don't let find() mess up dircache (#531) 188 | * Drop py3.7 (#529) 189 | * Update docs (#528) 190 | * Make times UTC (#527) 191 | * Use BytesIO for large bodies (#525) 192 | * Fix: Don't append generation when it is absent (#523) 193 | * get/put/cp consistency tests (#521) 194 | 195 | 2023.1.0 196 | -------- 197 | 198 | * Support create time (#516, 518) 199 | * defer async session creation (#513, 514) 200 | * support listing of file versions (#509) 201 | * fix ``sign`` following versioned split protocol (#513) 202 | 203 | 2022.11.0 204 | --------- 205 | 206 | * implement object versioning (#504) 207 | 208 | 2022.10.0 209 | --------- 210 | 211 | * bump fsspec to 2022.10.0 (#503) 212 | 213 | 2022.8.1 214 | -------- 215 | 216 | * don't install prerelease aiohttp (#490) 217 | 218 | 2022.7.1 219 | -------- 220 | 221 | * Try cloud auth by default (#479) 222 | 223 | 2022.5.0 224 | -------- 225 | 226 | * invalidate listings cache for simple put/pipe (#474) 227 | * conform _mkdir and _cat_file to upstream (#471) 228 | 229 | 2022.3.0 230 | -------- 231 | 232 | (note that this release happened in 2022.4, but we label as 2022.3 to match 233 | fsspec) 234 | 235 | * bucket exists workaround (#464) 236 | * dirmarkers (#459) 237 | * check connection (#457) 238 | * browser connection now uses local server (#456) 239 | * bucket location (#455) 240 | * ensure auth is closed (#452) 241 | 242 | 2022.02.0 243 | --------- 244 | 245 | 
* fix list_buckets without cache (#449) 246 | * drop py36 (#445) 247 | 248 | 2022.01.0 249 | --------- 250 | 251 | * update refname for versions (#442) 252 | 253 | 2021.11.1 254 | --------- 255 | 256 | * don't touch cache when doing find with a prefix (#437) 257 | 258 | 2021.11.0 259 | --------- 260 | 261 | * move to fsspec org 262 | * add support for google fixed_key_metadata (#429) 263 | * deprecate `content_encoding` parameter of setxattrs method (#429) 264 | * use emulator for testing instead of vcrpy (#424) 265 | 266 | 2021.10.1 267 | --------- 268 | 269 | * url signing (#411) 270 | * default callback (#422) 271 | 272 | 2021.10.0 273 | --------- 274 | 275 | * min version for decorator 276 | * default callback in get (#422) 277 | 278 | 2021.09.0 279 | --------- 280 | 281 | * correctly recognise 404 (#419) 282 | * fix for .details due to upstream (#417) 283 | * callbacks in get/put (#416) 284 | * "%" in paths (#415) 285 | 286 | 2021.08.1 287 | --------- 288 | 289 | * don't retry 404s (#406) 290 | 291 | 2021.07.0 292 | --------- 293 | 294 | * fix find/glob with a prefix (#399) 295 | 296 | 2021.06.1 297 | --------- 298 | 299 | * kwargs to aiohttpClient session 300 | * graceful timeout when disconnecting at finalise (#397) 301 | 302 | 2021.06.0 303 | --------- 304 | 305 | * negative ranges in cat_file (#394) 306 | 307 | 2021.05.0 308 | --------- 309 | 310 | * no credentials bug fix (#390) 311 | * use googleapis.com (#388) 312 | * more retries (#387, 385, 380) 313 | * Code cleanup (#381) 314 | * license to match stated one (#378) 315 | * deps updated (#376) 316 | 317 | Version 2021.04.0 318 | ----------------- 319 | 320 | * switch to calver and fsspec pin 321 | 322 | Version 0.8.0 323 | ------------- 324 | 325 | * keep up with fsspec 0.9.0 async 326 | * one-shot find 327 | * consistency checkers 328 | * retries for intermittent issues 329 | * timeouts 330 | * partial cat 331 | * http error status 332 | * CI to GHA 333 | 334 | Version 0.7.0 335 | ------------- 336 | 337 | * async operations via aiohttp 338 | 339 | 340 | Version 0.6.0 341 | ------------- 342 | 343 | * **API-breaking**: Changed requester-pays handling for ``GCSFileSystem``. 344 | 345 | The ``user_project`` keyword has been removed, and has been replaced with 346 | the ``requester_pays`` keyword. If you're working with a ``requester_pays`` bucket 347 | you will need to explicitly pass ``requester_pays=True``. This will include your 348 | ``project`` ID in requests made to GCS. 349 | 350 | Version 0.5.3 351 | ------------- 352 | 353 | * ``GCSFileSystem`` now validates that the ``project`` provided, if any, matches the 354 | Google default project when using ``token='google_default'`` to authenticate (:pr:`219`). 355 | * Fixed bug in ``GCSFileSystem.cat`` on objects in requester-pays buckets (:pr:`217`).
356 | 357 | Version 0.5.2 358 | ------------- 359 | 360 | * Fixed bug in ``user_project`` fallback for default Google authentication (:pr:`213`) 361 | 362 | Version 0.5.1 363 | ------------- 364 | 365 | * ``user_project`` now falls back to the ``project`` if provided (:pr:`208`) 366 | 367 | Version 0.5.0 368 | ------------- 369 | 370 | * Added the ability to make requester-pays requests with the ``user_project`` parameter (:pr:`206`) 371 | 372 | Version 0.4.0 373 | ------------- 374 | 375 | * Improved performance when serializing filesystem objects (:pr:`182`) 376 | * Fixed authorization errors when using ``gcsfs`` within multithreaded code (:pr:`183`, :pr:`192`) 377 | * Added contributing instructions (:pr:`185`) 378 | * Improved performance for :meth:`gcsfs.GCSFileSystem.info` (:pr:`187`) 379 | * Fixed bug in :meth:`gcsfs.GCSFileSystem.info` raising an error (:pr:`190`) 380 | 381 | .. raw:: html 382 | 383 | 385 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # GCSFs documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 21 15:20:01 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # sys.path.insert(0, os.path.abspath('.')) 19 | 20 | # -- General configuration ------------------------------------------------ 21 | 22 | # If your documentation needs a minimal Sphinx version, state it here. 23 | # needs_sphinx = '1.0' 24 | 25 | # Add any Sphinx extension module names here, as strings. They can be 26 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 27 | # ones. 28 | extensions = [ 29 | "sphinx.ext.autodoc", 30 | "sphinx.ext.todo", 31 | "sphinx.ext.ifconfig", 32 | "sphinx.ext.viewcode", 33 | "sphinx.ext.autosummary", 34 | "sphinx.ext.extlinks", 35 | "sphinx.ext.napoleon", 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ["_templates"] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffix as a list of string: 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = ".rst" 45 | 46 | # The encoding of source files. 47 | # source_encoding = 'utf-8-sig' 48 | 49 | # The master toctree document. 50 | master_doc = "index" 51 | 52 | # General information about the project. 53 | project = "GCSFs" 54 | copyright = "2017, Continuum Analytics" 55 | author = "Continuum Analytics" 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | import gcsfs 63 | 64 | version = gcsfs.__version__ 65 | # The full version, including alpha/beta/rc tags. 
66 | release = version 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | # today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | # today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = [] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | # default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | # add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 87 | # add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | # show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = "sphinx" 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | # modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | # keep_warnings = False 101 | 102 | # If true, `todo` and `todoList` produce output, else they produce nothing. 103 | todo_include_todos = False 104 | 105 | 106 | # -- Options for HTML output ---------------------------------------------- 107 | 108 | html_theme = "sphinx_rtd_theme" 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 113 | # html_theme_options = {} 114 | 115 | # Add any paths that contain custom themes here, relative to this directory. 116 | # html_theme_path = [] 117 | 118 | # The name for this set of Sphinx documents. If None, it defaults to 119 | # " v documentation". 120 | # html_title = None 121 | 122 | # A shorter title for the navigation bar. Default is the same as html_title. 123 | # html_short_title = None 124 | 125 | # The name of an image file (relative to this directory) to place at the top 126 | # of the sidebar. 127 | # html_logo = None 128 | 129 | # The name of an image file (within the static path) to use as favicon of the 130 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 131 | # pixels large. 132 | # html_favicon = None 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 137 | html_static_path = ["_static"] 138 | 139 | # Custom CSS file to override read the docs default CSS. 140 | # Contains workaround for RTD not rendering colon between argument name and type 141 | html_css_files = ["custom.css"] 142 | 143 | # Add any extra paths that contain custom files (such as robots.txt or 144 | # .htaccess) here, relative to this directory. These files are copied 145 | # directly to the root of the documentation. 146 | # html_extra_path = [] 147 | 148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 149 | # using the given strftime format. 150 | # html_last_updated_fmt = '%b %d, %Y' 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 
154 | # html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 157 | # html_sidebars = {} 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | # html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | # html_domain_indices = True 165 | 166 | # If false, no index is generated. 167 | # html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | # html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | # html_show_sourcelink = True 174 | 175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 176 | # html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 179 | # html_show_copyright = True 180 | 181 | # If true, an OpenSearch description file will be output, and all pages will 182 | # contain a tag referring to it. The value of this option must be the 183 | # base URL from which the finished HTML is served. 184 | # html_use_opensearch = '' 185 | 186 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 187 | # html_file_suffix = None 188 | 189 | # Language to be used for generating the HTML full-text search index. 190 | # Sphinx supports the following languages: 191 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 192 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 193 | # html_search_language = 'en' 194 | 195 | # A dictionary with options for the search language support, empty by default. 196 | # Now only 'ja' uses this config value 197 | # html_search_options = {'type': 'default'} 198 | 199 | # The name of a javascript file (relative to the configuration directory) that 200 | # implements a search results scorer. If empty, the default will be used. 201 | # html_search_scorer = 'scorer.js' 202 | 203 | # Output file base name for HTML help builder. 204 | htmlhelp_basename = "GCSFSdoc" 205 | 206 | # -- Options for LaTeX output --------------------------------------------- 207 | 208 | latex_elements = { 209 | # The paper size ('letterpaper' or 'a4paper'). 210 | #'papersize': 'letterpaper', 211 | # The font size ('10pt', '11pt' or '12pt'). 212 | #'pointsize': '10pt', 213 | # Additional stuff for the LaTeX preamble. 214 | #'preamble': '', 215 | # Latex figure (float) alignment 216 | #'figure_align': 'htbp', 217 | } 218 | 219 | # Grouping the document tree into LaTeX files. List of tuples 220 | # (source start file, target name, title, 221 | # author, documentclass [howto, manual, or own class]). 222 | latex_documents = [ 223 | (master_doc, "GCSFs.tex", "GCSFs Documentation", "Continuum Analytics", "manual") 224 | ] 225 | 226 | # The name of an image file (relative to this directory) to place at the top of 227 | # the title page. 228 | # latex_logo = None 229 | 230 | # For "manual" documents, if this is true, then toplevel headings are parts, 231 | # not chapters. 232 | # latex_use_parts = False 233 | 234 | # If true, show page references after internal links. 235 | # latex_show_pagerefs = False 236 | 237 | # If true, show URL addresses after external links. 238 | # latex_show_urls = False 239 | 240 | # Documents to append as an appendix to all manuals. 241 | # latex_appendices = [] 242 | 243 | # If false, no module index is generated. 
244 | # latex_domain_indices = True 245 | 246 | 247 | # -- Options for manual page output --------------------------------------- 248 | 249 | # One entry per manual page. List of tuples 250 | # (source start file, name, description, authors, manual section). 251 | man_pages = [(master_doc, "gcsfs", "GCSFs Documentation", [author], 1)] 252 | 253 | # If true, show URL addresses after external links. 254 | # man_show_urls = False 255 | 256 | 257 | # -- Options for Texinfo output ------------------------------------------- 258 | 259 | # Grouping the document tree into Texinfo files. List of tuples 260 | # (source start file, target name, title, author, 261 | # dir menu entry, description, category) 262 | texinfo_documents = [ 263 | ( 264 | master_doc, 265 | "GCSFs", 266 | "GCSFs Documentation", 267 | author, 268 | "GCSFs", 269 | "One line description of project.", 270 | "Miscellaneous", 271 | ) 272 | ] 273 | 274 | # Documents to append as an appendix to all manuals. 275 | # texinfo_appendices = [] 276 | 277 | # If false, no module index is generated. 278 | # texinfo_domain_indices = True 279 | 280 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 281 | # texinfo_show_urls = 'footnote' 282 | 283 | # If true, do not generate a @detailmenu in the "Top" node's menu. 284 | # texinfo_no_detailmenu = False 285 | 286 | extlinks = {"pr": ("https://github.com/fsspec/gcsfs/pull/%s", "PR #%s")} 287 | -------------------------------------------------------------------------------- /gcsfs/extended_gcsfs.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from enum import Enum 3 | 4 | from fsspec import asyn 5 | from google.api_core import exceptions as api_exceptions 6 | from google.api_core import gapic_v1 7 | from google.api_core.client_info import ClientInfo 8 | from google.auth.credentials import AnonymousCredentials 9 | from google.cloud import storage_control_v2 10 | from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient 11 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( 12 | AsyncMultiRangeDownloader, 13 | ) 14 | 15 | from gcsfs import __version__ as version 16 | from gcsfs import zb_hns_utils 17 | from gcsfs.core import GCSFile, GCSFileSystem 18 | from gcsfs.zonal_file import ZonalFile 19 | 20 | logger = logging.getLogger("gcsfs") 21 | 22 | USER_AGENT = "python-gcsfs" 23 | 24 | 25 | class BucketType(Enum): 26 | ZONAL_HIERARCHICAL = "ZONAL_HIERARCHICAL" 27 | HIERARCHICAL = "HIERARCHICAL" 28 | NON_HIERARCHICAL = "NON_HIERARCHICAL" 29 | UNKNOWN = "UNKNOWN" 30 | 31 | 32 | gcs_file_types = { 33 | BucketType.ZONAL_HIERARCHICAL: ZonalFile, 34 | BucketType.NON_HIERARCHICAL: GCSFile, 35 | BucketType.HIERARCHICAL: GCSFile, 36 | BucketType.UNKNOWN: GCSFile, 37 | } 38 | 39 | 40 | class ExtendedGcsFileSystem(GCSFileSystem): 41 | """ 42 | This class will be used when GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT env variable is set to true. 43 | ExtendedGcsFileSystem is a subclass of GCSFileSystem that adds new logic for bucket types 44 | including zonal and hierarchical. For buckets without special properties, it forwards requests 45 | to the parent class GCSFileSystem for default processing. 46 | """ 47 | 48 | def __init__(self, *args, **kwargs): 49 | super().__init__(*args, **kwargs) 50 | self.grpc_client = None 51 | self.storage_control_client = None 52 | # Adds user-passed credentials to ExtendedGcsFileSystem to pass to gRPC/Storage Control clients. 
53 | # We unwrap the nested credentials here because self.credentials is a GCSFS wrapper, 54 | # but the clients expect the underlying google.auth credentials object. 55 | self.credential = self.credentials.credentials 56 | # When token="anon", self.credentials.credentials is None. This is 57 | # often used for testing with emulators. However, the gRPC and storage 58 | # control clients require a credentials object for initialization. 59 | # We explicitly use AnonymousCredentials() to allow unauthenticated access. 60 | if self.credentials.token == "anon": 61 | self.credential = AnonymousCredentials() 62 | # initializing grpc and storage control client for Hierarchical and 63 | # zonal bucket operations 64 | self.grpc_client = asyn.sync(self.loop, self._create_grpc_client) 65 | self._storage_control_client = asyn.sync( 66 | self.loop, self._create_control_plane_client 67 | ) 68 | self._storage_layout_cache = {} 69 | 70 | async def _create_grpc_client(self): 71 | if self.grpc_client is None: 72 | return AsyncGrpcClient( 73 | credentials=self.credential, 74 | client_info=ClientInfo(user_agent=f"{USER_AGENT}/{version}"), 75 | ).grpc_client 76 | else: 77 | return self.grpc_client 78 | 79 | async def _create_control_plane_client(self): 80 | # Initialize the storage control plane client for bucket 81 | # metadata operations 82 | client_info = gapic_v1.client_info.ClientInfo( 83 | user_agent=f"{USER_AGENT}/{version}" 84 | ) 85 | return storage_control_v2.StorageControlAsyncClient( 86 | credentials=self.credential, client_info=client_info 87 | ) 88 | 89 | async def _lookup_bucket_type(self, bucket): 90 | if bucket in self._storage_layout_cache: 91 | return self._storage_layout_cache[bucket] 92 | bucket_type = await self._get_bucket_type(bucket) 93 | # Dont cache UNKNOWN type 94 | if bucket_type == BucketType.UNKNOWN: 95 | return BucketType.UNKNOWN 96 | self._storage_layout_cache[bucket] = bucket_type 97 | return self._storage_layout_cache[bucket] 98 | 99 | _sync_lookup_bucket_type = asyn.sync_wrapper(_lookup_bucket_type) 100 | 101 | async def _get_bucket_type(self, bucket): 102 | try: 103 | bucket_name_value = f"projects/_/buckets/{bucket}/storageLayout" 104 | response = await self._storage_control_client.get_storage_layout( 105 | name=bucket_name_value 106 | ) 107 | 108 | if response.location_type == "zone": 109 | return BucketType.ZONAL_HIERARCHICAL 110 | else: 111 | # This should be updated to include HNS in the future 112 | return BucketType.NON_HIERARCHICAL 113 | except api_exceptions.NotFound: 114 | logger.warning(f"Error: Bucket {bucket} not found or you lack permissions.") 115 | return BucketType.UNKNOWN 116 | except Exception as e: 117 | logger.error( 118 | f"Could not determine bucket type for bucket name {bucket}: {e}" 119 | ) 120 | # Default to UNKNOWN in case bucket type is not obtained 121 | return BucketType.UNKNOWN 122 | 123 | def _open( 124 | self, 125 | path, 126 | mode="rb", 127 | block_size=None, 128 | cache_options=None, 129 | acl=None, 130 | consistency=None, 131 | metadata=None, 132 | autocommit=True, 133 | fixed_key_metadata=None, 134 | generation=None, 135 | **kwargs, 136 | ): 137 | """ 138 | Open a file. 
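
        A path in a zonal hierarchical bucket is opened as a ``ZonalFile``;
        all other bucket types fall back to the standard ``GCSFile``, based on
        the cached storage-layout lookup for the bucket.

        Minimal usage sketch (bucket and object names are placeholders)::

            fs = ExtendedGcsFileSystem()
            with fs.open("my-zonal-bucket/data.csv", "rb") as f:
                data = f.read()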
139 | """ 140 | bucket, _, _ = self.split_path(path) 141 | bucket_type = self._sync_lookup_bucket_type(bucket) 142 | return gcs_file_types[bucket_type]( 143 | self, 144 | path, 145 | mode, 146 | block_size=block_size or self.default_block_size, 147 | cache_options=cache_options, 148 | consistency=consistency or self.consistency, 149 | metadata=metadata, 150 | acl=acl, 151 | autocommit=autocommit, 152 | fixed_key_metadata=fixed_key_metadata, 153 | generation=generation, 154 | **kwargs, 155 | ) 156 | 157 | # Replacement method for _process_limits to support new params (offset and length) for MRD. 158 | async def _process_limits_to_offset_and_length(self, path, start, end): 159 | """ 160 | Calculates the read offset and length from start and end parameters. 161 | 162 | Args: 163 | path (str): The path to the file. 164 | start (int | None): The starting byte position. 165 | end (int | None): The ending byte position. 166 | 167 | Returns: 168 | tuple: A tuple containing (offset, length). 169 | 170 | Raises: 171 | ValueError: If the calculated range is invalid. 172 | """ 173 | size = None 174 | 175 | if start is None: 176 | offset = 0 177 | elif start < 0: 178 | size = (await self._info(path))["size"] if size is None else size 179 | offset = size + start 180 | else: 181 | offset = start 182 | 183 | if end is None: 184 | size = (await self._info(path))["size"] if size is None else size 185 | effective_end = size 186 | elif end < 0: 187 | size = (await self._info(path))["size"] if size is None else size 188 | effective_end = size + end 189 | else: 190 | effective_end = end 191 | 192 | if offset < 0: 193 | raise ValueError(f"Calculated start offset ({offset}) cannot be negative.") 194 | if effective_end < offset: 195 | raise ValueError( 196 | f"Calculated end position ({effective_end}) cannot be before start offset ({offset})." 197 | ) 198 | elif effective_end == offset: 199 | length = 0 # Handle zero-length slice 200 | else: 201 | length = effective_end - offset # Normal case 202 | size = (await self._info(path))["size"] if size is None else size 203 | if effective_end > size: 204 | length = max(0, size - offset) # Clamp and ensure non-negative 205 | 206 | return offset, length 207 | 208 | sync_process_limits_to_offset_and_length = asyn.sync_wrapper( 209 | _process_limits_to_offset_and_length 210 | ) 211 | 212 | async def _is_zonal_bucket(self, bucket): 213 | bucket_type = await self._lookup_bucket_type(bucket) 214 | return bucket_type == BucketType.ZONAL_HIERARCHICAL 215 | 216 | async def _cat_file(self, path, start=None, end=None, mrd=None, **kwargs): 217 | """Fetch a file's contents as bytes, with an optimized path for Zonal buckets. 218 | 219 | This method overrides the parent `_cat_file` to read objects in Zonal buckets using gRPC. 220 | 221 | Args: 222 | path (str): The full GCS path to the file (e.g., "bucket/object"). 223 | start (int, optional): The starting byte position to read from. 224 | end (int, optional): The ending byte position to read to. 225 | mrd (AsyncMultiRangeDownloader, optional): An existing multi-range 226 | downloader instance. If not provided, a new one will be created for Zonal buckets. 227 | 228 | Returns: 229 | bytes: The content of the file or file range. 230 | """ 231 | mrd = kwargs.pop("mrd", None) 232 | mrd_created = False 233 | 234 | # A new MRD is required when read is done directly by the 235 | # GCSFilesystem class without creating a GCSFile object first. 
236 | if mrd is None: 237 | bucket, object_name, generation = self.split_path(path) 238 | # Fall back to default implementation if not a zonal bucket 239 | if not await self._is_zonal_bucket(bucket): 240 | return await super()._cat_file(path, start=start, end=end, **kwargs) 241 | 242 | mrd = await AsyncMultiRangeDownloader.create_mrd( 243 | self.grpc_client, bucket, object_name, generation 244 | ) 245 | mrd_created = True 246 | 247 | offset, length = await self._process_limits_to_offset_and_length( 248 | path, start, end 249 | ) 250 | try: 251 | return await zb_hns_utils.download_range( 252 | offset=offset, length=length, mrd=mrd 253 | ) 254 | finally: 255 | # Explicit cleanup if we created the MRD 256 | if mrd_created: 257 | await mrd.close() 258 | -------------------------------------------------------------------------------- /gcsfs/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import shlex 4 | import subprocess 5 | import time 6 | 7 | import fsspec 8 | import pytest 9 | import requests 10 | from google.cloud import storage 11 | 12 | from gcsfs import GCSFileSystem 13 | from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET, TEST_ZONAL_BUCKET 14 | 15 | files = { 16 | "test/accounts.1.json": ( 17 | b'{"amount": 100, "name": "Alice"}\n' 18 | b'{"amount": 200, "name": "Bob"}\n' 19 | b'{"amount": 300, "name": "Charlie"}\n' 20 | b'{"amount": 400, "name": "Dennis"}\n' 21 | ), 22 | "test/accounts.2.json": ( 23 | b'{"amount": 500, "name": "Alice"}\n' 24 | b'{"amount": 600, "name": "Bob"}\n' 25 | b'{"amount": 700, "name": "Charlie"}\n' 26 | b'{"amount": 800, "name": "Dennis"}\n' 27 | ), 28 | } 29 | 30 | csv_files = { 31 | "2014-01-01.csv": ( 32 | b"name,amount,id\n" b"Alice,100,1\n" b"Bob,200,2\n" b"Charlie,300,3\n" 33 | ), 34 | "2014-01-02.csv": b"name,amount,id\n", 35 | "2014-01-03.csv": ( 36 | b"name,amount,id\n" b"Dennis,400,4\n" b"Edith,500,5\n" b"Frank,600,6\n" 37 | ), 38 | } 39 | text_files = { 40 | "nested/file1": b"hello\n", 41 | "nested/file2": b"world", 42 | "nested/nested2/file1": b"hello\n", 43 | "nested/nested2/file2": b"world", 44 | } 45 | allfiles = dict(**files, **csv_files, **text_files) 46 | a = TEST_BUCKET + "/tmp/test/a" 47 | b = TEST_BUCKET + "/tmp/test/b" 48 | c = TEST_BUCKET + "/tmp/test/c" 49 | d = TEST_BUCKET + "/tmp/test/d" 50 | 51 | params = dict() 52 | 53 | 54 | def stop_docker(container): 55 | cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container) 56 | cid = subprocess.check_output(cmd).strip().decode() 57 | if cid: 58 | subprocess.call(["docker", "rm", "-f", "-v", cid]) 59 | 60 | 61 | @pytest.fixture(scope="session") 62 | def docker_gcs(): 63 | if "STORAGE_EMULATOR_HOST" in os.environ: 64 | # assume using real API or otherwise have a server already set up 65 | yield os.getenv("STORAGE_EMULATOR_HOST") 66 | return 67 | params["token"] = "anon" 68 | container = "gcsfs_test" 69 | cmd = ( 70 | "docker run -d -p 4443:4443 --name gcsfs_test fsouza/fake-gcs-server:latest -scheme " 71 | "http -public-host 0.0.0.0:4443 -external-url http://localhost:4443 " 72 | "-backend memory" 73 | ) 74 | stop_docker(container) 75 | subprocess.check_output(shlex.split(cmd)) 76 | url = "http://0.0.0.0:4443" 77 | timeout = 10 78 | while True: 79 | try: 80 | r = requests.get(url + "/storage/v1/b") 81 | if r.ok: 82 | yield url 83 | break 84 | except Exception as e: # noqa: E722 85 | timeout -= 1 86 | if timeout < 0: 87 | raise SystemError from e 88 | time.sleep(1) 89 | 
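    # Teardown: remove the emulator container once the test session finishes.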
stop_docker(container) 90 | 91 | 92 | @pytest.fixture(scope="session") 93 | def gcs_factory(docker_gcs): 94 | params["endpoint_url"] = docker_gcs 95 | 96 | def factory(**kwargs): 97 | GCSFileSystem.clear_instance_cache() 98 | return fsspec.filesystem("gcs", **params, **kwargs) 99 | 100 | return factory 101 | 102 | 103 | @pytest.fixture(scope="session") 104 | def buckets_to_delete(): 105 | """ 106 | Provides a session-scoped set to track the names of GCS buckets that are 107 | created by the test suite. 108 | 109 | When tests run, they may create new GCS buckets. If these buckets are not 110 | deleted, they will persist after the test run, leading to resource leakage. 111 | This set acts as a registry of buckets that the `final_cleanup` fixture 112 | should remove at the end of the entire test session. 113 | """ 114 | return set() 115 | 116 | 117 | @pytest.fixture 118 | def gcs(gcs_factory, buckets_to_delete, populate=True): 119 | gcs = gcs_factory() 120 | try: # ensure we're empty. 121 | # Create the bucket if it doesn't exist, otherwise clean it. 122 | if not gcs.exists(TEST_BUCKET): 123 | gcs.mkdir(TEST_BUCKET) 124 | # By adding the bucket name to this set, we are marking it for 125 | # deletion at the end of the test session. This ensures that if 126 | # the test suite creates the bucket, it will also be responsible 127 | # for deleting it. If the bucket already existed, we assume it's 128 | # managed externally and should not be deleted by the tests. 129 | buckets_to_delete.add(TEST_BUCKET) 130 | else: 131 | try: 132 | gcs.rm(gcs.find(TEST_BUCKET)) 133 | except Exception as e: 134 | logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}") 135 | 136 | if populate: 137 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}) 138 | gcs.invalidate_cache() 139 | yield gcs 140 | finally: 141 | _cleanup_gcs(gcs) 142 | 143 | 144 | @pytest.fixture 145 | def extended_gcs_factory(gcs_factory, buckets_to_delete, populate=True): 146 | created_instances = [] 147 | 148 | def factory(**kwargs): 149 | fs = _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate, **kwargs) 150 | created_instances.append(fs) 151 | return fs 152 | 153 | yield factory 154 | 155 | for fs in created_instances: 156 | _cleanup_gcs(fs) 157 | 158 | 159 | @pytest.fixture 160 | def extended_gcsfs(gcs_factory, buckets_to_delete, populate=True): 161 | extended_gcsfs = _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate) 162 | try: 163 | yield extended_gcsfs 164 | finally: 165 | _cleanup_gcs(extended_gcsfs) 166 | 167 | 168 | def _cleanup_gcs(gcs): 169 | """Clean the bucket contents, logging a warning on failure.""" 170 | try: 171 | gcs.rm(gcs.find(TEST_BUCKET)) 172 | except Exception as e: 173 | logging.warning(f"Failed to clean up GCS bucket {TEST_BUCKET}: {e}") 174 | 175 | 176 | @pytest.fixture(scope="session", autouse=True) 177 | def final_cleanup(gcs_factory, buckets_to_delete): 178 | """ 179 | A session-scoped, auto-use fixture that deletes all buckets registered 180 | in the `buckets_to_delete` set after the entire test session is complete. 181 | """ 182 | yield 183 | # This code runs after the entire test session finishes 184 | 185 | gcs = gcs_factory() 186 | for bucket in buckets_to_delete: 187 | # The cleanup logic attempts to delete every bucket that was 188 | # added to the set during the session. For real GCS, only delete if 189 | # created by the test suite. 
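        # Buckets that already existed before the run are never added to the set,
        # so they are left untouched here.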
190 | try: 191 | if gcs.exists(bucket): 192 | gcs.rm(bucket, recursive=True) 193 | logging.info(f"Cleaned up bucket: {bucket}") 194 | except Exception as e: 195 | logging.warning(f"Failed to perform final cleanup for bucket {bucket}: {e}") 196 | 197 | 198 | @pytest.fixture 199 | def gcs_versioned(gcs_factory, buckets_to_delete): 200 | gcs = gcs_factory() 201 | gcs.version_aware = True 202 | is_real_gcs = ( 203 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" 204 | ) 205 | try: # ensure we're empty. 206 | # The versioned bucket might be created by `is_versioning_enabled` 207 | # in test_core_versioned.py. We must register it for cleanup only if 208 | # it was created by this test run. 209 | try: 210 | from gcsfs.tests.test_core_versioned import ( 211 | _VERSIONED_BUCKET_CREATED_BY_TESTS, 212 | ) 213 | 214 | if _VERSIONED_BUCKET_CREATED_BY_TESTS: 215 | buckets_to_delete.add(TEST_VERSIONED_BUCKET) 216 | except ImportError: 217 | pass # test_core_versioned is not being run 218 | if is_real_gcs: 219 | cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET) 220 | else: 221 | # For emulators, we delete and recreate the bucket for a clean state 222 | try: 223 | gcs.rm(TEST_VERSIONED_BUCKET, recursive=True) 224 | except FileNotFoundError: 225 | pass 226 | gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True) 227 | buckets_to_delete.add(TEST_VERSIONED_BUCKET) 228 | gcs.invalidate_cache() 229 | yield gcs 230 | finally: 231 | # Ensure the bucket is empty after the test. 232 | try: 233 | if is_real_gcs: 234 | cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET) 235 | except Exception as e: 236 | logging.warning( 237 | f"Failed to clean up versioned bucket {TEST_VERSIONED_BUCKET} after test: {e}" 238 | ) 239 | 240 | 241 | def cleanup_versioned_bucket(gcs, bucket_name, prefix=None): 242 | """ 243 | Deletes all object versions in a bucket using the google-cloud-storage client, 244 | ensuring it uses the same credentials as the gcsfs instance. 245 | """ 246 | # Define a retry policy for API calls to handle rate limiting. 247 | # This can retry on 429 Too Many Requests errors, which can happen 248 | # when deleting many object versions quickly. 249 | from google.api_core.retry import Retry 250 | 251 | retry_policy = Retry( 252 | initial=1.0, # Initial delay in seconds 253 | maximum=30.0, # Maximum delay in seconds 254 | multiplier=1.2, # Backoff factor 255 | ) 256 | 257 | client = storage.Client( 258 | credentials=gcs.credentials.credentials, project=gcs.project 259 | ) 260 | 261 | # List all blobs, including old versions 262 | blobs_to_delete = list(client.list_blobs(bucket_name, versions=True, prefix=prefix)) 263 | 264 | if not blobs_to_delete: 265 | logging.info("No object versions to delete in %s.", bucket_name) 266 | return 267 | 268 | logging.info( 269 | "Deleting %d object versions from %s.", len(blobs_to_delete), bucket_name 270 | ) 271 | time.sleep(2) 272 | for blob in blobs_to_delete: 273 | blob.delete(retry=retry_policy) 274 | 275 | logging.info("Successfully deleted %d object versions.", len(blobs_to_delete)) 276 | 277 | 278 | def _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate=True, **kwargs): 279 | is_real_gcs = ( 280 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" 281 | ) 282 | 283 | extended_gcsfs = gcs_factory(**kwargs) 284 | # Only create/delete/populate the bucket if we are NOT using the real GCS endpoint. 
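    # Against real GCS, the zonal test bucket is assumed to already exist and to
    # be managed outside of the test suite.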
285 | if not is_real_gcs: 286 | try: 287 | extended_gcsfs.rm(TEST_ZONAL_BUCKET, recursive=True) 288 | except FileNotFoundError: 289 | pass 290 | extended_gcsfs.mkdir(TEST_ZONAL_BUCKET) 291 | buckets_to_delete.add(TEST_ZONAL_BUCKET) 292 | if populate: 293 | extended_gcsfs.pipe( 294 | {TEST_ZONAL_BUCKET + "/" + k: v for k, v in allfiles.items()} 295 | ) 296 | extended_gcsfs.invalidate_cache() 297 | return extended_gcsfs 298 | -------------------------------------------------------------------------------- /gcsfs/credentials.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import pickle 5 | import textwrap 6 | import threading 7 | import warnings 8 | from datetime import datetime, timezone 9 | 10 | import google.auth as gauth 11 | import google.auth.compute_engine 12 | import google.auth.credentials 13 | import google.auth.exceptions 14 | import requests 15 | from google.auth.transport.requests import Request 16 | from google.oauth2 import service_account 17 | from google.oauth2.credentials import Credentials 18 | from google_auth_oauthlib.flow import InstalledAppFlow 19 | 20 | from gcsfs.retry import HttpError 21 | 22 | logger = logging.getLogger("gcsfs.credentials") 23 | 24 | tfile = os.path.join(os.path.expanduser("~"), ".gcs_tokens") 25 | 26 | not_secret = { 27 | "client_id": "586241054156-9kst7ltfj66svc342pcn43vp6ta3idin" 28 | ".apps.googleusercontent.com", 29 | "client_secret": "xto0LIFYX35mmHF9T1R2QBqT", 30 | } 31 | 32 | client_config = { 33 | "installed": { 34 | "client_id": not_secret["client_id"], 35 | "client_secret": not_secret["client_secret"], 36 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 37 | "token_uri": "https://accounts.google.com/o/oauth2/token", 38 | } 39 | } 40 | 41 | 42 | class GoogleCredentials: 43 | def __init__(self, project, access, token, check_credentials=None, on_google=True): 44 | self.scope = "https://www.googleapis.com/auth/devstorage." + access 45 | self.project = project 46 | self.access = access 47 | self.heads = {} 48 | 49 | self.credentials = None 50 | self.method = None 51 | self.lock = threading.Lock() 52 | self.token = token 53 | self.on_google = on_google 54 | self.connect(method=token) 55 | 56 | if check_credentials: 57 | warnings.warn( 58 | "The `check_credentials` argument is deprecated and will be removed in a future release.", 59 | DeprecationWarning, 60 | ) 61 | 62 | @classmethod 63 | def load_tokens(cls): 64 | """Get "browser" tokens from disc""" 65 | try: 66 | with open(tfile, "rb") as f: 67 | tokens = pickle.load(f) 68 | except Exception: 69 | tokens = {} 70 | GoogleCredentials.tokens = tokens 71 | 72 | @staticmethod 73 | def _save_tokens(): 74 | try: 75 | with open(tfile, "wb") as f: 76 | pickle.dump(GoogleCredentials.tokens, f, 2) 77 | except Exception as e: 78 | warnings.warn("Saving token cache failed: " + str(e)) 79 | 80 | def _connect_google_default(self): 81 | credentials, project = gauth.default(scopes=[self.scope]) 82 | msg = textwrap.dedent( 83 | """\ 84 | User-provided project '{}' does not match the google default project '{}'. Either 85 | 86 | 1. Accept the google-default project by not passing a `project` to GCSFileSystem 87 | 2. Configure the default project to match the user-provided project (gcloud config set project) 88 | 3. Use an authorization method other than 'google_default' by providing 'token=...' 
89 | """ 90 | ) 91 | if self.project and self.project != project: 92 | raise ValueError(msg.format(self.project, project)) 93 | self.project = project 94 | self.credentials = credentials 95 | 96 | def _connect_cloud(self): 97 | if not self.on_google: 98 | raise ValueError 99 | self.credentials = gauth.compute_engine.Credentials() 100 | try: 101 | with requests.Session() as session: 102 | req = Request(session) 103 | self.credentials.refresh(req) 104 | except gauth.exceptions.RefreshError as error: 105 | raise ValueError("Invalid gcloud credentials") from error 106 | 107 | def _connect_cache(self): 108 | if len(self.tokens) == 0: 109 | raise ValueError("No cached tokens") 110 | 111 | project, access = self.project, self.access 112 | if (project, access) in self.tokens: 113 | credentials = self.tokens[(project, access)] 114 | self.credentials = credentials 115 | 116 | def _dict_to_credentials(self, token): 117 | """ 118 | Convert old dict-style token. 119 | 120 | Does not preserve access token itself, assumes refresh required. 121 | """ 122 | try: 123 | token = service_account.Credentials.from_service_account_info( 124 | token, scopes=[self.scope] 125 | ) 126 | except: # noqa: E722 127 | # TODO: catch specific exceptions 128 | # According https://github.com/googleapis/python-cloud-core/blob/master/google/cloud/client.py 129 | # Scopes required for authenticating with a service. User authentication fails 130 | # with invalid_scope if scope is specified. 131 | token = Credentials( 132 | None, 133 | refresh_token=token["refresh_token"], 134 | client_secret=token["client_secret"], 135 | client_id=token["client_id"], 136 | token_uri="https://oauth2.googleapis.com/token", 137 | ) 138 | return token 139 | 140 | def _connect_token(self, token): 141 | """ 142 | Connect using a concrete token 143 | 144 | Parameters 145 | ---------- 146 | token: str, dict or Credentials 147 | If a str and a valid file name, try to load as a Service file, or next as a JSON; 148 | if not a valid file name, assume it's a valid raw (non-renewable/session) token, and pass to Credentials. If 149 | dict, try to interpret as credentials; if Credentials, use directly. 150 | """ 151 | if isinstance(token, str): 152 | if os.path.exists(token): 153 | try: 154 | # is this a "service" token? 155 | self._connect_service(token) 156 | return 157 | except: # noqa: E722 158 | # TODO: catch specific exceptions 159 | # some other kind of token file 160 | # will raise exception if is not json 161 | with open(token) as data: 162 | token = json.load(data) 163 | else: 164 | token = Credentials(token) 165 | if isinstance(token, dict): 166 | credentials = self._dict_to_credentials(token) 167 | elif isinstance(token, google.auth.credentials.Credentials): 168 | credentials = token 169 | else: 170 | raise ValueError("Token format not understood") 171 | self.credentials = credentials 172 | if self.credentials.valid: 173 | self.credentials.apply(self.heads) 174 | 175 | def _credentials_valid(self, refresh_buffer): 176 | return ( 177 | self.credentials.valid 178 | # In addition to checking current validity, we ensure that there is 179 | # not a near-future expiry to avoid errors when expiration hits. 
180 | and ( 181 | ( 182 | self.credentials.expiry 183 | and ( 184 | self.credentials.expiry.replace(tzinfo=timezone.utc) 185 | - datetime.now(timezone.utc) 186 | ).total_seconds() 187 | > refresh_buffer 188 | ) 189 | or not self.credentials.expiry 190 | ) 191 | ) 192 | 193 | def maybe_refresh(self, refresh_buffer=300): 194 | """ 195 | Check and refresh credentials if needed 196 | """ 197 | if self.credentials is None: 198 | return # anon 199 | 200 | if self._credentials_valid(refresh_buffer): 201 | return # still good, with buffer 202 | 203 | with requests.Session() as session: 204 | req = Request(session) 205 | with self.lock: 206 | if self._credentials_valid(refresh_buffer): 207 | return # repeat check to avoid race conditions 208 | 209 | logger.debug("GCS refresh") 210 | try: 211 | self.credentials.refresh(req) 212 | except gauth.exceptions.RefreshError as error: 213 | # Re-raise as HttpError with a 401 code and the expected message 214 | raise HttpError( 215 | {"code": 401, "message": "Invalid Credentials"} 216 | ) from error 217 | 218 | # https://github.com/fsspec/filesystem_spec/issues/565 219 | self.credentials.apply(self.heads) 220 | 221 | def apply(self, out): 222 | """Insert credential headers in-place to a dictionary""" 223 | self.maybe_refresh() 224 | if self.credentials is not None: 225 | self.credentials.apply(out) 226 | 227 | def _connect_service(self, fn): 228 | # raises exception if the file does not match expectation 229 | credentials = service_account.Credentials.from_service_account_file( 230 | fn, scopes=[self.scope] 231 | ) 232 | self.credentials = credentials 233 | 234 | def _connect_anon(self): 235 | self.credentials = None 236 | 237 | def _connect_browser(self): 238 | flow = InstalledAppFlow.from_client_config(client_config, [self.scope]) 239 | credentials = flow.run_local_server() 240 | self.tokens[(self.project, self.access)] = credentials 241 | self._save_tokens() 242 | self.credentials = credentials 243 | 244 | def connect(self, method=None): 245 | """ 246 | Establish session token. A new token will be requested if the current 247 | one is within 100s of expiry. 248 | 249 | Parameters 250 | ---------- 251 | method: str (google_default|cache|cloud|token|anon|browser) or None 252 | Type of authorisation to implement - calls `_connect_*` methods. 253 | If None, will try sequence of methods. 254 | """ 255 | if method not in [ 256 | "google_default", 257 | "cache", 258 | "cloud", 259 | "token", 260 | "anon", 261 | None, 262 | ]: 263 | self._connect_token(method) 264 | elif method is None: 265 | for meth in ["google_default", "cache", "cloud", "anon"]: 266 | try: 267 | self.connect(method=meth) 268 | logger.debug("Connected with method %s", meth) 269 | break 270 | except (google.auth.exceptions.GoogleAuthError, ValueError) as e: 271 | # GoogleAuthError is the base class for all authentication 272 | # errors 273 | logger.debug( 274 | 'Connection with method "%s" failed' % meth, exc_info=e 275 | ) 276 | # Reset credentials if they were set but the authentication failed 277 | # (reverts to 'anon' behavior) 278 | self.credentials = None 279 | else: 280 | # Since the 'anon' connection method should always succeed, 281 | # getting here means something has gone terribly wrong. 
282 | raise RuntimeError("All connection methods have failed!") 283 | else: 284 | self.__getattribute__("_connect_" + method)() 285 | self.method = method 286 | -------------------------------------------------------------------------------- /gcsfs/tests/test_extended_gcsfs.py: -------------------------------------------------------------------------------- 1 | import contextlib 2 | import io 3 | import os 4 | from itertools import chain 5 | from unittest import mock 6 | 7 | import pytest 8 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import ( 9 | AsyncMultiRangeDownloader, 10 | ) 11 | from google.cloud.storage.exceptions import DataCorruption 12 | 13 | from gcsfs.checkers import ConsistencyChecker, MD5Checker, SizeChecker 14 | from gcsfs.extended_gcsfs import BucketType 15 | from gcsfs.tests.conftest import csv_files, files, text_files 16 | from gcsfs.tests.settings import TEST_ZONAL_BUCKET 17 | 18 | file = "test/accounts.1.json" 19 | file_path = f"{TEST_ZONAL_BUCKET}/{file}" 20 | json_data = files[file] 21 | lines = io.BytesIO(json_data).readlines() 22 | file_size = len(json_data) 23 | 24 | REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" 25 | 26 | a = TEST_ZONAL_BUCKET + "/tmp/test/a" 27 | b = TEST_ZONAL_BUCKET + "/tmp/test/b" 28 | c = TEST_ZONAL_BUCKET + "/tmp/test/c" 29 | 30 | # If the condition is True, only then tests in this file are run. 31 | should_run = os.getenv(REQUIRED_ENV_VAR, "false").lower() in ( 32 | "true", 33 | "1", 34 | ) 35 | pytestmark = pytest.mark.skipif( 36 | not should_run, reason=f"Skipping tests: {REQUIRED_ENV_VAR} env variable is not set" 37 | ) 38 | 39 | 40 | @pytest.fixture 41 | def gcs_bucket_mocks(): 42 | """A factory fixture for mocking bucket functionality for different bucket types.""" 43 | 44 | @contextlib.contextmanager 45 | def _gcs_bucket_mocks_factory(file_data, bucket_type_val): 46 | """Creates mocks for a given file content and bucket type.""" 47 | is_real_gcs = ( 48 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com" 49 | ) 50 | if is_real_gcs: 51 | yield None 52 | return 53 | patch_target_lookup_bucket_type = ( 54 | "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._lookup_bucket_type" 55 | ) 56 | patch_target_sync_lookup_bucket_type = ( 57 | "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._sync_lookup_bucket_type" 58 | ) 59 | patch_target_create_mrd = ( 60 | "google.cloud.storage._experimental.asyncio.async_multi_range_downloader" 61 | ".AsyncMultiRangeDownloader.create_mrd" 62 | ) 63 | patch_target_gcsfs_cat_file = "gcsfs.core.GCSFileSystem._cat_file" 64 | 65 | async def download_side_effect(read_requests, **kwargs): 66 | if read_requests and len(read_requests) == 1: 67 | param_offset, param_length, buffer_arg = read_requests[0] 68 | if hasattr(buffer_arg, "write"): 69 | buffer_arg.write( 70 | file_data[param_offset : param_offset + param_length] 71 | ) 72 | return [mock.Mock(error=None)] 73 | 74 | mock_downloader = mock.Mock(spec=AsyncMultiRangeDownloader) 75 | mock_downloader.download_ranges = mock.AsyncMock( 76 | side_effect=download_side_effect 77 | ) 78 | 79 | mock_create_mrd = mock.AsyncMock(return_value=mock_downloader) 80 | with ( 81 | mock.patch( 82 | patch_target_sync_lookup_bucket_type, return_value=bucket_type_val 83 | ) as mock_sync_lookup_bucket_type, 84 | mock.patch( 85 | patch_target_lookup_bucket_type, 86 | return_value=bucket_type_val, 87 | ), 88 | mock.patch(patch_target_create_mrd, mock_create_mrd), 89 | mock.patch( 90 | patch_target_gcsfs_cat_file, 
new_callable=mock.AsyncMock 91 | ) as mock_cat_file, 92 | ): 93 | mocks = { 94 | "sync_lookup_bucket_type": mock_sync_lookup_bucket_type, 95 | "create_mrd": mock_create_mrd, 96 | "downloader": mock_downloader, 97 | "cat_file": mock_cat_file, 98 | } 99 | yield mocks 100 | # Common assertion for all tests using this mock 101 | mock_cat_file.assert_not_called() 102 | 103 | return _gcs_bucket_mocks_factory 104 | 105 | 106 | read_block_params = [ 107 | # Read specific chunk 108 | pytest.param(3, 10, None, json_data[3 : 3 + 10], id="offset=3, length=10"), 109 | # Read from beginning up to length 110 | pytest.param(0, 5, None, json_data[0:5], id="offset=0, length=5"), 111 | # Read from offset to end (simulate large length) 112 | pytest.param(15, 5000, None, json_data[15:], id="offset=15, length=large"), 113 | # Read beyond end of file (should return empty bytes) 114 | pytest.param(file_size + 10, 5, None, b"", id="offset>size, length=5"), 115 | # Read exactly at the end (zero length) 116 | pytest.param(file_size, 10, None, b"", id="offset=size, length=10"), 117 | # Read with delimiter 118 | pytest.param(1, 35, b"\n", lines[1], id="offset=1, length=35, delimiter=newline"), 119 | pytest.param(0, 30, b"\n", lines[0], id="offset=0, length=35, delimiter=newline"), 120 | pytest.param( 121 | 0, 35, b"\n", lines[0] + lines[1], id="offset=0, length=35, delimiter=newline" 122 | ), 123 | ] 124 | 125 | 126 | def test_read_block_zb(extended_gcsfs, gcs_bucket_mocks, subtests): 127 | for param in read_block_params: 128 | with subtests.test(id=param.id): 129 | offset, length, delimiter, expected_data = param.values 130 | path = file_path 131 | 132 | with gcs_bucket_mocks( 133 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL 134 | ) as mocks: 135 | result = extended_gcsfs.read_block(path, offset, length, delimiter) 136 | 137 | assert result == expected_data 138 | if mocks: 139 | mocks["sync_lookup_bucket_type"].assert_called_once_with( 140 | TEST_ZONAL_BUCKET 141 | ) 142 | if expected_data: 143 | mocks["downloader"].download_ranges.assert_called_with( 144 | [(offset, mock.ANY, mock.ANY)] 145 | ) 146 | else: 147 | mocks["downloader"].download_ranges.assert_not_called() 148 | 149 | 150 | @pytest.mark.parametrize("bucket_type_val", list(BucketType)) 151 | def test_open_uses_correct_blocksize_and_consistency_for_all_bucket_types( 152 | extended_gcs_factory, gcs_bucket_mocks, bucket_type_val 153 | ): 154 | csv_file = "2014-01-01.csv" 155 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}" 156 | csv_data = csv_files[csv_file] 157 | 158 | custom_filesystem_block_size = 100 * 1024 * 1024 159 | extended_gcsfs = extended_gcs_factory( 160 | block_size=custom_filesystem_block_size, consistency="md5" 161 | ) 162 | 163 | with gcs_bucket_mocks(csv_data, bucket_type_val=bucket_type_val): 164 | with extended_gcsfs.open(csv_file_path, "rb") as f: 165 | assert f.blocksize == custom_filesystem_block_size 166 | assert isinstance(f.checker, MD5Checker) 167 | 168 | file_block_size = 1024 * 1024 169 | with extended_gcsfs.open( 170 | csv_file_path, "rb", block_size=file_block_size, consistency="size" 171 | ) as f: 172 | assert f.blocksize == file_block_size 173 | assert isinstance(f.checker, SizeChecker) 174 | 175 | 176 | @pytest.mark.parametrize("bucket_type_val", list(BucketType)) 177 | def test_open_uses_default_blocksize_and_consistency_from_fs( 178 | extended_gcsfs, gcs_bucket_mocks, bucket_type_val 179 | ): 180 | csv_file = "2014-01-01.csv" 181 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}" 182 | csv_data = 
csv_files[csv_file] 183 | 184 | with gcs_bucket_mocks(csv_data, bucket_type_val=bucket_type_val): 185 | with extended_gcsfs.open(csv_file_path, "rb") as f: 186 | assert f.blocksize == extended_gcsfs.default_block_size 187 | assert type(f.checker) is ConsistencyChecker 188 | 189 | 190 | def test_read_small_zb(extended_gcsfs, gcs_bucket_mocks): 191 | csv_file = "2014-01-01.csv" 192 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}" 193 | csv_data = csv_files[csv_file] 194 | 195 | with gcs_bucket_mocks( 196 | csv_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL 197 | ) as mocks: 198 | with extended_gcsfs.open(csv_file_path, "rb", block_size=10) as f: 199 | out = [] 200 | i = 1 201 | while True: 202 | i += 1 203 | data = f.read(3) 204 | if data == b"": 205 | break 206 | out.append(data) 207 | assert extended_gcsfs.cat(csv_file_path) == b"".join(out) 208 | # cache drop 209 | assert len(f.cache.cache) < len(out) 210 | if mocks: 211 | mocks["sync_lookup_bucket_type"].assert_called_once_with( 212 | TEST_ZONAL_BUCKET 213 | ) 214 | 215 | 216 | def test_readline_zb(extended_gcsfs, gcs_bucket_mocks): 217 | all_items = chain.from_iterable( 218 | [files.items(), csv_files.items(), text_files.items()] 219 | ) 220 | for k, data in all_items: 221 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL): 222 | with extended_gcsfs.open("/".join([TEST_ZONAL_BUCKET, k]), "rb") as f: 223 | result = f.readline() 224 | expected = data.split(b"\n")[0] + (b"\n" if data.count(b"\n") else b"") 225 | assert result == expected 226 | 227 | 228 | def test_readline_from_cache_zb(extended_gcsfs, gcs_bucket_mocks): 229 | data = b"a,b\n11,22\n3,4" 230 | if not extended_gcsfs.on_google: 231 | with mock.patch.object( 232 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN 233 | ): 234 | with extended_gcsfs.open(a, "wb") as f: 235 | f.write(data) 236 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL): 237 | with extended_gcsfs.open(a, "rb") as f: 238 | result = f.readline() 239 | assert result == b"a,b\n" 240 | assert f.loc == 4 241 | assert f.cache.cache == data 242 | 243 | result = f.readline() 244 | assert result == b"11,22\n" 245 | assert f.loc == 10 246 | assert f.cache.cache == data 247 | 248 | result = f.readline() 249 | assert result == b"3,4" 250 | assert f.loc == 13 251 | assert f.cache.cache == data 252 | 253 | 254 | def test_readline_empty_zb(extended_gcsfs, gcs_bucket_mocks): 255 | data = b"" 256 | if not extended_gcsfs.on_google: 257 | with mock.patch.object( 258 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN 259 | ): 260 | with extended_gcsfs.open(b, "wb") as f: 261 | f.write(data) 262 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL): 263 | with extended_gcsfs.open(b, "rb") as f: 264 | result = f.readline() 265 | assert result == data 266 | 267 | 268 | def test_readline_blocksize_zb(extended_gcsfs, gcs_bucket_mocks): 269 | data = b"ab\n" + b"a" * (2**18) + b"\nab" 270 | if not extended_gcsfs.on_google: 271 | with mock.patch.object( 272 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN 273 | ): 274 | with extended_gcsfs.open(c, "wb") as f: 275 | f.write(data) 276 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL): 277 | with extended_gcsfs.open(c, "rb", block_size=2**18) as f: 278 | result = f.readline() 279 | expected = b"ab\n" 280 | assert result == expected 281 | 282 | result = f.readline() 283 | expected = b"a" * (2**18) + 
b"\n" 284 | assert result == expected 285 | 286 | result = f.readline() 287 | expected = b"ab" 288 | assert result == expected 289 | 290 | 291 | @pytest.mark.parametrize( 292 | "start,end,exp_offset,exp_length,exp_exc", 293 | [ 294 | (None, None, 0, file_size, None), # full file 295 | (-10, None, file_size - 10, 10, None), # start negative 296 | (10, -10, 10, file_size - 20, None), # end negative 297 | (20, 20, 20, 0, None), # zero-length slice 298 | (50, 40, None, None, ValueError), # end before start -> raises 299 | (-200, None, None, None, ValueError), # offset negative -> raises 300 | (file_size - 10, 200, file_size - 10, 10, None), # end > size clamps 301 | ( 302 | file_size + 10, 303 | file_size + 20, 304 | file_size + 10, 305 | 0, 306 | None, 307 | ), # offset > size -> empty 308 | ], 309 | ) 310 | def test_process_limits_parametrized( 311 | extended_gcsfs, start, end, exp_offset, exp_length, exp_exc 312 | ): 313 | if exp_exc is not None: 314 | with pytest.raises(exp_exc): 315 | extended_gcsfs.sync_process_limits_to_offset_and_length( 316 | file_path, start, end 317 | ) 318 | else: 319 | offset, length = extended_gcsfs.sync_process_limits_to_offset_and_length( 320 | file_path, start, end 321 | ) 322 | assert offset == exp_offset 323 | assert length == exp_length 324 | 325 | 326 | @pytest.mark.parametrize( 327 | "exception_to_raise", 328 | [ValueError, DataCorruption, Exception], 329 | ) 330 | def test_mrd_exception_handling(extended_gcsfs, gcs_bucket_mocks, exception_to_raise): 331 | """ 332 | Tests that _cat_file correctly propagates exceptions from mrd.download_ranges. 333 | """ 334 | with gcs_bucket_mocks( 335 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL 336 | ) as mocks: 337 | if extended_gcsfs.on_google: 338 | pytest.skip("Cannot mock exceptions on real GCS") 339 | 340 | # Configure the mock to raise a specified exception 341 | if exception_to_raise is DataCorruption: 342 | # The first argument is 'response', the message is in '*args' 343 | mocks["downloader"].download_ranges.side_effect = exception_to_raise( 344 | None, "Test exception raised" 345 | ) 346 | else: 347 | mocks["downloader"].download_ranges.side_effect = exception_to_raise( 348 | "Test exception raised" 349 | ) 350 | 351 | with pytest.raises(exception_to_raise, match="Test exception raised"): 352 | extended_gcsfs.read_block(file_path, 0, 10) 353 | 354 | mocks["downloader"].download_ranges.assert_called_once() 355 | 356 | 357 | def test_mrd_stream_cleanup(extended_gcsfs, gcs_bucket_mocks): 358 | """ 359 | Tests that mrd stream is properly closed with file closure. 360 | """ 361 | with gcs_bucket_mocks( 362 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL 363 | ) as mocks: 364 | if not extended_gcsfs.on_google: 365 | 366 | def close_side_effect(): 367 | mocks["downloader"].is_stream_open = False 368 | 369 | mocks["downloader"].close.side_effect = close_side_effect 370 | 371 | with extended_gcsfs.open(file_path, "rb") as f: 372 | assert f.mrd is not None 373 | 374 | assert True is f.closed 375 | assert False is f.mrd.is_stream_open 376 | -------------------------------------------------------------------------------- /gcsfs/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by githubs download-from-tag 3 | # feature). 
Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain. 8 | # Generated by versioneer-0.29 9 | # https://github.com/python-versioneer/python-versioneer 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import functools 15 | import os 16 | import re 17 | import subprocess 18 | import sys 19 | from typing import Any, Callable, Dict, List, Optional, Tuple 20 | 21 | 22 | def get_keywords() -> Dict[str, str]: 23 | """Get the keywords needed to look up the version information.""" 24 | # these strings will be replaced by git during git-archive. 25 | # setup.py/versioneer.py will grep for the variable names, so they must 26 | # each be defined on a line of their own. _version.py will just call 27 | # get_keywords(). 28 | git_refnames = " (HEAD -> main)" 29 | git_full = "4d4f04f51ccd0cdc43ef59da76aacfb3ed73db47" 30 | git_date = "2025-12-16 21:07:31 +0530" 31 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 32 | return keywords 33 | 34 | 35 | class VersioneerConfig: 36 | """Container for Versioneer configuration parameters.""" 37 | 38 | VCS: str 39 | style: str 40 | tag_prefix: str 41 | parentdir_prefix: str 42 | versionfile_source: str 43 | verbose: bool 44 | 45 | 46 | def get_config() -> VersioneerConfig: 47 | """Create, populate and return the VersioneerConfig() object.""" 48 | # these strings are filled in when 'setup.py versioneer' creates 49 | # _version.py 50 | cfg = VersioneerConfig() 51 | cfg.VCS = "git" 52 | cfg.style = "pep440" 53 | cfg.tag_prefix = "" 54 | cfg.parentdir_prefix = "None" 55 | cfg.versionfile_source = "gcsfs/_version.py" 56 | cfg.verbose = False 57 | return cfg 58 | 59 | 60 | class NotThisMethod(Exception): 61 | """Exception raised if a method is not valid for the current scenario.""" 62 | 63 | 64 | LONG_VERSION_PY: Dict[str, str] = {} 65 | HANDLERS: Dict[str, Dict[str, Callable]] = {} 66 | 67 | 68 | def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator 69 | """Create decorator to mark a method as the handler of a VCS.""" 70 | 71 | def decorate(f: Callable) -> Callable: 72 | """Store f in HANDLERS[vcs][method].""" 73 | if vcs not in HANDLERS: 74 | HANDLERS[vcs] = {} 75 | HANDLERS[vcs][method] = f 76 | return f 77 | 78 | return decorate 79 | 80 | 81 | def run_command( 82 | commands: List[str], 83 | args: List[str], 84 | cwd: Optional[str] = None, 85 | verbose: bool = False, 86 | hide_stderr: bool = False, 87 | env: Optional[Dict[str, str]] = None, 88 | ) -> Tuple[Optional[str], Optional[int]]: 89 | """Call the given command(s).""" 90 | assert isinstance(commands, list) 91 | process = None 92 | 93 | popen_kwargs: Dict[str, Any] = {} 94 | if sys.platform == "win32": 95 | # This hides the console window if pythonw.exe is used 96 | startupinfo = subprocess.STARTUPINFO() 97 | startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW 98 | popen_kwargs["startupinfo"] = startupinfo 99 | 100 | for command in commands: 101 | try: 102 | dispcmd = str([command] + args) 103 | # remember shell=False, so use git.cmd on windows, not just git 104 | process = subprocess.Popen( 105 | [command] + args, 106 | cwd=cwd, 107 | env=env, 108 | stdout=subprocess.PIPE, 109 | stderr=(subprocess.PIPE if hide_stderr else None), 110 | **popen_kwargs, 111 | ) 112 | break 113 | except OSError as e: 114 | if e.errno == errno.ENOENT: 115 | continue 
116 | if verbose: 117 | print("unable to run %s" % dispcmd) 118 | print(e) 119 | return None, None 120 | else: 121 | if verbose: 122 | print("unable to find command, tried %s" % (commands,)) 123 | return None, None 124 | stdout = process.communicate()[0].strip().decode() 125 | if process.returncode != 0: 126 | if verbose: 127 | print("unable to run %s (error)" % dispcmd) 128 | print("stdout was %s" % stdout) 129 | return None, process.returncode 130 | return stdout, process.returncode 131 | 132 | 133 | def versions_from_parentdir( 134 | parentdir_prefix: str, 135 | root: str, 136 | verbose: bool, 137 | ) -> Dict[str, Any]: 138 | """Try to determine the version from the parent directory name. 139 | 140 | Source tarballs conventionally unpack into a directory that includes both 141 | the project name and a version string. We will also support searching up 142 | two directory levels for an appropriately named parent directory 143 | """ 144 | rootdirs = [] 145 | 146 | for _ in range(3): 147 | dirname = os.path.basename(root) 148 | if dirname.startswith(parentdir_prefix): 149 | return { 150 | "version": dirname[len(parentdir_prefix) :], 151 | "full-revisionid": None, 152 | "dirty": False, 153 | "error": None, 154 | "date": None, 155 | } 156 | rootdirs.append(root) 157 | root = os.path.dirname(root) # up a level 158 | 159 | if verbose: 160 | print( 161 | "Tried directories %s but none started with prefix %s" 162 | % (str(rootdirs), parentdir_prefix) 163 | ) 164 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 165 | 166 | 167 | @register_vcs_handler("git", "get_keywords") 168 | def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: 169 | """Extract version information from the given file.""" 170 | # the code embedded in _version.py can just fetch the value of these 171 | # keywords. When used from setup.py, we don't want to import _version.py, 172 | # so we do it with a regexp instead. This function is not used from 173 | # _version.py. 174 | keywords: Dict[str, str] = {} 175 | try: 176 | with open(versionfile_abs, "r") as fobj: 177 | for line in fobj: 178 | if line.strip().startswith("git_refnames ="): 179 | mo = re.search(r'=\s*"(.*)"', line) 180 | if mo: 181 | keywords["refnames"] = mo.group(1) 182 | if line.strip().startswith("git_full ="): 183 | mo = re.search(r'=\s*"(.*)"', line) 184 | if mo: 185 | keywords["full"] = mo.group(1) 186 | if line.strip().startswith("git_date ="): 187 | mo = re.search(r'=\s*"(.*)"', line) 188 | if mo: 189 | keywords["date"] = mo.group(1) 190 | except OSError: 191 | pass 192 | return keywords 193 | 194 | 195 | @register_vcs_handler("git", "keywords") 196 | def git_versions_from_keywords( 197 | keywords: Dict[str, str], 198 | tag_prefix: str, 199 | verbose: bool, 200 | ) -> Dict[str, Any]: 201 | """Get version information from git keywords.""" 202 | if "refnames" not in keywords: 203 | raise NotThisMethod("Short version file found") 204 | date = keywords.get("date") 205 | if date is not None: 206 | # Use only the last line. Previous lines may contain GPG signature 207 | # information. 208 | date = date.splitlines()[-1] 209 | 210 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 211 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 212 | # -like" string, which we must then edit to make compliant), because 213 | # it's been around since git-1.5.3, and it's too difficult to 214 | # discover which version we're using, or to work around using an 215 | # older one. 
216 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 217 | refnames = keywords["refnames"].strip() 218 | if refnames.startswith("$Format"): 219 | if verbose: 220 | print("keywords are unexpanded, not using") 221 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 222 | refs = {r.strip() for r in refnames.strip("()").split(",")} 223 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 224 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 225 | TAG = "tag: " 226 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} 227 | if not tags: 228 | # Either we're using git < 1.8.3, or there really are no tags. We use 229 | # a heuristic: assume all version tags have a digit. The old git %d 230 | # expansion behaves like git log --decorate=short and strips out the 231 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 232 | # between branches and tags. By ignoring refnames without digits, we 233 | # filter out many common branch names like "release" and 234 | # "stabilization", as well as "HEAD" and "master". 235 | tags = {r for r in refs if re.search(r"\d", r)} 236 | if verbose: 237 | print("discarding '%s', no digits" % ",".join(refs - tags)) 238 | if verbose: 239 | print("likely tags: %s" % ",".join(sorted(tags))) 240 | for ref in sorted(tags): 241 | # sorting will prefer e.g. "2.0" over "2.0rc1" 242 | if ref.startswith(tag_prefix): 243 | r = ref[len(tag_prefix) :] 244 | # Filter out refs that exactly match prefix or that don't start 245 | # with a number once the prefix is stripped (mostly a concern 246 | # when prefix is '') 247 | if not re.match(r"\d", r): 248 | continue 249 | if verbose: 250 | print("picking %s" % r) 251 | return { 252 | "version": r, 253 | "full-revisionid": keywords["full"].strip(), 254 | "dirty": False, 255 | "error": None, 256 | "date": date, 257 | } 258 | # no suitable tags, so version is "0+unknown", but full hex is still there 259 | if verbose: 260 | print("no suitable tags, using unknown + full revision id") 261 | return { 262 | "version": "0+unknown", 263 | "full-revisionid": keywords["full"].strip(), 264 | "dirty": False, 265 | "error": "no suitable tags", 266 | "date": None, 267 | } 268 | 269 | 270 | @register_vcs_handler("git", "pieces_from_vcs") 271 | def git_pieces_from_vcs( 272 | tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command 273 | ) -> Dict[str, Any]: 274 | """Get version from 'git describe' in the root of the source tree. 275 | 276 | This only gets called if the git-archive 'subst' keywords were *not* 277 | expanded, and _version.py hasn't already been rewritten with a short 278 | version string, meaning we're inside a checked out source tree. 279 | """ 280 | GITS = ["git"] 281 | if sys.platform == "win32": 282 | GITS = ["git.cmd", "git.exe"] 283 | 284 | # GIT_DIR can interfere with correct operation of Versioneer. 285 | # It may be intended to be passed to the Versioneer-versioned project, 286 | # but that should not change where we get our version from. 
287 | env = os.environ.copy() 288 | env.pop("GIT_DIR", None) 289 | runner = functools.partial(runner, env=env) 290 | 291 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) 292 | if rc != 0: 293 | if verbose: 294 | print("Directory %s not under git control" % root) 295 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 296 | 297 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 298 | # if there isn't one, this yields HEX[-dirty] (no NUM) 299 | describe_out, rc = runner( 300 | GITS, 301 | [ 302 | "describe", 303 | "--tags", 304 | "--dirty", 305 | "--always", 306 | "--long", 307 | "--match", 308 | f"{tag_prefix}[[:digit:]]*", 309 | ], 310 | cwd=root, 311 | ) 312 | # --long was added in git-1.5.5 313 | if describe_out is None: 314 | raise NotThisMethod("'git describe' failed") 315 | describe_out = describe_out.strip() 316 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 317 | if full_out is None: 318 | raise NotThisMethod("'git rev-parse' failed") 319 | full_out = full_out.strip() 320 | 321 | pieces: Dict[str, Any] = {} 322 | pieces["long"] = full_out 323 | pieces["short"] = full_out[:7] # maybe improved later 324 | pieces["error"] = None 325 | 326 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) 327 | # --abbrev-ref was added in git-1.6.3 328 | if rc != 0 or branch_name is None: 329 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 330 | branch_name = branch_name.strip() 331 | 332 | if branch_name == "HEAD": 333 | # If we aren't exactly on a branch, pick a branch which represents 334 | # the current commit. If all else fails, we are on a branchless 335 | # commit. 336 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 337 | # --contains was added in git-1.5.4 338 | if rc != 0 or branches is None: 339 | raise NotThisMethod("'git branch --contains' returned error") 340 | branches = branches.split("\n") 341 | 342 | # Remove the first line if we're running detached 343 | if "(" in branches[0]: 344 | branches.pop(0) 345 | 346 | # Strip off the leading "* " from the list of branches. 347 | branches = [branch[2:] for branch in branches] 348 | if "master" in branches: 349 | branch_name = "master" 350 | elif not branches: 351 | branch_name = None 352 | else: 353 | # Pick the first branch that is returned. Good or bad. 354 | branch_name = branches[0] 355 | 356 | pieces["branch"] = branch_name 357 | 358 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 359 | # TAG might have hyphens. 360 | git_describe = describe_out 361 | 362 | # look for -dirty suffix 363 | dirty = git_describe.endswith("-dirty") 364 | pieces["dirty"] = dirty 365 | if dirty: 366 | git_describe = git_describe[: git_describe.rindex("-dirty")] 367 | 368 | # now we have TAG-NUM-gHEX or HEX 369 | 370 | if "-" in git_describe: 371 | # TAG-NUM-gHEX 372 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 373 | if not mo: 374 | # unparsable. Maybe git-describe is misbehaving? 
375 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 376 | return pieces 377 | 378 | # tag 379 | full_tag = mo.group(1) 380 | if not full_tag.startswith(tag_prefix): 381 | if verbose: 382 | fmt = "tag '%s' doesn't start with prefix '%s'" 383 | print(fmt % (full_tag, tag_prefix)) 384 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 385 | full_tag, 386 | tag_prefix, 387 | ) 388 | return pieces 389 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 390 | 391 | # distance: number of commits since tag 392 | pieces["distance"] = int(mo.group(2)) 393 | 394 | # commit: short hex revision ID 395 | pieces["short"] = mo.group(3) 396 | 397 | else: 398 | # HEX: no tags 399 | pieces["closest-tag"] = None 400 | out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) 401 | pieces["distance"] = len(out.split()) # total number of commits 402 | 403 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 404 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 405 | # Use only the last line. Previous lines may contain GPG signature 406 | # information. 407 | date = date.splitlines()[-1] 408 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 409 | 410 | return pieces 411 | 412 | 413 | def plus_or_dot(pieces: Dict[str, Any]) -> str: 414 | """Return a + if we don't already have one, else return a .""" 415 | if "+" in pieces.get("closest-tag", ""): 416 | return "." 417 | return "+" 418 | 419 | 420 | def render_pep440(pieces: Dict[str, Any]) -> str: 421 | """Build up version string, with post-release "local version identifier". 422 | 423 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 424 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 425 | 426 | Exceptions: 427 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 428 | """ 429 | if pieces["closest-tag"]: 430 | rendered = pieces["closest-tag"] 431 | if pieces["distance"] or pieces["dirty"]: 432 | rendered += plus_or_dot(pieces) 433 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 434 | if pieces["dirty"]: 435 | rendered += ".dirty" 436 | else: 437 | # exception #1 438 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 439 | if pieces["dirty"]: 440 | rendered += ".dirty" 441 | return rendered 442 | 443 | 444 | def render_pep440_branch(pieces: Dict[str, Any]) -> str: 445 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 446 | 447 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 448 | (a feature branch will appear "older" than the master branch). 449 | 450 | Exceptions: 451 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 452 | """ 453 | if pieces["closest-tag"]: 454 | rendered = pieces["closest-tag"] 455 | if pieces["distance"] or pieces["dirty"]: 456 | if pieces["branch"] != "master": 457 | rendered += ".dev0" 458 | rendered += plus_or_dot(pieces) 459 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 460 | if pieces["dirty"]: 461 | rendered += ".dirty" 462 | else: 463 | # exception #1 464 | rendered = "0" 465 | if pieces["branch"] != "master": 466 | rendered += ".dev0" 467 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 468 | if pieces["dirty"]: 469 | rendered += ".dirty" 470 | return rendered 471 | 472 | 473 | def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: 474 | """Split pep440 version string at the post-release segment. 
475 | 476 | Returns the release segments before the post-release and the 477 | post-release version number (or -1 if no post-release segment is present). 478 | """ 479 | vc = str.split(ver, ".post") 480 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None 481 | 482 | 483 | def render_pep440_pre(pieces: Dict[str, Any]) -> str: 484 | """TAG[.postN.devDISTANCE] -- No -dirty. 485 | 486 | Exceptions: 487 | 1: no tags. 0.post0.devDISTANCE 488 | """ 489 | if pieces["closest-tag"]: 490 | if pieces["distance"]: 491 | # update the post release segment 492 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) 493 | rendered = tag_version 494 | if post_version is not None: 495 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) 496 | else: 497 | rendered += ".post0.dev%d" % (pieces["distance"]) 498 | else: 499 | # no commits, use the tag as the version 500 | rendered = pieces["closest-tag"] 501 | else: 502 | # exception #1 503 | rendered = "0.post0.dev%d" % pieces["distance"] 504 | return rendered 505 | 506 | 507 | def render_pep440_post(pieces: Dict[str, Any]) -> str: 508 | """TAG[.postDISTANCE[.dev0]+gHEX] . 509 | 510 | The ".dev0" means dirty. Note that .dev0 sorts backwards 511 | (a dirty tree will appear "older" than the corresponding clean one), 512 | but you shouldn't be releasing software with -dirty anyways. 513 | 514 | Exceptions: 515 | 1: no tags. 0.postDISTANCE[.dev0] 516 | """ 517 | if pieces["closest-tag"]: 518 | rendered = pieces["closest-tag"] 519 | if pieces["distance"] or pieces["dirty"]: 520 | rendered += ".post%d" % pieces["distance"] 521 | if pieces["dirty"]: 522 | rendered += ".dev0" 523 | rendered += plus_or_dot(pieces) 524 | rendered += "g%s" % pieces["short"] 525 | else: 526 | # exception #1 527 | rendered = "0.post%d" % pieces["distance"] 528 | if pieces["dirty"]: 529 | rendered += ".dev0" 530 | rendered += "+g%s" % pieces["short"] 531 | return rendered 532 | 533 | 534 | def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: 535 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 536 | 537 | The ".dev0" means not master branch. 538 | 539 | Exceptions: 540 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] 541 | """ 542 | if pieces["closest-tag"]: 543 | rendered = pieces["closest-tag"] 544 | if pieces["distance"] or pieces["dirty"]: 545 | rendered += ".post%d" % pieces["distance"] 546 | if pieces["branch"] != "master": 547 | rendered += ".dev0" 548 | rendered += plus_or_dot(pieces) 549 | rendered += "g%s" % pieces["short"] 550 | if pieces["dirty"]: 551 | rendered += ".dirty" 552 | else: 553 | # exception #1 554 | rendered = "0.post%d" % pieces["distance"] 555 | if pieces["branch"] != "master": 556 | rendered += ".dev0" 557 | rendered += "+g%s" % pieces["short"] 558 | if pieces["dirty"]: 559 | rendered += ".dirty" 560 | return rendered 561 | 562 | 563 | def render_pep440_old(pieces: Dict[str, Any]) -> str: 564 | """TAG[.postDISTANCE[.dev0]] . 565 | 566 | The ".dev0" means dirty. 567 | 568 | Exceptions: 569 | 1: no tags. 0.postDISTANCE[.dev0] 570 | """ 571 | if pieces["closest-tag"]: 572 | rendered = pieces["closest-tag"] 573 | if pieces["distance"] or pieces["dirty"]: 574 | rendered += ".post%d" % pieces["distance"] 575 | if pieces["dirty"]: 576 | rendered += ".dev0" 577 | else: 578 | # exception #1 579 | rendered = "0.post%d" % pieces["distance"] 580 | if pieces["dirty"]: 581 | rendered += ".dev0" 582 | return rendered 583 | 584 | 585 | def render_git_describe(pieces: Dict[str, Any]) -> str: 586 | """TAG[-DISTANCE-gHEX][-dirty]. 
587 | 588 | Like 'git describe --tags --dirty --always'. 589 | 590 | Exceptions: 591 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 592 | """ 593 | if pieces["closest-tag"]: 594 | rendered = pieces["closest-tag"] 595 | if pieces["distance"]: 596 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 597 | else: 598 | # exception #1 599 | rendered = pieces["short"] 600 | if pieces["dirty"]: 601 | rendered += "-dirty" 602 | return rendered 603 | 604 | 605 | def render_git_describe_long(pieces: Dict[str, Any]) -> str: 606 | """TAG-DISTANCE-gHEX[-dirty]. 607 | 608 | Like 'git describe --tags --dirty --always -long'. 609 | The distance/hash is unconditional. 610 | 611 | Exceptions: 612 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 613 | """ 614 | if pieces["closest-tag"]: 615 | rendered = pieces["closest-tag"] 616 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 617 | else: 618 | # exception #1 619 | rendered = pieces["short"] 620 | if pieces["dirty"]: 621 | rendered += "-dirty" 622 | return rendered 623 | 624 | 625 | def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: 626 | """Render the given version pieces into the requested style.""" 627 | if pieces["error"]: 628 | return { 629 | "version": "unknown", 630 | "full-revisionid": pieces.get("long"), 631 | "dirty": None, 632 | "error": pieces["error"], 633 | "date": None, 634 | } 635 | 636 | if not style or style == "default": 637 | style = "pep440" # the default 638 | 639 | if style == "pep440": 640 | rendered = render_pep440(pieces) 641 | elif style == "pep440-branch": 642 | rendered = render_pep440_branch(pieces) 643 | elif style == "pep440-pre": 644 | rendered = render_pep440_pre(pieces) 645 | elif style == "pep440-post": 646 | rendered = render_pep440_post(pieces) 647 | elif style == "pep440-post-branch": 648 | rendered = render_pep440_post_branch(pieces) 649 | elif style == "pep440-old": 650 | rendered = render_pep440_old(pieces) 651 | elif style == "git-describe": 652 | rendered = render_git_describe(pieces) 653 | elif style == "git-describe-long": 654 | rendered = render_git_describe_long(pieces) 655 | else: 656 | raise ValueError("unknown style '%s'" % style) 657 | 658 | return { 659 | "version": rendered, 660 | "full-revisionid": pieces["long"], 661 | "dirty": pieces["dirty"], 662 | "error": None, 663 | "date": pieces.get("date"), 664 | } 665 | 666 | 667 | def get_versions() -> Dict[str, Any]: 668 | """Get version information or return default if unable to do so.""" 669 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 670 | # __file__, we can work backwards from there to the root. Some 671 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 672 | # case we can only use expanded keywords. 673 | 674 | cfg = get_config() 675 | verbose = cfg.verbose 676 | 677 | try: 678 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) 679 | except NotThisMethod: 680 | pass 681 | 682 | try: 683 | root = os.path.realpath(__file__) 684 | # versionfile_source is the relative path from the top of the source 685 | # tree (where the .git directory might live) to this file. Invert 686 | # this to find the root from __file__. 
687 | for _ in cfg.versionfile_source.split("/"): 688 | root = os.path.dirname(root) 689 | except NameError: 690 | return { 691 | "version": "0+unknown", 692 | "full-revisionid": None, 693 | "dirty": None, 694 | "error": "unable to find root of source tree", 695 | "date": None, 696 | } 697 | 698 | try: 699 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 700 | return render(pieces, cfg.style) 701 | except NotThisMethod: 702 | pass 703 | 704 | try: 705 | if cfg.parentdir_prefix: 706 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 707 | except NotThisMethod: 708 | pass 709 | 710 | return { 711 | "version": "0+unknown", 712 | "full-revisionid": None, 713 | "dirty": None, 714 | "error": "unable to compute version", 715 | "date": None, 716 | } 717 | --------------------------------------------------------------------------------
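A minimal usage sketch of the _version.py module listed above, assuming the gcsfs package is importable. get_versions() and render_pep440() are the functions defined in the listing; the tag in the example pieces dict is hypothetical, while the hash and date reuse the git keywords embedded in the file.

from gcsfs._version import get_versions, render_pep440

# Version string computed for the current tree or tarball, using the
# default "pep440" style configured in get_config().
print(get_versions()["version"])

# Worked example of the "pep440" renderer: a checkout that is 4 commits
# past a (hypothetical) tag "2025.3.0" and has uncommitted changes.
pieces = {
    "closest-tag": "2025.3.0",   # hypothetical tag, for illustration only
    "distance": 4,               # commits since that tag
    "short": "4d4f04f",          # first 7 chars of the embedded git_full hash
    "long": "4d4f04f51ccd0cdc43ef59da76aacfb3ed73db47",
    "dirty": True,               # working tree has local modifications
    "branch": "main",
    "error": None,
    "date": "2025-12-16T21:07:31+0530",
}
print(render_pep440(pieces))     # -> 2025.3.0+4.g4d4f04f.dirty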