├── gcsfs
│   ├── cli
│   │   ├── __init__.py
│   │   └── gcsfuse.py
│   ├── tests
│   │   ├── __init__.py
│   │   ├── derived
│   │   │   ├── __init__.py
│   │   │   ├── gcsfs_test.py
│   │   │   └── gcsfs_fixtures.py
│   │   ├── fake-secret.json
│   │   ├── settings.py
│   │   ├── test_credentials.py
│   │   ├── test_zb_hns_utils.py
│   │   ├── test_inventory_report_listing.py
│   │   ├── utils.py
│   │   ├── test_manyopens.py
│   │   ├── test_fuse.py
│   │   ├── fake-service-account-credentials.json
│   │   ├── test_init.py
│   │   ├── test_mapping.py
│   │   ├── test_core_versioned.py
│   │   ├── test_retry.py
│   │   ├── test_checkers.py
│   │   ├── conftest.py
│   │   └── test_extended_gcsfs.py
│   ├── dask_link.py
│   ├── mapping.py
│   ├── zb_hns_utils.py
│   ├── __init__.py
│   ├── zonal_file.py
│   ├── checkers.py
│   ├── retry.py
│   ├── extended_gcsfs.py
│   ├── credentials.py
│   └── _version.py
├── .gitattributes
├── docs
│   ├── source
│   │   ├── _static
│   │   │   └── custom.css
│   │   ├── api.rst
│   │   ├── fuse.rst
│   │   ├── developer.rst
│   │   ├── code-of-conduct.rst
│   │   ├── index.rst
│   │   ├── changelog.rst
│   │   └── conf.py
│   ├── environment.yml
│   ├── make.bat
│   └── Makefile
├── .isort.cfg
├── .coveragerc
├── requirements.txt
├── MANIFEST.in
├── .readthedocs.yaml
├── CONTRIBUTING.md
├── environment_gcsfs.yaml
├── README.md
├── .pre-commit-config.yaml
├── setup.cfg
├── setup.py
├── LICENSE.txt
├── .gitignore
├── .github
│   └── workflows
│       └── ci.yml
└── cloudbuild
    └── e2e-tests-cloudbuild.yaml
/gcsfs/cli/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gcsfs/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/gcsfs/tests/derived/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.gitattributes:
--------------------------------------------------------------------------------
1 | gcsfs/_version.py export-subst
2 |
--------------------------------------------------------------------------------
/gcsfs/dask_link.py:
--------------------------------------------------------------------------------
1 | def register():
2 | """
3 | Backward compatibility
4 | """
5 | pass
6 |
--------------------------------------------------------------------------------
/docs/source/_static/custom.css:
--------------------------------------------------------------------------------
1 | .classifier:before {
2 | font-style: normal;
3 | margin: 0.5em;
4 | content: ":";
5 | }
6 |
--------------------------------------------------------------------------------
/.isort.cfg:
--------------------------------------------------------------------------------
1 | [settings]
2 | profile = black
3 | known_third_party = aiohttp,click,decorator,fsspec,fuse,google,google_auth_oauthlib,pytest,requests,setuptools
4 |
--------------------------------------------------------------------------------
/docs/environment.yml:
--------------------------------------------------------------------------------
1 | name: gcsfs
2 | channels:
3 | - defaults
4 | dependencies:
5 | - python=3.10
6 | - docutils<0.17
7 | - sphinx
8 | - sphinx_rtd_theme
9 |
--------------------------------------------------------------------------------
/.coveragerc:
--------------------------------------------------------------------------------
1 | [run]
2 | include =
3 | gcsfs/*
4 |
5 | omit =
6 | gcsfs/tests/test*
7 |
8 | [report]
9 | show_missing = True
10 |
11 | [html]
12 | directory = coverage_html_report
13 |
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp!=4.0.0a0, !=4.0.0a1
2 | decorator>4.1.2
3 | fsspec==2025.12.0
4 | google-auth>=1.2
5 | google-auth-oauthlib
6 | google-cloud-storage
7 | google-cloud-storage-control
8 | requests
9 |
--------------------------------------------------------------------------------
/gcsfs/mapping.py:
--------------------------------------------------------------------------------
1 | from .core import GCSFileSystem
2 |
3 |
4 | def GCSMap(root, gcs=None, check=False, create=False):
5 | """For backward compatibility"""
6 | gcs = gcs or GCSFileSystem.current()
7 | return gcs.get_mapper(root, check=check, create=create)
8 |
--------------------------------------------------------------------------------
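
A minimal usage sketch for the mapper interface above (not part of the repository; the project and bucket names are placeholders for ones you control, and the bucket must already exist):

    import gcsfs

    fs = gcsfs.GCSFileSystem(project="my-project")
    m = fs.get_mapper("my-bucket/mapping-root")  # MutableMapping of key -> bytes

    m["x"] = b"123"              # stores gs://my-bucket/mapping-root/x
    assert m["x"] == b"123"
    assert list(m) == ["x"]

    # Legacy spelling kept only for backward compatibility:
    from gcsfs.mapping import GCSMap
    m2 = GCSMap("my-bucket/mapping-root", gcs=fs)
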
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include gcsfs *.py
2 | recursive-include docs *.rst
3 |
4 | include setup.py
5 | include README.md
6 | include LICENSE.txt
7 | include MANIFEST.in
8 | include requirements.txt
9 |
10 | prune docs/_build
11 | include versioneer.py
12 | include gcsfs/_version.py
13 |
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 |
3 | build:
4 | os: ubuntu-22.04
5 | tools:
6 | python: miniconda3-4.7
7 |
8 | conda:
9 | environment: docs/environment.yml
10 |
11 | python:
12 | install:
13 | - method: pip
14 | path: .
15 |
16 | sphinx:
17 | configuration: docs/source/conf.py
18 | fail_on_warning: true
19 |
--------------------------------------------------------------------------------
/gcsfs/tests/fake-secret.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "service_account",
3 | "private_key_id": "NOT A SECRET",
4 | "private_key": "ALSO NOT A SECRET",
5 | "client_email": "fake-name@fake-project.iam.gserviceaccount.com",
6 | "auth_uri": "https://accounts.google.com/o/oauth2/auth",
7 | "token_uri": "https://oauth2.googleapis.com/token"
8 | }
9 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | gcsfs is a community maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more.
2 |
3 | ## Project specific notes
4 |
5 | For testing remote API calls, this project uses [VCR](https://vcrpy.readthedocs.io/en/latest/). See the docs for more information: https://gcsfs.readthedocs.io/en/latest/developer.html.
6 |
--------------------------------------------------------------------------------
/gcsfs/tests/derived/gcsfs_test.py:
--------------------------------------------------------------------------------
1 | import fsspec.tests.abstract as abstract
2 |
3 | from gcsfs.tests.derived.gcsfs_fixtures import GcsfsFixtures
4 |
5 |
6 | class TestGcsfsCopy(abstract.AbstractCopyTests, GcsfsFixtures):
7 | pass
8 |
9 |
10 | class TestGcsfsGet(abstract.AbstractGetTests, GcsfsFixtures):
11 | pass
12 |
13 |
14 | class TestGcsfsPut(abstract.AbstractPutTests, GcsfsFixtures):
15 | pass
16 |
--------------------------------------------------------------------------------
/gcsfs/zb_hns_utils.py:
--------------------------------------------------------------------------------
1 | from io import BytesIO
2 |
3 |
4 | async def download_range(offset, length, mrd):
5 | """
6 | Downloads a byte range from the file asynchronously.
7 | """
8 | # If length = 0, mrd returns till end of file, so handle that case here
9 | if length == 0:
10 | return b""
11 | buffer = BytesIO()
12 | await mrd.download_ranges([(offset, length, buffer)])
13 | return buffer.getvalue()
14 |
--------------------------------------------------------------------------------
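
A self-contained sketch of the contract `download_range` relies on: the downloader must expose an async `download_ranges` method that writes into the `BytesIO` passed in each `(offset, length, buffer)` triple. `FakeDownloader` below is a stand-in for illustration only, not the real `AsyncMultiRangeDownloader`:

    import asyncio

    from gcsfs.zb_hns_utils import download_range


    class FakeDownloader:
        """Stand-in for AsyncMultiRangeDownloader, used only for illustration."""

        def __init__(self, blob: bytes):
            self.blob = blob

        async def download_ranges(self, ranges):
            for offset, length, buffer in ranges:
                buffer.write(self.blob[offset : offset + length])


    async def main():
        mrd = FakeDownloader(b"0123456789abcdef")
        print(await download_range(4, 6, mrd))  # b'456789'
        print(await download_range(4, 0, mrd))  # b'' (zero length short-circuits)


    asyncio.run(main())
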
/environment_gcsfs.yaml:
--------------------------------------------------------------------------------
1 | name: gcsfs_test
2 | channels:
3 | - conda-forge
4 | dependencies:
5 | - python==3.11
6 | - aiohttp
7 | - crcmod
8 | - decorator
9 | - fsspec
10 | - google-api-core
11 | - google-api-python-client
12 | - google-auth
13 | - google-auth-oauthlib
14 | - google-cloud-core
15 | - google-cloud-storage
16 | - grpcio
17 | - pytest
18 | - pytest-timeout
19 | - pytest-asyncio
20 | - pytest-subtests
21 | - requests
22 | - ujson
23 | - pip:
24 | - git+https://github.com/fsspec/filesystem_spec
25 |
--------------------------------------------------------------------------------
/gcsfs/tests/settings.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test")
4 | TEST_VERSIONED_BUCKET = os.getenv("GCSFS_TEST_VERSIONED_BUCKET", "gcsfs_test_versioned")
5 | TEST_ZONAL_BUCKET = os.getenv("GCSFS_ZONAL_TEST_BUCKET", "gcsfs_zonal_test")
6 | TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project")
7 | TEST_REQUESTER_PAYS_BUCKET = f"{TEST_BUCKET}_req_pay"
8 | TEST_KMS_KEY = os.getenv(
9 | "GCSFS_TEST_KMS_KEY",
10 | f"projects/{TEST_PROJECT}/locations/us/keyRings/gcsfs_test/cryptKeys/gcsfs_test_key",
11 | )
12 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_credentials.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from gcsfs import GCSFileSystem
4 | from gcsfs.credentials import GoogleCredentials
5 | from gcsfs.retry import HttpError
6 |
7 |
8 | def test_googlecredentials_none():
9 | credentials = GoogleCredentials(project="myproject", token=None, access="read_only")
10 | headers = {}
11 | credentials.apply(headers)
12 |
13 |
14 | @pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100])
15 | def test_credentials_from_raw_token(token):
16 | with pytest.raises(HttpError, match="Invalid Credentials"):
17 | fs = GCSFileSystem(project="myproject", token=token)
18 | fs.ls("/")
19 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | gcsfs
2 | =====
3 |
4 | [Build Status](https://github.com/fsspec/gcsfs/actions)
5 | [Docs](https://gcsfs.readthedocs.io/en/latest/?badge=latest)
6 |
7 | Pythonic file-system for Google Cloud Storage for fsspec.
8 |
9 |
10 | Support
11 | -------
12 |
13 | Work on this repository is supported in part by:
14 |
15 | "Anaconda, Inc. - Advancing AI through open source."
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.pre-commit-config.yaml:
--------------------------------------------------------------------------------
1 | # See https://pre-commit.com for more information
2 | # See https://pre-commit.com/hooks.html for more hooks
3 | exclude: versioneer.py
4 | repos:
5 | - repo: https://github.com/pre-commit/pre-commit-hooks
6 | rev: v6.0.0
7 | hooks:
8 | - id: end-of-file-fixer
9 | - id: requirements-txt-fixer
10 | - id: trailing-whitespace
11 | - repo: https://github.com/psf/black-pre-commit-mirror
12 | rev: 25.11.0
13 | hooks:
14 | - id: black
15 | args:
16 | - --target-version=py310
17 | - repo: https://github.com/pycqa/flake8
18 | rev: 7.3.0
19 | hooks:
20 | - id: flake8
21 | - repo: https://github.com/asottile/seed-isort-config
22 | rev: v2.2.0
23 | hooks:
24 | - id: seed-isort-config
25 | - repo: https://github.com/pre-commit/mirrors-isort
26 | rev: v5.10.1
27 | hooks:
28 | - id: isort
29 |
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [versioneer]
2 | VCS = git
3 | style = pep440
4 | versionfile_source = gcsfs/_version.py
5 | versionfile_build = gcsfs/_version.py
6 | tag_prefix =
7 |
8 | [flake8]
9 | exclude = versioneer.py,docs/source/conf.py
10 | ignore =
11 | # Extra space in brackets
12 | E20,
13 | # Multiple spaces around ","
14 | E231,E241,
15 | # Comments
16 | E26,
17 | # Import formatting
18 | E4,
19 | # Comparing types instead of isinstance
20 | E721,
21 | # Assigning lambda expression
22 | E731,
23 | # Ambiguous variable names
24 | E741,
25 | # line break before binary operator
26 | W503,
27 | # line break after binary operator
28 | W504,
29 | # redefinition of unused 'loop' from line 10
30 | F811,
31 | max-line-length = 120
32 |
33 | [tool:pytest]
34 | addopts =
35 | --color=yes --timeout=600
36 | log_cli = false
37 | log_cli_level = DEBUG
38 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_zb_hns_utils.py:
--------------------------------------------------------------------------------
1 | from unittest import mock
2 |
3 | import pytest
4 |
5 | from gcsfs import zb_hns_utils
6 |
7 |
8 | @pytest.mark.asyncio
9 | async def test_download_range():
10 | """
11 | Tests that download_range calls mrd.download_ranges with the correct
12 | parameters and returns the data written to the buffer.
13 | """
14 | offset = 10
15 | length = 20
16 | mock_mrd = mock.AsyncMock()
17 | expected_data = b"test data from download"
18 |
19 | # Simulate the download_ranges method writing data to the buffer
20 | async def mock_download_ranges(ranges):
21 | _offset, _length, buffer = ranges[0]
22 | buffer.write(expected_data)
23 |
24 | mock_mrd.download_ranges.side_effect = mock_download_ranges
25 |
26 | result = await zb_hns_utils.download_range(offset, length, mock_mrd)
27 |
28 | mock_mrd.download_ranges.assert_called_once_with([(offset, length, mock.ANY)])
29 | assert result == expected_data
30 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_inventory_report_listing.py:
--------------------------------------------------------------------------------
1 | import gcsfs.checkers
2 | import gcsfs.tests.settings
3 | from gcsfs.inventory_report import InventoryReport
4 |
5 | TEST_BUCKET = gcsfs.tests.settings.TEST_BUCKET
6 |
7 |
8 | # Basic integration test to ensure listing returns the correct result.
9 | def test_ls_base(monkeypatch, gcs):
10 | # First get results from original listing.
11 | items = gcs.ls(TEST_BUCKET)
12 |
13 | async def mock_fetch_snapshot(*args, **kwargs):
14 | return [{"name": item} for item in items], []
15 |
16 | # Patch the fetch_snapshot method with the replacement.
17 | monkeypatch.setattr(InventoryReport, "fetch_snapshot", mock_fetch_snapshot)
18 |
19 | inventory_report_info = {
20 | "location": "location",
21 | "id": "id",
22 | "use_snapshot_listing": False,
23 | }
24 |
25 | # Then get results from listing with inventory report.
26 | actual_items = gcs.ls(TEST_BUCKET, inventory_report_info=inventory_report_info)
27 |
28 | # Check equality.
29 | assert actual_items == items
30 |
--------------------------------------------------------------------------------
/gcsfs/__init__.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 |
4 | from ._version import get_versions
5 |
6 | logger = logging.getLogger(__name__)
7 | __version__ = get_versions()["version"]
8 | del get_versions
9 | from .core import GCSFileSystem
10 | from .mapping import GCSMap
11 |
12 | if os.getenv("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT", "false").lower() in ("true", "1"):
13 | try:
14 | from .extended_gcsfs import ExtendedGcsFileSystem as GCSFileSystem
15 |
16 | logger.info(
17 | "gcsfs experimental features enabled via GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT."
18 | )
19 | except ImportError as e:
20 | logger.warning(
21 | f"GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT is set, but failed to import experimental features: {e}"
22 | )
23 | # Fallback to core GCSFileSystem, do not register here
24 |
25 | # TODO: GCSMap still refers to the original GCSFileSystem. This will be
26 | # addressed in a future update.
27 | __all__ = ["GCSFileSystem", "GCSMap"]
28 |
29 | from . import _version
30 |
31 | __version__ = _version.get_versions()["version"]
32 |
--------------------------------------------------------------------------------
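
The conditional import above is driven entirely by the GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT environment variable at import time. A minimal sketch of opting in (the variable must be set before gcsfs is first imported; the project name is a placeholder):

    import os

    # Must be set before the first `import gcsfs`; accepted values are "true" or "1".
    os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = "true"

    import gcsfs

    # With the flag set (and the experimental dependencies importable),
    # gcsfs.GCSFileSystem resolves to ExtendedGcsFileSystem; otherwise it
    # falls back to the core implementation.
    fs = gcsfs.GCSFileSystem(project="my-project")
    print(type(fs).__name__)
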
/gcsfs/tests/utils.py:
--------------------------------------------------------------------------------
1 | import os
2 | import shutil
3 | import tempfile
4 | from contextlib import contextmanager
5 |
6 |
7 | @contextmanager
8 | def ignoring(*exceptions):
9 | try:
10 | yield
11 | except exceptions:
12 | pass
13 |
14 |
15 | @contextmanager
16 | def tempdir(dir=None):
17 | dirname = tempfile.mkdtemp(dir=dir)
18 | shutil.rmtree(dirname, ignore_errors=True)
19 |
20 | try:
21 | yield dirname
22 | finally:
23 | if os.path.exists(dirname):
24 | shutil.rmtree(dirname, ignore_errors=True)
25 |
26 |
27 | @contextmanager
28 | def tmpfile(extension="", dir=None):
29 | extension = "." + extension.lstrip(".")
30 | handle, filename = tempfile.mkstemp(extension, dir=dir)
31 | os.close(handle)
32 | os.remove(filename)
33 |
34 | try:
35 | yield filename
36 | finally:
37 | if os.path.exists(filename):
38 | if os.path.isdir(filename):
39 | shutil.rmtree(filename)
40 | else:
41 | with ignoring(OSError):
42 | os.remove(filename)
43 |
--------------------------------------------------------------------------------
/docs/source/api.rst:
--------------------------------------------------------------------------------
1 | API
2 | ===
3 |
4 | .. currentmodule:: gcsfs.core
5 |
6 | .. autosummary::
7 | GCSFileSystem
8 | GCSFileSystem.cat
9 | GCSFileSystem.du
10 | GCSFileSystem.exists
11 | GCSFileSystem.get
12 | GCSFileSystem.glob
13 | GCSFileSystem.info
14 | GCSFileSystem.ls
15 | GCSFileSystem.mkdir
16 | GCSFileSystem.mv
17 | GCSFileSystem.open
18 | GCSFileSystem.put
19 | GCSFileSystem.read_block
20 | GCSFileSystem.rm
21 | GCSFileSystem.tail
22 | GCSFileSystem.touch
23 | GCSFileSystem.get_mapper
24 |
25 | .. autosummary::
26 | GCSFile
27 | GCSFile.close
28 | GCSFile.flush
29 | GCSFile.info
30 | GCSFile.read
31 | GCSFile.seek
32 | GCSFile.tell
33 | GCSFile.write
34 |
35 | .. currentmodule:: gcsfs.mapping
36 |
37 | .. currentmodule:: gcsfs.core
38 |
39 | .. autoclass:: GCSFileSystem
40 | :members:
41 | :inherited-members:
42 |
43 | .. autoclass:: GCSFile
44 | :members:
45 | :inherited-members:
46 |
47 | .. currentmodule:: gcsfs.mapping
48 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_manyopens.py:
--------------------------------------------------------------------------------
1 | """
2 | Test helper to open the same file many times.
3 |
4 | This is not a python unit test, but rather a standalone program that will open
5 | a file repeatedly, to check whether a cloud storage transient error can
6 | defeat gcsfs. This is to be run against real GCS, since we cannot capture
7 | HTTP exceptions with VCR.
8 |
9 | Ideally you should see nothing, just the attempt count go up until we're done.
10 | """
11 |
12 | import sys
13 |
14 | import gcsfs
15 |
16 |
17 | def run():
18 | if len(sys.argv) != 4:
19 | print(
20 | "usage: python -m gcsfs.tests.test_manyopens "
21 | "<project> <credentials> <file>"
22 | )
23 | return
24 | project = sys.argv[1]
25 | credentials = sys.argv[2]
26 | file = sys.argv[3]
27 | print("project: " + project)
28 | for i in range(2000):
29 | # Issue #12 only reproduces if I re-create the fs object every time.
30 | fs = gcsfs.GCSFileSystem(project=project, token=credentials)
31 | print("attempt %s" % i)
32 | with fs.open(file, "rb") as o:
33 | o.readline()
34 |
35 |
36 | if __name__ == "__main__":
37 | run()
38 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from setuptools import setup
4 |
5 | import versioneer
6 |
7 | setup(
8 | name="gcsfs",
9 | version=versioneer.get_version(),
10 | cmdclass=versioneer.get_cmdclass(),
11 | description="Convenient Filesystem interface over GCS",
12 | url="https://github.com/fsspec/gcsfs",
13 | maintainer="Martin Durant",
14 | maintainer_email="mdurant@anaconda.com",
15 | license="BSD",
16 | classifiers=[
17 | "Development Status :: 4 - Beta",
18 | "Intended Audience :: Developers",
19 | "License :: OSI Approved :: BSD License",
20 | "Operating System :: OS Independent",
21 | "Programming Language :: Python :: 3.10",
22 | "Programming Language :: Python :: 3.11",
23 | "Programming Language :: Python :: 3.12",
24 | "Programming Language :: Python :: 3.13",
25 | "Programming Language :: Python :: 3.14",
26 | ],
27 | keywords=["google-cloud-storage", "gcloud", "file-system"],
28 | packages=["gcsfs", "gcsfs.cli"],
29 | install_requires=open("requirements.txt").read().strip().split("\n"),
30 | extras_require={"gcsfuse": ["fusepy"], "crc": ["crcmod"]},
31 | python_requires=">=3.10",
32 | long_description_content_type="text/markdown",
33 | long_description=open("README.md").read(),
34 | zip_safe=False,
35 | )
36 |
--------------------------------------------------------------------------------
/gcsfs/tests/derived/gcsfs_fixtures.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import pytest
4 | from fsspec.tests.abstract import AbstractFixtures
5 |
6 | from gcsfs.core import GCSFileSystem
7 | from gcsfs.tests.conftest import _cleanup_gcs, allfiles
8 | from gcsfs.tests.settings import TEST_BUCKET
9 |
10 |
11 | class GcsfsFixtures(AbstractFixtures):
12 | @pytest.fixture(scope="class")
13 | def fs(self, gcs_factory, buckets_to_delete):
14 | GCSFileSystem.clear_instance_cache()
15 | gcs = gcs_factory()
16 | try: # ensure we're empty.
17 | # Create the bucket if it doesn't exist, otherwise clean it.
18 | if not gcs.exists(TEST_BUCKET):
19 | buckets_to_delete.add(TEST_BUCKET)
20 | gcs.mkdir(TEST_BUCKET)
21 | else:
22 | try:
23 | gcs.rm(gcs.find(TEST_BUCKET))
24 | except Exception as e:
25 | logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
26 |
27 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
28 | gcs.invalidate_cache()
29 | yield gcs
30 | finally:
31 | _cleanup_gcs(gcs)
32 |
33 | @pytest.fixture
34 | def fs_path(self):
35 | return TEST_BUCKET
36 |
37 | @pytest.fixture
38 | def supports_empty_directories(self):
39 | return False
40 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | BSD 3-Clause License
2 |
3 | Copyright (c) 2014-2018, Anaconda, Inc. and contributors
4 | All rights reserved.
5 |
6 | Redistribution and use in source and binary forms, with or without
7 | modification, are permitted provided that the following conditions are met:
8 |
9 | * Redistributions of source code must retain the above copyright notice, this
10 | list of conditions and the following disclaimer.
11 |
12 | * Redistributions in binary form must reproduce the above copyright notice,
13 | this list of conditions and the following disclaimer in the documentation
14 | and/or other materials provided with the distribution.
15 |
16 | * Neither the name of the copyright holder nor the names of its
17 | contributors may be used to endorse or promote products derived from
18 | this software without specific prior written permission.
19 |
20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # dask
2 | dask-worker-space/
3 |
4 | # private notebooks
5 | private/
6 |
7 | # Pyenv stuff
8 | .python-version
9 |
10 | # Byte-compiled / optimized / DLL files
11 | __pycache__/
12 | *.py[cod]
13 |
14 | # C extensions
15 | *.so
16 |
17 | # Distribution / packaging
18 | .Python
19 | env/
20 | build/
21 | develop-eggs/
22 | dist/
23 | downloads/
24 | pip-wheel-metadata/
25 | eggs/
26 | .eggs/
27 | lib/
28 | lib64/
29 | parts/
30 | sdist/
31 | var/
32 | *.egg-info/
33 | .installed.cfg
34 | *.egg
35 |
36 | # PyInstaller
37 | # Usually these files are written by a python script from a template
38 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
39 | *.manifest
40 | *.spec
41 |
42 | # Installer logs
43 | pip-log.txt
44 | pip-delete-this-directory.txt
45 |
46 | # Unit test / coverage reports
47 | htmlcov/
48 | .tox/
49 | .coverage
50 | .coverage.*
51 | .cache
52 | nosetests.xml
53 | coverage.xml
54 | *.cover
55 | junit/
56 |
57 | # Translations
58 | *.mo
59 | *.pot
60 |
61 | # Django stuff:
62 | *.log
63 |
64 | # Sphinx documentation
65 | docs/_build/
66 |
67 | # PyBuilder
68 | target/
69 |
70 | # DotEnv configuration
71 | .env
72 |
73 | # Database
74 | *.db
75 | *.rdb
76 |
77 | # Pycharm
78 | .idea
79 |
80 | # VS Code
81 | .vscode/
82 |
83 | # Spyder
84 | .spyproject/
85 |
86 | # Jupyter NB Checkpoints
87 | .ipynb_checkpoints/
88 |
89 | # exclude data from source control by default
90 | /data/
91 |
92 | # Mac OS-specific storage files
93 | .DS_Store
94 |
95 | # vim
96 | *.swp
97 | *.swo
98 |
99 | # Mypy cache
100 | .mypy_cache/
101 |
102 | #Pytest cache
103 | .pytest_cache/
104 |
105 | libs/*.whl
106 |
--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
1 | name: CI
2 |
3 | on: [push, pull_request, workflow_dispatch]
4 |
5 | defaults:
6 | run:
7 | shell: bash -l -eo pipefail {0}
8 |
9 | jobs:
10 | test:
11 | name: Python ${{ matrix.python-version }}
12 | runs-on: ubuntu-latest
13 | timeout-minutes: 30
14 | strategy:
15 | fail-fast: false
16 | matrix:
17 | python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"]
18 |
19 | steps:
20 | - name: Checkout source
21 | uses: actions/checkout@v5
22 |
23 | - name: Setup conda
24 | uses: conda-incubator/setup-miniconda@v3
25 | with:
26 | environment-file: environment_gcsfs.yaml
27 | python-version: ${{ matrix.python-version }}
28 | activate-environment: gcsfs_test
29 |
30 | - name: Conda info
31 | run: |
32 | conda list
33 | conda --version
34 |
35 | - name: install
36 | run: |
37 | pip install -e .
38 | - name: Run Standard Tests
39 | run: |
40 | export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json
41 | pytest -vv -s \
42 | --log-format="%(asctime)s %(levelname)s %(message)s" \
43 | --log-date-format="%H:%M:%S" \
44 | gcsfs/
45 | - name: Run Tests with experimental support
46 | run: |
47 | export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json
48 | export GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT="true"
49 | pytest -vv -s \
50 | --log-format="%(asctime)s %(levelname)s %(message)s" \
51 | --log-date-format="%H:%M:%S" \
52 | gcsfs/
53 |
54 | lint:
55 | name: lint
56 | runs-on: ubuntu-latest
57 | steps:
58 | - uses: actions/checkout@v5
59 | - uses: actions/setup-python@v6
60 | with:
61 | python-version: "3.11"
62 | - uses: pre-commit/action@v3.0.1
63 |
--------------------------------------------------------------------------------
/gcsfs/zonal_file.py:
--------------------------------------------------------------------------------
1 | from fsspec import asyn
2 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
3 | AsyncMultiRangeDownloader,
4 | )
5 |
6 | from gcsfs.core import GCSFile
7 |
8 |
9 | class ZonalFile(GCSFile):
10 | """
11 | ZonalFile is a subclass of GCSFile that handles data operations for
12 | Zonal buckets only, using a high-performance gRPC path.
13 | """
14 |
15 | def __init__(self, *args, **kwargs):
16 | """
17 | Initializes the ZonalFile object.
18 | """
19 | super().__init__(*args, **kwargs)
20 | self.mrd = None
21 | if "r" in self.mode:
22 | self.mrd = asyn.sync(
23 | self.gcsfs.loop, self._init_mrd, self.bucket, self.key, self.generation
24 | )
25 | else:
26 | raise NotImplementedError(
27 | "Only read operations are currently supported for Zonal buckets."
28 | )
29 |
30 | async def _init_mrd(self, bucket_name, object_name, generation=None):
31 | """
32 | Initializes the AsyncMultiRangeDownloader.
33 | """
34 | return await AsyncMultiRangeDownloader.create_mrd(
35 | self.gcsfs.grpc_client, bucket_name, object_name, generation
36 | )
37 |
38 | def _fetch_range(self, start, end):
39 | """
40 | Overrides the default _fetch_range to implement the gRPC read path.
41 |
42 | """
43 | try:
44 | return self.gcsfs.cat_file(self.path, start=start, end=end, mrd=self.mrd)
45 | except RuntimeError as e:
46 | if "not satisfiable" in str(e):
47 | return b""
48 | raise
49 |
50 | def close(self):
51 | """
52 | Closes the ZonalFile and the underlying AsyncMultiRangeDownloader.
53 | """
54 | if self.mrd:
55 | asyn.sync(self.gcsfs.loop, self.mrd.close)
56 | super().close()
57 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_fuse.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import tempfile
4 | import threading
5 | import time
6 | from functools import partial
7 |
8 | import pytest
9 |
10 | from gcsfs.tests.settings import TEST_BUCKET
11 |
12 |
13 | @pytest.fixture
14 | def fsspec_fuse_run():
15 | """Fixture catches other errors on fuse import."""
16 | try:
17 | _fuse = pytest.importorskip("fuse") # noqa
18 |
19 | from fsspec.fuse import run as _fsspec_fuse_run
20 |
21 | return _fsspec_fuse_run
22 | except Exception as error:
23 | logging.debug("Error importing fuse: %s", error)
24 | pytest.skip("Error importing fuse.")
25 |
26 |
27 | @pytest.mark.xfail(reason="Failing test not previously tested.")
28 | @pytest.mark.timeout(180)
29 | def test_fuse(gcs, fsspec_fuse_run):
30 | mountpath = tempfile.mkdtemp()
31 | _run = partial(fsspec_fuse_run, gcs, TEST_BUCKET + "/", mountpath)
32 | th = threading.Thread(target=_run)
33 | th.daemon = True
34 | th.start()
35 |
36 | time.sleep(5)
37 | timeout = 20
38 | n = 40
39 | for i in range(n):
40 | logging.debug(f"Attempt # {i + 1} / {n} to create lock file.")
41 | try:
42 | open(os.path.join(mountpath, "lock"), "w").close()
43 | os.remove(os.path.join(mountpath, "lock"))
44 | break
45 | except Exception as error: # noqa: E722
46 | logging.debug("Error: %s", error)
47 | time.sleep(0.5)
48 | timeout -= 0.5
49 | assert timeout > 0
50 | else:
51 | raise AssertionError(f"Failed to create lock file after {n} attempts.")
52 |
53 | with open(os.path.join(mountpath, "hello"), "w") as f:
54 | # NB this is in TEXT mode
55 | f.write("hello")
56 | files = os.listdir(mountpath)
57 | assert "hello" in files
58 | with open(os.path.join(mountpath, "hello")) as f:
59 | # NB this is in TEXT mode
60 | assert f.read() == "hello"
61 |
--------------------------------------------------------------------------------
/gcsfs/tests/fake-service-account-credentials.json:
--------------------------------------------------------------------------------
1 | {
2 | "type": "service_account",
3 | "project_id": "gcsfs",
4 | "private_key_id": "84e3fd6d7101ec632e7348e8940b2aca71133e71",
5 | "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAJWz1KlBu2jRE\nlUahHKuJes34hj4pr8ADhgejpAguBBrubXVvSro7aSSbvyDC/GIcyDQ8Q33YK/kT\nufQvCez7iIACbtP53o6WjcrIAP+l8z9RUL9so+sBCaVRZzh74+cEMfWIbc3ACBB5\nU2BPBWQFtr3Qtbe8TUJ+liNcLb8I2JznfydHvl9cn0/50HeOB99Xho5JAY75aE0Y\nT+/aMTFlr/kUbekLRRi4pyE+uOA/ei5RmfwzqO366YLMtEC2DaHwTqSuxBWnbtTW\nu/OvYpmPHazd6own2zJLQ0Elnm5WC/d9YmxhHi/8pJFkkbVf/2CYWEBbmBI3ZOx3\n/nHQwcIPAgMBAAECggEAUztC/dYE/me10WmKLTrykTxpYTihT8RqG/ygbYGd63Tq\nx5IRlxJbJmYOrgp2IhBaXZZZjis8JXoyzBk2TXPyvChuLt+cIfYGdO/ZwZYxJ0z9\nhfdA3EoK/6mSe3cHcB8SEG6lqaHKyN6VaEC2DLTMlW8JvREiFEaxQY0+puzH/ge4\n2EypCP4pvlveH78EIIipPgWcJYGpv0bv8KErECuVHRjJv6vZqUjQdcIi73mCz/5u\nnQqLY8j9lOuCr9vBis7DZIyY2tn4vfqcqxfH9wuIFXnzIQW6Wyg0+bBQydHg1kJ2\nFOszfkBVxZ6LpcHGB4CV4c5z7Me2cMReXQz6VsyoLQKBgQD9v92rHZYDBy4/vGxx\nbpfUkAlcCGW8GXu+qsdmyhZdjSdjDLY6lav+6UoHIJgmnA7LsKPFgnEDrdn78KBb\n3wno3VHfozL5kF887q9hC/+UurwScCKIw5QkmWtsStVgjr6wPmAu6rspMz5xNjaa\nSU4YzlNcbBUUXUawhXytWPR+OwKBgQDB2bDCD00R2yfYFdjAKapqenOtMvrnihUi\nW9Se7Yizme7s25fDxF5CBPpOdKPU2EZUlqBC/5182oMUP/xYUOHJkuUhbYcvU0qr\n+BQewLwr6rs+O1QPTh/6e70SUFR+YJLaAHkDc6fvcdjtl+Zx/p02Zj+UiW3/D4Jj\nc0EqVr4qPQKBgQCbJx3a6xQ2dcWJoySLlxuvFQMkCt5pzQsk4jdaWmaifRSAM92Y\npLut+ecRxJRDx1gko7T/p2qC3WJT8iWbBx2ADRNqstcQUX5qO2dw5202+5bTj00O\nYsfKOSS96mPdzmo6SWl2RoB6CKM9hfCNFhVyhXXjJRMeiIoYlQZO1/1m0QKBgCzz\nat6FJ8z1MdcUsc9VmhPY00wdXzsjtOTjwHkeAa4MCvBXt2iI94Z9mwFoYLkxcZWZ\n3A3NMlrKXMzsTXq5PrI8Yu+Oc2OQ/+bCvv+ml7vjUYoLveFSr22pFd3STNWFVWhB\n5c3cGtwWXUQzDhfu/8umiCXMfHpBwW2IQ1srBCvNAoGATcC3oCFBC/HdGxdeJC5C\n59EoFvKdZsAdc2I5GS/DtZ1Wo9sXqubCaiUDz+4yty+ssHIZ1ikFr8rWfL6KFEs2\niTe+kgM/9FLFtftf1WDpbfIOumbz/6CiGLqsGNlO3ZaU0kYJ041SZ8RleTOYa0zO\noSTLwBo3vje+aflytEwS8SI=\n-----END PRIVATE KEY-----",
6 | "client_email": "fake@gscfs.iam.gserviceaccount.com",
7 | "auth_uri": "https://accounts.google.com/o/oauth2/auth",
8 | "token_uri": "https://oauth2.googleapis.com/token"
9 | }
10 |
--------------------------------------------------------------------------------
/docs/source/fuse.rst:
--------------------------------------------------------------------------------
1 | GCSFS and FUSE
2 | ==============
3 |
4 | Warning, this functionality is **experimental**.
5 |
6 | FUSE_ is a mechanism to mount user-level filesystems in unix-like
7 | systems (linux, osx, etc.). GCSFS is able to use FUSE to present remote
8 | data/keys as if they were a directory on your local file-system. This
9 | allows for standard shell command manipulation, and loading of data
10 | by libraries that can only handle local file-paths (e.g., netCDF/HDF5).
11 |
12 | .. _FUSE: https://github.com/libfuse/libfuse
13 |
14 | Requirements
15 | -------------
16 |
17 | In addition to a standard installation of GCSFS, you also need:
18 |
19 | - libfuse as a system install. The way to install this will depend
20 | on your OS. Examples include ``sudo apt-get install fuse``,
21 | ``sudo yum install fuse``, and downloading from osxfuse_.
22 |
23 | - fusepy_, which can be installed via conda or pip
24 |
25 | - pandas, which can also be installed via conda or pip (this library is
26 | used only for its timestring parsing).
27 |
28 | .. _osxfuse: https://osxfuse.github.io/
29 | .. _fusepy: https://github.com/fusepy/fusepy
30 |
31 | Usage
32 | -----
33 |
34 | FUSE functionality is available via the ``fsspec.fuse`` module. See the
35 | docstrings for further details.
36 |
37 | .. code-block:: python
38 |
39 | gcs = gcsfs.GCSFileSystem(...)
40 | from fsspec.fuse import run
41 | run(gcs, "bucket/path", "local/path", foreground=True, threads=False)
42 |
43 | Caveats
44 | -------
45 |
46 | This functionality is experimental. The command usage may change, and you should
47 | expect exceptions.
48 |
49 | Furthermore:
50 |
51 | - although mutation operations tentatively work, you should not at the moment
52 | depend on gcsfuse as a reliable system that won't lose your data.
53 |
54 | - permissions on GCS are complicated, so all files will be shown as fully-open
55 | 0o777, regardless of state. If a read fails, you likely don't have the right
56 | permissions.
57 |
--------------------------------------------------------------------------------
/gcsfs/cli/gcsfuse.py:
--------------------------------------------------------------------------------
1 | import logging
2 |
3 | import click
4 | from fuse import FUSE
5 |
6 | from gcsfs.gcsfuse import GCSFS
7 |
8 |
9 | @click.command()
10 | @click.argument("bucket", type=str, required=True)
11 | @click.argument("mount_point", type=str, required=True)
12 | @click.option(
13 | "--token",
14 | type=str,
15 | required=False,
16 | default=None,
17 | help="Token to use for authentication",
18 | )
19 | @click.option(
20 | "--project-id", type=str, required=False, default="", help="Billing Project ID"
21 | )
22 | @click.option(
23 | "--foreground/--background",
24 | default=True,
25 | help="Run in the foreground or as a background process",
26 | )
27 | @click.option(
28 | "--threads/--no-threads", default=True, help="Whether to run with threads"
29 | )
30 | @click.option(
31 | "--cache_files", type=int, default=10, help="Number of open files to cache"
32 | )
33 | @click.option(
34 | "-v",
35 | "--verbose",
36 | count=True,
37 | help="Set logging level. '-v' for 'gcsfuse' logging."
38 | "'-v -v' for complete debug logging.",
39 | )
40 | def main(
41 | bucket, mount_point, token, project_id, foreground, threads, cache_files, verbose
42 | ):
43 | """Mount a Google Cloud Storage (GCS) bucket to a local directory"""
44 |
51 | fmt = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s"
52 | if verbose == 1:
53 | logging.basicConfig(level=logging.INFO, format=fmt)
54 | logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG)
55 | if verbose > 1:
56 | logging.basicConfig(level=logging.DEBUG, format=fmt)
57 |
58 | print(f"Mounting bucket {bucket} to directory {mount_point}")
59 | print("foreground:", foreground, ", nothreads:", not threads)
60 | FUSE(
61 | GCSFS(bucket, token=token, project=project_id, nfiles=cache_files),
62 | mount_point,
63 | nothreads=not threads,
64 | foreground=foreground,
65 | )
66 |
67 |
68 | if __name__ == "__main__":
69 | main()
70 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_init.py:
--------------------------------------------------------------------------------
1 | import os
2 | import sys
3 |
4 |
5 | class TestConditionalImport:
6 | def setup_method(self, method):
7 | """Setup for each test method."""
8 | self.original_env = os.environ.get("GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT")
9 |
10 | # Snapshot original gcsfs modules
11 | self.original_modules = {
12 | name: mod for name, mod in sys.modules.items() if name.startswith("gcsfs")
13 | }
14 |
15 | # Unload gcsfs modules to force re-import during the test
16 | modules_to_remove = list(self.original_modules.keys())
17 | for name in modules_to_remove:
18 | if name in sys.modules:
19 | del sys.modules[name]
20 |
21 | def teardown_method(self, method):
22 | """Teardown after each test method."""
23 | # Reset environment variable to its original state
24 | if self.original_env is not None:
25 | os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = self.original_env
26 | elif "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" in os.environ:
27 | del os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"]
28 |
29 | # Clear any gcsfs modules loaded/modified during this test
30 | modules_to_remove = [name for name in sys.modules if name.startswith("gcsfs")]
31 | for name in modules_to_remove:
32 | if name in sys.modules:
33 | del sys.modules[name]
34 |
35 | # Restore the original gcsfs modules from the snapshot to avoid side effect
36 | # affecting other tests
37 | sys.modules.update(self.original_modules)
38 |
39 | def test_experimental_env_unset(self):
40 | """
41 | Tests gcsfs.GCSFileSystem is core.GCSFileSystem when
42 | GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT is NOT set.
43 | """
44 | if "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT" in os.environ:
45 | del os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"]
46 |
47 | import gcsfs
48 |
49 | assert (
50 | gcsfs.GCSFileSystem is gcsfs.core.GCSFileSystem
51 | ), "Should be core.GCSFileSystem"
52 | assert not hasattr(
53 | gcsfs, "ExtendedGcsFileSystem"
54 | ), "ExtendedGcsFileSystem should not be imported directly on gcsfs"
55 |
56 | def test_experimental_env_set(self):
57 | """
58 | Tests gcsfs.GCSFileSystem is extended_gcsfs.ExtendedGcsFileSystem when
59 | GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT IS set.
60 | """
61 | os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = "true"
62 |
63 | import gcsfs
64 |
65 | assert (
66 | gcsfs.GCSFileSystem is gcsfs.extended_gcsfs.ExtendedGcsFileSystem
67 | ), "Should be ExtendedGcsFileSystem"
68 |
--------------------------------------------------------------------------------
/docs/source/developer.rst:
--------------------------------------------------------------------------------
1 | For Developers
2 | ==============
3 |
4 | We welcome contributions to gcsfs!
5 |
6 | Please file issues and requests on github_ and we welcome pull requests.
7 |
8 | .. _github: https://github.com/fsspec/gcsfs/issues
9 |
10 | Testing
11 | -------
12 |
13 | The testing framework supports using your own GCS-compliant endpoint, by
14 | setting the "STORAGE_EMULATOR_HOST" environment variable. If this is
15 | not set, then an emulator will be spun up using ``docker`` and
16 | `fake-gcs-server`_. This emulator has almost all the functionality of
17 | real GCS. A small number of tests run differently or are skipped.
18 |
19 | If you want to test against real GCS, set STORAGE_EMULATOR_HOST to
20 | "https://storage.googleapis.com" and also provide appropriate values for
21 | GCSFS_TEST_BUCKET, GCSFS_TEST_VERSIONED_BUCKET (used by tests that target GCS
22 | object versioning; this bucket must have versioning enabled),
23 | GCSFS_ZONAL_TEST_BUCKET (used for testing Rapid storage features) and GCSFS_TEST_PROJECT,
24 | as well as setting your default Google credentials (or providing them via the fsspec config).
25 |
26 | When running tests against a real GCS endpoint, you have two options for test buckets:
27 |
28 | - **Provide existing buckets**: If you specify buckets that already exist, the
29 | test suite will manage objects *within* them (creating, modifying, and deleting
30 | objects as needed). The buckets themselves will **not** be deleted upon completion.
31 | **Warning**: The test suite will clear the contents of the bucket at the beginning and end of the
32 | test run, so be sure to use a bucket that does not contain important data.
33 | - **Let the tests create buckets**: If you specify bucket names that do not exist,
34 | the test suite will create them for the test run and automatically delete them
35 | during final cleanup.
36 |
37 | End-to-end Testing CI Pipeline
38 | ------------------------------
39 |
40 | We have a Cloud Build pipeline for end-to-end tests which includes tests on zonal
41 | and regional buckets. When a pull request is created for the ``main`` branch,
42 | there will be a ``end-to-end-tests-trigger`` check in the GitHub checks section.
43 |
44 | The pipeline's behavior depends on the author of the pull request:
45 |
46 | - If the PR is created by an owner or a collaborator, the pipeline will be
47 | triggered immediately.
48 | - If the PR is from an external contributor, an owner or collaborator must add
49 | the comment ``/gcbrun`` to the PR to trigger the pipeline; until then, the
50 | check will remain in a failed state.
51 |
52 | The pipeline will also be triggered when a new commit is added to the PR. For
53 | external contributors, a new ``/gcbrun`` comment is required from an owner or
54 | collaborator after the new commit. The pipeline can also be manually
55 | re-triggered by adding a ``/gcbrun`` comment or by using the re-run option in the GitHub UI.
56 |
57 | The logs from the test run are available in the "details" section of the Checks
58 | tab in the pull request.
59 |
60 | .. _fake-gcs-server: https://github.com/fsouza/fake-gcs-server
61 |
--------------------------------------------------------------------------------
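
A sketch of driving the test suite against real GCS from Python rather than the shell; the environment variable names come from gcsfs/tests/settings.py and the section above, while the project and bucket values are placeholders:

    import os

    import pytest

    # Point the suite at real GCS instead of the fake-gcs-server emulator.
    os.environ["STORAGE_EMULATOR_HOST"] = "https://storage.googleapis.com"
    os.environ["GCSFS_TEST_PROJECT"] = "my-gcp-project"
    os.environ["GCSFS_TEST_BUCKET"] = "my-gcsfs-test-bucket"
    os.environ["GCSFS_TEST_VERSIONED_BUCKET"] = "my-gcsfs-test-bucket-versioned"
    os.environ["GCSFS_ZONAL_TEST_BUCKET"] = "my-gcsfs-zonal-test-bucket"

    # Default Google credentials must also be available, e.g. via
    # GOOGLE_APPLICATION_CREDENTIALS or the fsspec config.
    raise SystemExit(pytest.main(["-v", "gcsfs/"]))
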
/gcsfs/tests/test_mapping.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | from gcsfs.tests.settings import TEST_BUCKET
4 |
5 | MAPPING_ROOT = TEST_BUCKET + "/mapping"
6 |
7 |
8 | def test_api():
9 | import gcsfs
10 |
11 | assert "GCSMap" in dir(gcsfs)
12 | assert "mapping" in dir(gcsfs)
13 |
14 |
15 | def test_map_simple(gcs):
16 | d = gcs.get_mapper(MAPPING_ROOT)
17 | assert not d
18 |
19 | assert list(d) == list(d.keys()) == []
20 | assert list(d.values()) == []
21 | assert list(d.items()) == []
22 |
23 |
24 | def test_map_default_gcsfilesystem(gcs):
25 | d = gcs.get_mapper(MAPPING_ROOT)
26 | assert d.fs is gcs
27 |
28 |
29 | def test_map_errors(gcs):
30 | d = gcs.get_mapper(MAPPING_ROOT)
31 | with pytest.raises(KeyError):
32 | d["nonexistent"]
33 | try:
34 | gcs.get_mapper("does-not-exist")
35 | except Exception as e:
36 | assert "does-not-exist" in str(e)
37 |
38 |
39 | def test_map_with_data(gcs):
40 | d = gcs.get_mapper(MAPPING_ROOT)
41 | d["x"] = b"123"
42 | assert list(d) == list(d.keys()) == ["x"]
43 | assert list(d.values()) == [b"123"]
44 | assert list(d.items()) == [("x", b"123")]
45 | assert d["x"] == b"123"
46 | assert bool(d)
47 |
48 | assert gcs.find(MAPPING_ROOT) == [TEST_BUCKET + "/mapping/x"]
49 | d["x"] = b"000"
50 | assert d["x"] == b"000"
51 |
52 | d["y"] = b"456"
53 | assert d["y"] == b"456"
54 | assert set(d) == {"x", "y"}
55 |
56 | d.clear()
57 | assert list(d) == []
58 |
59 |
60 | def test_map_clear_empty(gcs):
61 | d = gcs.get_mapper(MAPPING_ROOT)
62 | d.clear()
63 | assert list(d) == []
64 | d["1"] = b"1"
65 | assert list(d) == ["1"]
66 | d.clear()
67 | assert list(d) == []
68 |
69 |
70 | def test_map_pickle(gcs):
71 | d = gcs.get_mapper(MAPPING_ROOT)
72 | d["x"] = b"1"
73 | assert d["x"] == b"1"
74 |
75 | import pickle
76 |
77 | d2 = pickle.loads(pickle.dumps(d))
78 |
79 | assert d2["x"] == b"1"
80 |
81 |
82 | def test_map_array(gcs):
83 | from array import array
84 |
85 | d = gcs.get_mapper(MAPPING_ROOT)
86 | d["x"] = array("B", [65] * 1000)
87 |
88 | assert d["x"] == b"A" * 1000
89 |
90 |
91 | def test_map_bytearray(gcs):
92 | d = gcs.get_mapper(MAPPING_ROOT)
93 | d["x"] = bytearray(b"123")
94 |
95 | assert d["x"] == b"123"
96 |
97 |
98 | def test_new_bucket(gcs):
99 | new_bucket = TEST_BUCKET + "new-bucket"
100 | try:
101 | gcs.rmdir(new_bucket)
102 | except: # noqa: E722
103 | pass
104 | with pytest.raises(Exception) as e:
105 | d = gcs.get_mapper(new_bucket, check=True)
106 | assert "create=True" in str(e.value)
107 |
108 | try:
109 | d = gcs.get_mapper(new_bucket, create=True)
110 | assert not d
111 |
112 | d = gcs.get_mapper(new_bucket + "/new-directory")
113 | assert not d
114 | finally:
115 | gcs.rmdir(new_bucket)
116 |
117 |
118 | def test_map_pickle(gcs):
119 | import pickle
120 |
121 | d = gcs.get_mapper(MAPPING_ROOT)
122 | d["x"] = b"1234567890"
123 |
124 | b = pickle.dumps(d)
125 | assert b"1234567890" not in b
126 |
127 | e = pickle.loads(b)
128 |
129 | assert dict(e) == {"x": b"1234567890"}
130 |
--------------------------------------------------------------------------------
/gcsfs/checkers.py:
--------------------------------------------------------------------------------
1 | import base64
2 | from base64 import b64encode
3 | from hashlib import md5
4 |
5 | from .retry import ChecksumError
6 |
7 | try:
8 | import crcmod
9 | except ImportError:
10 | crcmod = None
11 |
12 |
13 | class ConsistencyChecker:
14 | def __init__(self):
15 | pass
16 |
17 | def update(self, data: bytes):
18 | pass
19 |
20 | def validate_json_response(self, gcs_object):
21 | pass
22 |
23 | def validate_headers(self, headers):
24 | pass
25 |
26 | def validate_http_response(self, r):
27 | pass
28 |
29 |
30 | class MD5Checker(ConsistencyChecker):
31 | def __init__(self):
32 | self.md = md5()
33 |
34 | def update(self, data):
35 | self.md.update(data)
36 |
37 | def validate_json_response(self, gcs_object):
38 | mdback = gcs_object["md5Hash"]
39 | if b64encode(self.md.digest()) != mdback.encode():
40 | raise ChecksumError("MD5 checksum failed")
41 |
42 | def validate_headers(self, headers):
43 | if headers is not None and "X-Goog-Hash" in headers:
44 |
45 | dig = [
46 | bit.split("=")[1]
47 | for bit in headers["X-Goog-Hash"].split(",")
48 | if bit and bit.strip().startswith("md5=")
49 | ]
50 | if dig:
51 | if b64encode(self.md.digest()).decode().rstrip("=") != dig[0]:
52 | raise ChecksumError("Checksum failure")
53 | else:
54 | raise NotImplementedError(
55 | "No md5 checksum available to do consistency check. GCS does "
56 | "not provide md5 sums for composite objects."
57 | )
58 |
59 | def validate_http_response(self, r):
60 | return self.validate_headers(r.headers)
61 |
62 |
63 | class SizeChecker(ConsistencyChecker):
64 | def __init__(self):
65 | self.size = 0
66 |
67 | def update(self, data: bytes):
68 | self.size += len(data)
69 |
70 | def validate_json_response(self, gcs_object):
71 | assert int(gcs_object["size"]) == self.size, "Size mismatch"
72 |
73 | def validate_http_response(self, r):
74 | assert r.content_length == self.size
75 |
76 |
77 | class Crc32cChecker(ConsistencyChecker):
78 | def __init__(self):
79 | self.crc32c = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF)
80 |
81 | def update(self, data: bytes):
82 | self.crc32c.update(data)
83 |
84 | def validate_json_response(self, gcs_object):
85 | # docs for gcs_object: https://cloud.google.com/storage/docs/json_api/v1/objects
86 | digest = self.crc32c.digest()
87 | digest_b64 = base64.b64encode(digest).decode()
88 | expected = gcs_object["crc32c"]
89 |
90 | if digest_b64 != expected:
91 | raise ChecksumError(f'Expected "{expected}". Got "{digest_b64}"')
92 |
93 | def validate_headers(self, headers):
94 | if headers is not None:
95 | hasher = headers.get("X-Goog-Hash", "")
96 | crc = [h.split("=", 1)[1] for h in hasher.split(",") if "crc32c" in h]
97 | if not crc:
98 | raise NotImplementedError("No crc32c checksum was provided by google!")
99 | if crc[0] != b64encode(self.crc32c.digest()).decode():
100 | raise ChecksumError()
101 |
102 | def validate_http_response(self, r):
103 | return self.validate_headers(r.headers)
104 |
105 |
106 | def get_consistency_checker(consistency: str | None) -> ConsistencyChecker:
107 | if consistency == "size":
108 | return SizeChecker()
109 | elif consistency == "md5":
110 | return MD5Checker()
111 | elif consistency == "crc32c":
112 | if crcmod is None:
113 | raise ImportError(
114 | "The python package `crcmod` is required for `consistency='crc32c'`. "
115 | "This can be installed with `pip install gcsfs[crc]`"
116 | )
117 | else:
118 | return Crc32cChecker()
119 | else:
120 | return ConsistencyChecker()
121 |
--------------------------------------------------------------------------------
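
A minimal sketch of how the checkers above compose during an upload-style flow (the metadata dict below is a hand-built stand-in for the object resource GCS returns after a write, not a real API response):

    import base64
    import hashlib

    from gcsfs.checkers import get_consistency_checker

    data = b"hello world"

    checker = get_consistency_checker("md5")  # also accepts "size", "crc32c", or None
    checker.update(data)

    # Stand-in object resource; only the fields the checkers inspect are included.
    gcs_object = {
        "size": str(len(data)),
        "md5Hash": base64.b64encode(hashlib.md5(data).digest()).decode(),
    }

    checker.validate_json_response(gcs_object)  # raises ChecksumError on mismatch
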
/gcsfs/tests/test_core_versioned.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import posixpath
4 |
5 | import pytest
6 | from google.cloud import storage
7 |
8 | from gcsfs import GCSFileSystem
9 | from gcsfs.tests.settings import TEST_VERSIONED_BUCKET
10 |
11 | a = TEST_VERSIONED_BUCKET + "/tmp/test/a"
12 | b = TEST_VERSIONED_BUCKET + "/tmp/test/b"
13 |
14 | # Flag to track if the bucket was created by this test run.
15 | _VERSIONED_BUCKET_CREATED_BY_TESTS = False
16 |
17 |
18 | def is_versioning_enabled():
19 | """
20 | Helper function to check if the test bucket has versioning enabled.
21 | Returns a tuple of (bool, reason_string).
22 | """
23 | # Don't skip when using an emulator, as we create the versioned bucket ourselves.
24 | global _VERSIONED_BUCKET_CREATED_BY_TESTS
25 | if os.environ.get("STORAGE_EMULATOR_HOST") != "https://storage.googleapis.com":
26 | return True, ""
27 | try:
28 | gcs = GCSFileSystem(project=os.getenv("GCSFS_TEST_PROJECT", "project"))
29 | if not gcs.exists(TEST_VERSIONED_BUCKET):
30 | logging.info(
31 | f"Creating versioned bucket for tests: {TEST_VERSIONED_BUCKET}"
32 | )
33 | gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
34 | _VERSIONED_BUCKET_CREATED_BY_TESTS = True
35 |
36 | client = storage.Client(
37 | credentials=gcs.credentials.credentials, project=gcs.project
38 | )
39 | bucket = client.get_bucket(TEST_VERSIONED_BUCKET)
40 | if bucket.versioning_enabled:
41 | return True, ""
42 | return (
43 | False,
44 | f"Bucket '{TEST_VERSIONED_BUCKET}' does not have versioning enabled.",
45 | )
46 | except Exception as e:
47 | return (
48 | False,
49 | f"Could not verify versioning status for bucket '{TEST_VERSIONED_BUCKET}': {e}",
50 | )
51 |
52 |
53 | pytestmark = pytest.mark.skipif(
54 | not is_versioning_enabled()[0], reason=is_versioning_enabled()[1]
55 | )
56 |
57 |
58 | def test_info_versioned(gcs_versioned):
59 | with gcs_versioned.open(a, "wb") as wo:
60 | wo.write(b"v1")
61 | v1 = gcs_versioned.info(a)["generation"]
62 | assert v1 is not None
63 | with gcs_versioned.open(a, "wb") as wo:
64 | wo.write(b"v2")
65 | v2 = gcs_versioned.info(a)["generation"]
66 | assert v2 is not None and v1 != v2
67 | assert gcs_versioned.info(f"{a}#{v1}")["generation"] == v1
68 | assert gcs_versioned.info(f"{a}?generation={v2}")["generation"] == v2
69 |
70 |
71 | def test_cat_versioned(gcs_versioned):
72 | with gcs_versioned.open(b, "wb") as wo:
73 | wo.write(b"v1")
74 | v1 = gcs_versioned.info(b)["generation"]
75 | assert v1 is not None
76 | with gcs_versioned.open(b, "wb") as wo:
77 | wo.write(b"v2")
78 | assert gcs_versioned.cat(f"{b}#{v1}") == b"v1"
79 |
80 |
81 | def test_cp_versioned(gcs_versioned):
82 | with gcs_versioned.open(a, "wb") as wo:
83 | wo.write(b"v1")
84 | v1 = gcs_versioned.info(a)["generation"]
85 | assert v1 is not None
86 | with gcs_versioned.open(a, "wb") as wo:
87 | wo.write(b"v2")
88 | gcs_versioned.cp_file(f"{a}#{v1}", b)
89 | assert gcs_versioned.cat(b) == b"v1"
90 |
91 |
92 | def test_ls_versioned(gcs_versioned):
93 | with gcs_versioned.open(b, "wb") as wo:
94 | wo.write(b"v1")
95 | v1 = gcs_versioned.info(b)["generation"]
96 | with gcs_versioned.open(b, "wb") as wo:
97 | wo.write(b"v2")
98 | v2 = gcs_versioned.info(b)["generation"]
99 | dpath = posixpath.dirname(b)
100 | versions = {f"{b}#{v1}", f"{b}#{v2}"}
101 | assert versions == set(gcs_versioned.ls(dpath, versions=True))
102 | assert versions == {
103 | entry["name"] for entry in gcs_versioned.ls(dpath, detail=True, versions=True)
104 | }
105 | assert gcs_versioned.ls(TEST_VERSIONED_BUCKET, versions=True) == [
106 | f"{TEST_VERSIONED_BUCKET}/tmp"
107 | ]
108 |
109 |
110 | def test_find_versioned(gcs_versioned):
111 | with gcs_versioned.open(a, "wb") as wo:
112 | wo.write(b"v1")
113 | v1 = gcs_versioned.info(a)["generation"]
114 | with gcs_versioned.open(a, "wb") as wo:
115 | wo.write(b"v2")
116 | v2 = gcs_versioned.info(a)["generation"]
117 | versions = {f"{a}#{v1}", f"{a}#{v2}"}
118 | assert versions == set(gcs_versioned.find(a, versions=True))
119 | assert versions == set(gcs_versioned.find(a, detail=True, versions=True))
120 |
121 |
122 | def test_write_captures_generation(gcs_versioned):
123 | with gcs_versioned.open(a, "wb") as wo:
124 | wo.write(b"test content")
125 | assert wo.generation is not None
126 | assert wo.generation == gcs_versioned.info(a)["generation"]
127 |
128 |
129 | def test_write_captures_generation_multipart(gcs_versioned):
130 | with gcs_versioned.open(b, "wb") as wo:
131 | wo.write(b"first chunk")
132 | wo.flush()
133 | wo.write(b"second chunk")
134 | assert wo.generation is not None
135 | assert wo.generation == gcs_versioned.info(b)["generation"]
136 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_retry.py:
--------------------------------------------------------------------------------
1 | import multiprocessing
2 | import os
3 | import pickle
4 | from concurrent.futures import ProcessPoolExecutor
5 |
6 | import pytest
7 | import requests
8 | from requests.exceptions import ProxyError
9 |
10 | from gcsfs.retry import HttpError, is_retriable, validate_response
11 | from gcsfs.tests.settings import TEST_BUCKET
12 | from gcsfs.tests.utils import tmpfile
13 |
14 |
15 | def test_tempfile():
16 | with tmpfile() as fn:
17 | with open(fn, "w"):
18 | pass
19 | assert os.path.exists(fn)
20 | assert not os.path.exists(fn)
21 |
22 |
23 | def test_retriable_exception():
24 | e = requests.exceptions.Timeout()
25 | assert is_retriable(e)
26 | e = ValueError
27 | assert not is_retriable(e)
28 |
29 | e = HttpError({"message": "", "code": 500})
30 | assert is_retriable(e)
31 |
32 | e = HttpError({"message": "", "code": "500"})
33 | assert is_retriable(e)
34 |
35 | e = HttpError({"message": "", "code": 400})
36 | assert not is_retriable(e)
37 |
38 | e = HttpError({"code": "429"})
39 | assert is_retriable(e)
40 |
41 | e = ProxyError()
42 | assert is_retriable(e)
43 |
44 |
45 | def test_pickle_serialization():
46 | expected = HttpError({"message": "", "code": 400})
47 |
48 | # Serialize/Deserialize
49 | serialized = pickle.dumps(expected)
50 | actual = pickle.loads(serialized)
51 |
52 | is_same_type = type(expected) is type(actual)
53 | is_same_args = expected.args == actual.args
54 |
55 | assert is_same_type and is_same_args
56 |
57 |
58 | def conditional_exception(process_id):
59 | # Raise only on second process (id=1)
60 | if process_id == 1:
61 | raise HttpError({"message": "", "code": 400})
62 |
63 |
64 | def test_multiprocessing_error_handling():
65 | # Ensure spawn context to avoid forking issues
66 | ctx = multiprocessing.get_context("spawn")
67 |
68 | # Run on two processes
69 | with ProcessPoolExecutor(2, mp_context=ctx) as p:
70 | results = p.map(conditional_exception, range(2))
71 |
72 | with pytest.raises(HttpError):
73 | _ = [result for result in results]
74 |
75 |
76 | def test_validate_response():
77 | validate_response(200, None, "/path")
78 |
79 | # HttpError with no JSON body
80 | with pytest.raises(HttpError) as e:
81 | validate_response(503, b"", "/path")
82 | assert e.value.code == 503
83 | assert e.value.message == ", 503"
84 |
85 | # HttpError with JSON body
86 | j = '{"error": {"code": 503, "message": "Service Unavailable"}}'
87 | with pytest.raises(HttpError) as e:
88 | validate_response(503, j, "/path")
89 | assert e.value.code == 503
90 | assert e.value.message == "Service Unavailable, 503"
91 |
92 | # 403
93 | j = '{"error": {"message": "Not ok"}}'
94 | with pytest.raises(IOError, match="Forbidden: /path\nNot ok"):
95 | validate_response(403, j, "/path")
96 |
97 | # 404
98 | with pytest.raises(FileNotFoundError):
99 | validate_response(404, b"", "/path")
100 |
101 | # 502
102 | with pytest.raises(ProxyError):
103 | validate_response(502, b"", "/path")
104 |
105 |
106 | def test_validate_response_error_is_string():
107 | # HttpError with JSON body
108 | j = '{"error": "Too Many Requests"}'
109 | with pytest.raises(HttpError) as e:
110 | validate_response(429, j, "/path")
111 | assert e.value.code == 429
112 | assert e.value.message == "Too Many Requests, 429"
113 |
114 |
115 | @pytest.mark.parametrize(
116 | ["file_path", "validate_get_error", "validate_list_error", "expected_error"],
117 | [
118 | (
119 | "/missing",
120 | FileNotFoundError,
121 | None,
122 | FileNotFoundError,
123 | ), # Not called
124 | (
125 | "/missing",
126 | OSError("Forbidden"),
127 | FileNotFoundError,
128 | FileNotFoundError,
129 | ),
130 | (
131 | "/2014-01-01.csv",
132 | None,
133 | None,
134 | None,
135 | ),
136 | (
137 | "/2014-01-01.csv",
138 | OSError("Forbidden"),
139 | None,
140 | None,
141 | ),
142 | ],
143 | ids=[
144 | "missing_with_get_perms",
145 | "missing_with_list_perms",
146 | "existing_with_get_perms",
147 | "existing_with_list_perms",
148 | ],
149 | )
150 | def test_metadata_read_permissions(
151 | file_path, validate_get_error, validate_list_error, expected_error, gcs
152 | ):
153 | def _validate_response(self, status, content, path):
154 | if path.endswith(f"/o{file_path}") and validate_get_error is not None:
155 | raise validate_get_error
156 | if path.endswith("/o/") and validate_list_error is not None:
157 | raise validate_list_error
158 | validate_response(status, content, path)
159 |
160 | if expected_error is None:
161 | gcs.ls(TEST_BUCKET + file_path)
162 | gcs.info(TEST_BUCKET + file_path)
163 | assert gcs.exists(TEST_BUCKET + file_path)
164 | else:
165 | with pytest.raises(expected_error):
166 | gcs.ls(TEST_BUCKET + file_path)
167 | with pytest.raises(expected_error):
168 | gcs.info(TEST_BUCKET + file_path)
169 | assert gcs.exists(TEST_BUCKET + file_path) is False
170 |
--------------------------------------------------------------------------------
/docs/source/code-of-conduct.rst:
--------------------------------------------------------------------------------
1 | Code of Conduct
2 | ===============
3 |
4 | All participants in the fsspec community are expected to adhere to a Code of Conduct.
5 |
6 | As contributors and maintainers of this project, and in the interest of
7 | fostering an open and welcoming community, we pledge to respect all people who
8 | contribute through reporting issues, posting feature requests, updating
9 | documentation, submitting pull requests or patches, and other activities.
10 |
11 | We are committed to making participation in this project a harassment-free
12 | experience for everyone, treating everyone as unique humans deserving of
13 | respect.
14 |
15 | Examples of unacceptable behaviour by participants include:
16 |
17 | - The use of sexualized language or imagery
18 | - Personal attacks
19 | - Trolling or insulting/derogatory comments
20 | - Public or private harassment
21 | - Publishing others' private information, such as physical or electronic
22 | addresses, without explicit permission
23 | - Other unethical or unprofessional conduct
24 |
25 | Project maintainers have the right and responsibility to remove, edit, or
26 | reject comments, commits, code, wiki edits, issues, and other contributions
27 | that are not aligned to this Code of Conduct, or to ban temporarily or
28 | permanently any contributor for other behaviours that they deem inappropriate,
29 | threatening, offensive, or harmful.
30 |
31 | By adopting this Code of Conduct, project maintainers commit themselves
32 | to fairly and consistently applying these principles to every aspect of
33 | managing this project. Project maintainers who do not follow or enforce
34 | the Code of Conduct may be permanently removed from the project team.
35 |
36 | This code of conduct applies both within project spaces and in public
37 | spaces when an individual is representing the project or its community.
38 |
39 | If you feel the code of conduct has been violated, please report the
40 | incident to the fsspec core team.
41 |
42 | Reporting
43 | ---------
44 |
45 | If you believe someone is violating the Code of Conduct, we ask that you report it
46 | to the Project by emailing community@anaconda.com. All reports will be kept
47 | confidential. In some cases we may determine that a public statement will need
48 | to be made. If that's the case, the identities of all victims and reporters
49 | will remain confidential unless those individuals instruct us otherwise.
50 | If you believe anyone is in physical danger, please notify appropriate law
51 | enforcement first.
52 |
53 | In your report please include:
54 |
55 | - Your contact info
56 | - Names (real, nicknames, or pseudonyms) of any individuals involved.
57 | If there were other witnesses besides you, please try to include them as well.
58 | - When and where the incident occurred. Please be as specific as possible.
59 | - Your account of what occurred. If there is a publicly available record
60 | please include a link.
61 | - Any extra context you believe existed for the incident.
62 | - If you believe this incident is ongoing.
63 | - If you believe any member of the core team has a conflict of interest
64 | in adjudicating the incident.
65 | - What, if any, corrective response you believe would be appropriate.
66 | - Any other information you believe we should have.
67 |
68 | Core team members are obligated to maintain confidentiality with regard
69 | to the reporter and details of an incident.
70 |
71 | What happens next?
72 | ~~~~~~~~~~~~~~~~~~
73 |
74 | You will receive an email acknowledging receipt of your complaint.
75 | The core team will immediately meet to review the incident and determine:
76 |
77 | - What happened.
78 | - Whether this event constitutes a code of conduct violation.
79 | - Who the bad actor was.
80 | - Whether this is an ongoing situation, or if there is a threat to anyone's
81 | physical safety.
82 | - If this is determined to be an ongoing incident or a threat to physical safety,
83 | the working group's immediate priority will be to protect everyone involved.
84 |
85 | If a member of the core team is one of the named parties, they will not be
86 | included in any discussions, and will not be provided with any confidential
87 | details from the reporter.
88 |
89 | If anyone on the core team believes they have a conflict of interest in
90 | adjudicating on a reported issue, they will inform the other core team
91 | members, and exempt themselves from any discussion about the issue.
92 | Following this declaration, they will not be provided with any confidential
93 | details from the reporter.
94 |
95 | Once the working group has a complete account of the events, they will make a
96 | decision as to how to respond. Responses may include:
97 |
98 | - Nothing (if we determine no violation occurred).
99 | - A private reprimand from the working group to the individual(s) involved.
100 | - A public reprimand.
101 | - An imposed vacation.
102 | - A permanent or temporary ban from some or all spaces (GitHub repositories, etc.).
103 | - A request for a public or private apology.
104 |
105 | We'll respond within one week to the person who filed the report with either a
106 | resolution or an explanation of why the situation is not yet resolved.
107 |
108 | Once we've determined our final action, we'll contact the original reporter
109 | to let them know what action (if any) we'll be taking. We'll take into account
110 | feedback from the reporter on the appropriateness of our response, but we
111 | don't guarantee we'll act on it.
112 |
113 | Acknowledgement
114 | ---------------
115 |
116 | This CoC is modified from the one by `BeeWare`_, which in turn refers to
117 | the `Contributor Covenant`_ and the `Django`_ project.
118 |
119 | .. _BeeWare: https://beeware.org/community/behavior/code-of-conduct/
120 | .. _Contributor Covenant: https://www.contributor-covenant.org/version/1/3/0/code-of-conduct/
121 | .. _Django: https://www.djangoproject.com/conduct/reporting/
122 |
123 | .. raw:: html
124 |
125 |
127 |
--------------------------------------------------------------------------------
/gcsfs/retry.py:
--------------------------------------------------------------------------------
1 | import asyncio
2 | import json
3 | import logging
4 | import random
5 |
6 | import aiohttp.client_exceptions
7 | import google.auth.exceptions
8 | import requests.exceptions
9 | from decorator import decorator
10 |
11 | logger = logging.getLogger("gcsfs")
12 |
13 |
14 | class HttpError(Exception):
15 | """Holds the message and code from cloud errors."""
16 |
17 | def __init__(self, error_response=None):
18 | # Save error_response for potential pickle.
19 | self._error_response = error_response
20 | if error_response:
21 | self.code = error_response.get("code", None)
22 | self.message = error_response.get("message", "")
23 | if self.code:
24 | if isinstance(self.message, bytes):
25 | self.message += (", %s" % self.code).encode()
26 | else:
27 | self.message += ", %s" % self.code
28 | else:
29 | self.message = ""
30 | self.code = None
31 | # Call the base class constructor with the parameters it needs
32 | super().__init__(self.message)
33 |
34 | def __reduce__(self):
35 | """This makes the Exception pickleable."""
36 |
37 | # This is basically deconstructing the HttpError when pickled.
38 | return HttpError, (self._error_response,)
39 |
40 |
41 | class ChecksumError(Exception):
42 | """Raised when the md5 hash of the content does not match the header."""
43 |
44 | pass
45 |
46 |
47 | RETRIABLE_EXCEPTIONS = (
48 | requests.exceptions.ChunkedEncodingError,
49 | requests.exceptions.ConnectionError,
50 | requests.exceptions.ReadTimeout,
51 | requests.exceptions.Timeout,
52 | requests.exceptions.ProxyError,
53 | requests.exceptions.SSLError,
54 | requests.exceptions.ContentDecodingError,
55 | google.auth.exceptions.RefreshError,
56 | aiohttp.client_exceptions.ClientError,
57 | ChecksumError,
58 | )
59 |
60 |
61 | errs = list(range(500, 505)) + [
62 | # Request Timeout
63 | 408,
64 | # Too Many Requests
65 | 429,
66 | ]
67 | errs = set(errs + [str(e) for e in errs])
68 |
69 |
70 | def is_retriable(exception):
71 | """Returns True if this exception is retriable."""
72 |
73 | if isinstance(exception, HttpError):
74 | # Add 401 to retriable errors when it's an auth expiration issue
75 | if exception.code == 401 and "Invalid Credentials" in str(exception.message):
76 | return True
77 | return exception.code in errs
78 |
79 | return isinstance(exception, RETRIABLE_EXCEPTIONS)
80 |
81 |
82 | def validate_response(status, content, path, args=None):
83 | """
84 |     Check the response of a GCS API call, raising an appropriate error if it is not ok.
85 |
86 |     Parameters
87 |     ----------
88 |     status, content: HTTP status code and body of the response
89 |     path: associated URL path, for error messages
90 | """
91 | if status >= 400 and status != 499:
92 | # 499 is special "upload was cancelled" status
93 | if args:
94 | from .core import quote
95 |
96 | path = path.format(*[quote(p) for p in args])
97 | if status == 404:
98 | raise FileNotFoundError(path)
99 |
100 | error = None
101 | if hasattr(content, "decode"):
102 | content = content.decode()
103 | try:
104 | error = json.loads(content)["error"]
105 | # Sometimes the error message is a string.
106 | if isinstance(error, str):
107 | msg = error
108 | else:
109 | msg = error["message"]
110 | except json.decoder.JSONDecodeError:
111 | msg = content
112 |
113 | if status == 403:
114 | raise OSError(f"Forbidden: {path}\n{msg}")
115 | elif status == 412:
116 | raise FileExistsError(path)
117 | elif status == 502:
118 | raise requests.exceptions.ProxyError()
119 | elif "invalid" in str(msg):
120 | raise ValueError(f"Bad Request: {path}\n{msg}")
121 | elif error and not isinstance(error, str):
122 | raise HttpError(error)
123 | elif status:
124 | raise HttpError({"code": status, "message": msg}) # text-like
125 | else:
126 | raise RuntimeError(msg)
127 |
128 |
129 | @decorator
130 | async def retry_request(func, retries=6, *args, **kwargs):
131 | for retry in range(retries):
132 | try:
133 | if retry > 0:
134 | await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32))
135 | return await func(*args, **kwargs)
136 | except (
137 | HttpError,
138 | requests.exceptions.RequestException,
139 | google.auth.exceptions.GoogleAuthError,
140 | ChecksumError,
141 | aiohttp.client_exceptions.ClientError,
142 | ) as e:
143 | if (
144 | isinstance(e, HttpError)
145 | and e.code == 400
146 | and "requester pays" in e.message
147 | ):
148 | msg = (
149 | "Bucket is requester pays. "
150 | "Set `requester_pays=True` when creating the GCSFileSystem."
151 | )
152 | raise ValueError(msg) from e
153 | # Special test for 404 to avoid retrying the request
154 | if (
155 | isinstance(e, aiohttp.client_exceptions.ClientResponseError)
156 | and e.status == 404
157 | ):
158 | logger.debug("Request returned 404, no retries.")
159 | raise e
160 | if isinstance(e, HttpError) and e.code == 404:
161 | logger.debug("Request returned 404, no retries.")
162 | raise e
163 | if retry == retries - 1:
164 | logger.exception(f"{func.__name__} out of retries on exception: {e}")
165 | raise e
166 | if is_retriable(e):
167 | logger.debug(f"{func.__name__} retrying after exception: {e}")
168 | continue
169 | logger.exception(f"{func.__name__} non-retriable exception: {e}")
170 | raise e
171 |
--------------------------------------------------------------------------------
/gcsfs/tests/test_checkers.py:
--------------------------------------------------------------------------------
1 | import base64
2 | from hashlib import md5
3 |
4 | import pytest
5 |
6 | from gcsfs.checkers import Crc32cChecker, MD5Checker, SizeChecker, crcmod
7 | from gcsfs.retry import ChecksumError
8 |
9 |
10 | def google_response_from_data(expected_data: bytes, actual_data=None):
11 | actual_data = actual_data or expected_data
12 | checksum = md5(actual_data)
13 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8")
14 | if crcmod is not None:
15 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF)
16 | checksum.update(actual_data)
17 | crc = base64.b64encode(checksum.digest()).decode()
18 |
19 | class response:
20 | content_length = len(actual_data)
21 | headers = {"X-Goog-Hash": f"md5={checksum_b64}"}
22 | if crcmod is not None:
23 | headers["X-Goog-Hash"] += f", crc32c={crc}"
24 |
25 | return response
26 |
27 |
28 | def google_response_from_data_with_reverse_header_order(
29 | expected_data: bytes, actual_data=None
30 | ):
31 | actual_data = actual_data or expected_data
32 | checksum = md5(actual_data)
33 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8")
34 | if crcmod is not None:
35 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF)
36 | checksum.update(actual_data)
37 | crc = base64.b64encode(checksum.digest()).decode()
38 |
39 | class response:
40 | content_length = len(actual_data)
41 | headers = {}
42 | if crcmod is not None:
43 | headers["X-Goog-Hash"] = f"crc32c={crc}, md5={checksum_b64}"
44 | else:
45 | headers["X-Goog-Hash"] = f"md5={checksum_b64}"
46 |
47 | return response
48 |
49 |
50 | def google_json_response_from_data(expected_data: bytes, actual_data=None):
51 | actual_data = actual_data or expected_data
52 | checksum = md5(actual_data)
53 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8")
54 |
55 | response = {"md5Hash": checksum_b64, "size": len(actual_data)}
56 |
57 | # some manual checksums verified using gsutil ls -L
58 | # also can add using https://crccalc.com/
59 | # be careful about newlines
60 | crc32c_points = {
61 | b"hello world\n": "8P9ykg==",
62 | b"different checksum": "DoesntMatter==",
63 | }
64 |
65 | try:
66 | response["crc32c"] = crc32c_points[actual_data]
67 | except KeyError:
68 | pass
69 |
70 | return response
71 |
72 |
73 | params = [
74 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
75 | (MD5Checker(), b"hello world", b"hello world", ()),
76 | ]
77 |
78 | if crcmod is not None:
79 | params.append(
80 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,))
81 | )
82 | params.append((Crc32cChecker(), b"hello world", b"hello world", ()))
83 |
84 |
85 | @pytest.mark.parametrize("checker, data, actual_data, raises", params)
86 | def test_validate_headers(checker, data, actual_data, raises):
87 | response = google_response_from_data(actual_data)
88 | checker.update(data)
89 |
90 | if raises:
91 | with pytest.raises(raises):
92 | checker.validate_headers(response.headers)
93 | else:
94 | checker.validate_headers(response.headers)
95 |
96 |
97 | params = [
98 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
99 | (MD5Checker(), b"hello world", b"hello world", ()),
100 | ]
101 |
102 | if crcmod is not None:
103 | params.append(
104 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,))
105 | )
106 | params.append((Crc32cChecker(), b"hello world", b"hello world", ()))
107 |
108 |
109 | @pytest.mark.parametrize("checker, data, actual_data, raises", params)
110 | def test_validate_headers_with_reverse_order(checker, data, actual_data, raises):
111 | response = google_response_from_data_with_reverse_header_order(actual_data)
112 | checker.update(data)
113 |
114 | if raises:
115 | with pytest.raises(raises):
116 | checker.validate_headers(response.headers)
117 | else:
118 | checker.validate_headers(response.headers)
119 |
120 |
121 | params = [
122 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
123 | (MD5Checker(), b"hello world", b"hello world", ()),
124 | (SizeChecker(), b"hello world", b"hello world", ()),
125 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)),
126 | ]
127 |
128 | if crcmod is not None:
129 | params.append((Crc32cChecker(), b"hello world", b"hello world", ()))
130 | params.append(
131 | (Crc32cChecker(), b"hello world", b"different size", (ChecksumError,))
132 | )
133 |
134 |
135 | @pytest.mark.parametrize("checker, data, actual_data, raises", params)
136 | def test_checker_validate_http_response(checker, data, actual_data, raises):
137 | response = google_response_from_data(data, actual_data=actual_data)
138 | checker.update(data)
139 | if raises:
140 | with pytest.raises(raises):
141 | checker.validate_http_response(response)
142 | else:
143 | checker.validate_http_response(response)
144 |
145 |
146 | params = [
147 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)),
148 | (MD5Checker(), b"hello world", b"hello world", ()),
149 | (SizeChecker(), b"hello world", b"hello world", ()),
150 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)),
151 | ]
152 | if crcmod is not None:
153 | params.extend(
154 | [
155 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)),
156 | (Crc32cChecker(), b"hello world\n", b"hello world\n", ()),
157 | ]
158 | )
159 |
160 |
161 | @pytest.mark.parametrize("checker, data, actual_data, raises", params)
162 | def test_checker_validate_json_response(checker, data, actual_data, raises):
163 | response = google_json_response_from_data(data, actual_data=actual_data)
164 | checker.update(data)
165 | if raises:
166 | with pytest.raises(raises):
167 | checker.validate_json_response(response)
168 | else:
169 | checker.validate_json_response(response)
170 |
--------------------------------------------------------------------------------
/docs/source/index.rst:
--------------------------------------------------------------------------------
1 | GCSFS
2 | =====
3 |
4 | A pythonic file-system interface to `Google Cloud Storage`_.
5 |
6 | Please file issues and requests on github_ and we welcome pull requests.
7 |
8 | .. _github: https://github.com/fsspec/gcsfs/issues
9 |
10 |
11 | This package depends on fsspec_, and inherits many useful behaviours from there,
12 | including integration with Dask, and the facility for key-value dict-like
13 | objects of the type used by zarr.
14 |
15 | .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/
16 |
17 | Installation
18 | ------------
19 |
20 | The GCSFS library can be installed using ``conda``:
21 |
22 | .. code-block:: bash
23 |
24 | conda install -c conda-forge gcsfs
25 |
26 | or ``pip``:
27 |
28 | .. code-block:: bash
29 |
30 | pip install gcsfs
31 |
32 | or by cloning the repository:
33 |
34 | .. code-block:: bash
35 |
36 | git clone https://github.com/fsspec/gcsfs/
37 | cd gcsfs/
38 | pip install .
39 |
40 | Examples
41 | --------
42 |
43 | Locate and read a file:
44 |
45 | .. code-block:: python
46 |
47 | >>> import gcsfs
48 | >>> fs = gcsfs.GCSFileSystem(project='my-google-project')
49 | >>> fs.ls('my-bucket')
50 | ['my-file.txt']
51 | >>> with fs.open('my-bucket/my-file.txt', 'rb') as f:
52 | ... print(f.read())
53 | b'Hello, world'
54 |
55 | (see also :meth:`~gcsfs.core.GCSFileSystem.walk` and :meth:`~gcsfs.core.GCSFileSystem.glob`)
56 |
57 | Read with delimited blocks:
58 |
59 | .. code-block:: python
60 |
61 | >>> fs.read_block(path, offset=1000, length=10, delimiter=b'\n')
62 | b'A whole line of text\n'
63 |
64 | Write with blocked caching:
65 |
66 | .. code-block:: python
67 |
68 | >>> with fs.open('mybucket/new-file', 'wb') as f:
69 | ... f.write(2*2**20 * b'a')
70 | ... f.write(2*2**20 * b'a') # data is flushed and file closed
71 | >>> fs.du('mybucket/new-file')
72 | {'mybucket/new-file': 4194304}
73 |
74 | Because GCSFS faithfully copies the Python file interface it can be used
75 | smoothly with other projects that consume the file interface like ``gzip`` or
76 | ``pandas``.
77 |
78 | .. code-block:: python
79 |
80 | >>> with fs.open('mybucket/my-file.csv.gz', 'rb') as f:
81 | ... g = gzip.GzipFile(fileobj=f) # Decompress data with gzip
82 | ... df = pd.read_csv(g) # Read CSV file with Pandas
83 |
84 | Credentials
85 | -----------
86 |
87 | Several modes of authentication are supported:
88 |
89 | - if ``token=None`` (default), GCSFS will attempt to use your default gcloud
90 |   credentials, then attempt to get credentials from the google metadata
91 |   service, and finally fall back to anonymous access. This will work for most
92 |   users without further action. Note that the default project may also
93 |   be found, but it is often best to supply this anyway (it only affects
94 |   bucket-level operations).
95 |
96 | - if ``token='cloud'``, we assume we are running within google (compute
97 | or container engine) and fetch the credentials automatically from the
98 | metadata service.
99 |
100 | - you may supply a token generated by the gcloud_ utility directly via the
101 |   ``token=`` argument. This can be
102 |
103 | - a python dictionary
104 |
105 | - the path to a file containing the JSON returned by logging in with the
106 | gcloud CLI tool (e.g.,
107 | ``~/.config/gcloud/application_default_credentials.json`` or
108 | ``~/.config/gcloud/legacy_credentials//adc.json``)
110 |
111 | - the path to a service account key
112 |
113 | - a google.auth.credentials.Credentials_ object
114 |
115 | Note that ``~`` will not be automatically expanded to the user home
116 | directory, and must be manually expanded with a utility like
117 | ``os.path.expanduser()``.
118 |
119 | - you can also generate tokens via OAuth2 in the browser using ``token='browser'``,
120 | which gcsfs then caches in a special file, ~/.gcs_tokens, and can subsequently be accessed with ``token='cache'``.
121 |
122 | - anonymous-only access can be selected using ``token='anon'``, e.g. to access
123 |   public resources such as 'anaconda-public-data'.
124 |
125 | .. _google.auth.credentials.Credentials: https://google-auth.readthedocs.io/en/master/reference/google.auth.credentials.html#google.auth.credentials.Credentials
126 |
127 | The acquired session tokens are *not* preserved when serializing the instances, so
128 | it is safe to pass them to worker processes on other machines if using in a
129 | distributed computation context. If credentials are given by a file path, however,
130 | then this file must exist on every machine.
131 |
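As a minimal sketch of the above (the key-file path below is a placeholder, and
``'my-google-project'`` should be replaced by your own project):

.. code-block:: python

    import gcsfs

    # default credentials -> metadata service -> anonymous fallback
    fs = gcsfs.GCSFileSystem(project='my-google-project')

    # explicit service-account key file (the path must exist on every machine)
    fs = gcsfs.GCSFileSystem(project='my-google-project',
                             token='/path/to/service-account.json')

    # anonymous access to public buckets
    fs_anon = gcsfs.GCSFileSystem(token='anon')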
132 |
133 | Integration
134 | -----------
135 |
136 | The libraries ``intake``, ``pandas`` and ``dask`` accept URLs with the prefix
137 | "gcs://", and will use gcsfs to complete the IO operation in question. The
138 | IO functions take an argument ``storage_options``, which will be passed
139 | to ``GCSFileSystem``, for example:
140 |
141 | .. code-block:: python
142 |
143 | df = pd.read_excel("gcs://bucket/path/file.xls",
144 | storage_options={"token": "anon"})
145 |
146 | This gives you the chance to pass any credentials or other necessary
147 | arguments needed by gcsfs.
148 |
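The same mechanism works for ``dask``; as a sketch (the bucket and path are
placeholders):

.. code-block:: python

    import dask.dataframe as dd

    df = dd.read_csv("gcs://bucket/path/*.csv",
                     storage_options={"token": "anon"})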
149 |
150 | Async
151 | -----
152 |
153 | ``gcsfs`` is implemented using ``aiohttp``, and offers async functionality.
154 | A number of methods of ``GCSFileSystem`` are ``async``, and for each of these
155 | there is also a synchronous version with the same name, minus the leading "_"
156 | prefix.
157 |
158 | If you wish to call ``gcsfs`` from async code, then you should pass
159 | ``asynchronous=True, loop=loop`` to the constructor (the latter is optional,
160 | if you wish to use both async and sync methods). You must also explicitly
161 | await the client creation before making any GCS call.
162 |
163 | .. code-block:: python
164 |
165 | async def run_program():
166 | gcs = GCSFileSystem(asynchronous=True)
167 | print(await gcs._ls(""))
168 |
169 | asyncio.run(run_program()) # or call from your async code
170 |
171 | Concurrent async operations are also used internally for bulk operations
172 | such as ``pipe/cat``, ``get/put``, ``cp/mv/rm``. The async calls are
173 | hidden behind a synchronisation layer, so are designed to be called
174 | from normal code. If you are *not*
175 | using async-style programming, you do not need to know about how this
176 | works, but you might find the implementation interesting.
177 |
178 | For every synchronous function there is an asynchronous one prefixed by ``_``,
179 | but the ``open`` operation does not support async operation. If you need to
180 | open a file asynchronously, it is better to download it asynchronously to a
181 | temporary location and work with it from there.
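
As a sketch of that pattern (the bucket and file name are placeholders, and
``_get_file`` is the async counterpart of ``get_file``):

.. code-block:: python

    import asyncio
    import tempfile

    from gcsfs import GCSFileSystem

    async def read_remote(path):
        gcs = GCSFileSystem(asynchronous=True)
        with tempfile.TemporaryDirectory() as d:
            local = f"{d}/copy"
            # download asynchronously, then use the local copy synchronously
            await gcs._get_file(path, local)
            with open(local, "rb") as f:
                return f.read()

    data = asyncio.run(read_remote("my-bucket/my-file.txt"))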
182 |
183 | Proxy
184 | -----
185 |
186 | ``gcsfs`` uses ``aiohttp`` for calls to the storage api, which by default
187 | ignores ``HTTP_PROXY/HTTPS_PROXY`` environment variables. To read
188 | proxy settings from the environment provide ``session_kwargs`` as follows:
189 |
190 | .. code-block:: python
191 |
192 | fs = GCSFileSystem(project='my-google-project', session_kwargs={'trust_env': True})
193 |
194 | For further reference check `aiohttp proxy support`_.
195 |
196 | .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support
197 |
198 |
199 | Contents
200 | ========
201 |
202 | .. toctree::
203 |    :maxdepth: 2
204 |
205 |    api
206 |    developer
207 |    fuse
208 |    changelog
209 |    code-of-conduct
210 |
211 | .. _Google Cloud Storage: https://cloud.google.com/storage/docs/
212 |
213 | .. _gcloud: https://cloud.google.com/sdk/docs/
214 |
215 | .. _dask: http://dask.pydata.org/en/latest/remote-data-services.html
216 |
217 | .. _zarr: http://zarr.readthedocs.io/en/latest/tutorial.html#storage-alternatives
218 |
219 | Indices and tables
220 | ==================
221 |
222 | * :ref:`genindex`
223 | * :ref:`modindex`
224 | * :ref:`search`
225 |
226 |
227 | These docs pages collect anonymous tracking data using goatcounter, and the
228 | dashboard is available to the public: https://gcsfs.goatcounter.com/ .
229 |
230 | .. raw:: html
231 |
232 |
234 |
--------------------------------------------------------------------------------
/docs/make.bat:
--------------------------------------------------------------------------------
1 | @ECHO OFF
2 |
3 | REM Command file for Sphinx documentation
4 |
5 | if "%SPHINXBUILD%" == "" (
6 | set SPHINXBUILD=sphinx-build
7 | )
8 | set BUILDDIR=build
9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source
10 | set I18NSPHINXOPTS=%SPHINXOPTS% source
11 | if NOT "%PAPER%" == "" (
12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS%
13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS%
14 | )
15 |
16 | if "%1" == "" goto help
17 |
18 | if "%1" == "help" (
19 | :help
20 | 	echo.Please use `make ^<target^>` where ^<target^> is one of
21 | echo. html to make standalone HTML files
22 | echo. dirhtml to make HTML files named index.html in directories
23 | echo. singlehtml to make a single large HTML file
24 | echo. pickle to make pickle files
25 | echo. json to make JSON files
26 | echo. htmlhelp to make HTML files and a HTML help project
27 | echo. qthelp to make HTML files and a qthelp project
28 | echo. devhelp to make HTML files and a Devhelp project
29 | echo. epub to make an epub
30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter
31 | echo. text to make text files
32 | echo. man to make manual pages
33 | echo. texinfo to make Texinfo files
34 | echo. gettext to make PO message catalogs
35 | echo. changes to make an overview over all changed/added/deprecated items
36 | echo. xml to make Docutils-native XML files
37 | echo. pseudoxml to make pseudoxml-XML files for display purposes
38 | echo. linkcheck to check all external links for integrity
39 | echo. doctest to run all doctests embedded in the documentation if enabled
40 | echo. coverage to run coverage check of the documentation if enabled
41 | goto end
42 | )
43 |
44 | if "%1" == "clean" (
45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i
46 | del /q /s %BUILDDIR%\*
47 | goto end
48 | )
49 |
50 |
51 | REM Check if sphinx-build is available and fallback to Python version if any
52 | %SPHINXBUILD% 1>NUL 2>NUL
53 | if errorlevel 9009 goto sphinx_python
54 | goto sphinx_ok
55 |
56 | :sphinx_python
57 |
58 | set SPHINXBUILD=python -m sphinx.__init__
59 | %SPHINXBUILD% 2> nul
60 | if errorlevel 9009 (
61 | echo.
62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx
63 | echo.installed, then set the SPHINXBUILD environment variable to point
64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you
65 | echo.may add the Sphinx directory to PATH.
66 | echo.
67 | echo.If you don't have Sphinx installed, grab it from
68 | echo.http://sphinx-doc.org/
69 | exit /b 1
70 | )
71 |
72 | :sphinx_ok
73 |
74 |
75 | if "%1" == "html" (
76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html
77 | if errorlevel 1 exit /b 1
78 | echo.
79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html.
80 | goto end
81 | )
82 |
83 | if "%1" == "dirhtml" (
84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml
85 | if errorlevel 1 exit /b 1
86 | echo.
87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml.
88 | goto end
89 | )
90 |
91 | if "%1" == "singlehtml" (
92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml
93 | if errorlevel 1 exit /b 1
94 | echo.
95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml.
96 | goto end
97 | )
98 |
99 | if "%1" == "pickle" (
100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle
101 | if errorlevel 1 exit /b 1
102 | echo.
103 | echo.Build finished; now you can process the pickle files.
104 | goto end
105 | )
106 |
107 | if "%1" == "json" (
108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json
109 | if errorlevel 1 exit /b 1
110 | echo.
111 | echo.Build finished; now you can process the JSON files.
112 | goto end
113 | )
114 |
115 | if "%1" == "htmlhelp" (
116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp
117 | if errorlevel 1 exit /b 1
118 | echo.
119 | echo.Build finished; now you can run HTML Help Workshop with the ^
120 | .hhp project file in %BUILDDIR%/htmlhelp.
121 | goto end
122 | )
123 |
124 | if "%1" == "qthelp" (
125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp
126 | if errorlevel 1 exit /b 1
127 | echo.
128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^
129 | .qhcp project file in %BUILDDIR%/qthelp, like this:
130 | 	echo.^> qcollectiongenerator %BUILDDIR%\qthelp\GCSFs.qhcp
131 | 	echo.To view the help file:
132 | 	echo.^> assistant -collectionFile %BUILDDIR%\qthelp\GCSFs.qhc
133 | goto end
134 | )
135 |
136 | if "%1" == "devhelp" (
137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp
138 | if errorlevel 1 exit /b 1
139 | echo.
140 | echo.Build finished.
141 | goto end
142 | )
143 |
144 | if "%1" == "epub" (
145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub
146 | if errorlevel 1 exit /b 1
147 | echo.
148 | echo.Build finished. The epub file is in %BUILDDIR%/epub.
149 | goto end
150 | )
151 |
152 | if "%1" == "latex" (
153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
154 | if errorlevel 1 exit /b 1
155 | echo.
156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex.
157 | goto end
158 | )
159 |
160 | if "%1" == "latexpdf" (
161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
162 | cd %BUILDDIR%/latex
163 | make all-pdf
164 | cd %~dp0
165 | echo.
166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
167 | goto end
168 | )
169 |
170 | if "%1" == "latexpdfja" (
171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex
172 | cd %BUILDDIR%/latex
173 | make all-pdf-ja
174 | cd %~dp0
175 | echo.
176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex.
177 | goto end
178 | )
179 |
180 | if "%1" == "text" (
181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text
182 | if errorlevel 1 exit /b 1
183 | echo.
184 | echo.Build finished. The text files are in %BUILDDIR%/text.
185 | goto end
186 | )
187 |
188 | if "%1" == "man" (
189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man
190 | if errorlevel 1 exit /b 1
191 | echo.
192 | echo.Build finished. The manual pages are in %BUILDDIR%/man.
193 | goto end
194 | )
195 |
196 | if "%1" == "texinfo" (
197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo
198 | if errorlevel 1 exit /b 1
199 | echo.
200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo.
201 | goto end
202 | )
203 |
204 | if "%1" == "gettext" (
205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale
206 | if errorlevel 1 exit /b 1
207 | echo.
208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale.
209 | goto end
210 | )
211 |
212 | if "%1" == "changes" (
213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes
214 | if errorlevel 1 exit /b 1
215 | echo.
216 | echo.The overview file is in %BUILDDIR%/changes.
217 | goto end
218 | )
219 |
220 | if "%1" == "linkcheck" (
221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck
222 | if errorlevel 1 exit /b 1
223 | echo.
224 | echo.Link check complete; look for any errors in the above output ^
225 | or in %BUILDDIR%/linkcheck/output.txt.
226 | goto end
227 | )
228 |
229 | if "%1" == "doctest" (
230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest
231 | if errorlevel 1 exit /b 1
232 | echo.
233 | echo.Testing of doctests in the sources finished, look at the ^
234 | results in %BUILDDIR%/doctest/output.txt.
235 | goto end
236 | )
237 |
238 | if "%1" == "coverage" (
239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage
240 | if errorlevel 1 exit /b 1
241 | echo.
242 | echo.Testing of coverage in the sources finished, look at the ^
243 | results in %BUILDDIR%/coverage/python.txt.
244 | goto end
245 | )
246 |
247 | if "%1" == "xml" (
248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml
249 | if errorlevel 1 exit /b 1
250 | echo.
251 | echo.Build finished. The XML files are in %BUILDDIR%/xml.
252 | goto end
253 | )
254 |
255 | if "%1" == "pseudoxml" (
256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml
257 | if errorlevel 1 exit /b 1
258 | echo.
259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml.
260 | goto end
261 | )
262 |
263 | :end
264 |
--------------------------------------------------------------------------------
/cloudbuild/e2e-tests-cloudbuild.yaml:
--------------------------------------------------------------------------------
1 | substitutions:
2 | _REGION: "us-central1"
3 | _ZONE: "us-central1-a"
4 | _SHORT_BUILD_ID: ${BUILD_ID:0:8}
5 |
6 | steps:
7 | # Step 0: Generate a persistent SSH key for this build run.
8 | # This prevents gcloud from adding a new key to the OS Login profile on every ssh/scp command.
9 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
10 | id: "generate-ssh-key"
11 | entrypoint: "bash"
12 | args:
13 | - "-c"
14 | - |
15 | mkdir -p /workspace/.ssh
16 | # Generate the SSH key
17 | ssh-keygen -t rsa -f /workspace/.ssh/google_compute_engine -N '' -C gcb
18 | # Save the public key content to a file for the cleanup step
19 | cat /workspace/.ssh/google_compute_engine.pub > /workspace/gcb_ssh_key.pub
20 | waitFor: ["-"]
21 |
22 | # Step 1: Create a unique standard GCS bucket for the test run.
23 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
24 | id: "create-standard-bucket"
25 | entrypoint: "gcloud"
26 | args:
27 | - "storage"
28 | - "buckets"
29 | - "create"
30 | - "gs://gcsfs-test-standard-${_SHORT_BUILD_ID}"
31 | - "--project=${PROJECT_ID}"
32 | - "--location=${_REGION}"
33 | waitFor: ["-"]
34 |
35 | # Step 2: Create a unique versioned GCS bucket for the test run.
36 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
37 | id: "create-versioned-bucket"
38 | entrypoint: "gcloud"
39 | args:
40 | - "storage"
41 | - "buckets"
42 | - "create"
43 | - "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}"
44 | - "--project=${PROJECT_ID}"
45 | - "--location=${_REGION}"
46 | waitFor: ["-"]
47 |
48 | # Step 2a: Enable versioning on the versioned bucket.
49 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
50 | id: "enable-bucket-versioning"
51 | entrypoint: "gcloud"
52 | args:
53 | - "storage"
54 | - "buckets"
55 | - "update"
56 | - "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}"
57 | - "--versioning"
58 | waitFor:
59 | - "create-versioned-bucket"
60 |
61 | # Step 3: Create a GCE VM to run the tests.
62 | # The VM is created in the same zone as the buckets to test rapid storage features.
63 | # It's given the 'cloud-platform' scope to allow it to access GCS and other services.
64 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
65 | id: "create-vm"
66 | entrypoint: "gcloud"
67 | args:
68 | - "compute"
69 | - "instances"
70 | - "create"
71 | - "gcsfs-test-vm-${_SHORT_BUILD_ID}"
72 | - "--project=${PROJECT_ID}"
73 | - "--zone=${_ZONE}"
74 | - "--machine-type=e2-medium"
75 | - "--image-family=debian-13"
76 | - "--image-project=debian-cloud"
77 | - "--service-account=${_ZONAL_VM_SERVICE_ACCOUNT}"
78 | - "--scopes=https://www.googleapis.com/auth/cloud-platform" # Full access to project APIs
79 | - "--metadata=enable-oslogin=TRUE"
80 | waitFor: ["-"]
81 |
82 | # Step 4: Run the integration tests inside the newly created VM.
83 | # This step uses 'gcloud compute ssh' to execute a remote script.
84 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
85 | id: "run-tests-on-vm"
86 | entrypoint: "bash"
87 | args:
88 | - "-c"
89 | - |
90 | set -e
91 | # Wait for the VM to be fully initialized and SSH to be ready.
92 | for i in {1..10}; do
93 | if gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="echo VM is ready"; then
94 | break # Break if SSH is successful
95 | fi
96 | echo "Waiting for VM to become available... (attempt $$i/10)"
97 | sleep 15
98 | done
99 |
100 | # Copy the source code from the Cloud Build workspace to the VM's home directory, using the generated key.
101 | gcloud compute scp --recurse . gcsfs-test-vm-${_SHORT_BUILD_ID}:~ --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine
102 |
103 | # Script to be executed on the VM.
104 | # This script installs dependencies, sets environment variables, and runs pytest.
105 | VM_SCRIPT="
106 | set -e
107 | echo '--- Installing dependencies on VM ---'
108 | sudo apt-get update > /dev/null
109 | sudo apt-get install -y python3-pip python3-venv fuse fuse3 libfuse2 > /dev/null
110 |
111 | echo '--- Installing Python and dependencies on VM ---'
112 | python3 -m venv env
113 | source env/bin/activate
114 |
115 | pip install --upgrade pip > /dev/null
116 | # Install testing libraries explicitly, as they are not in setup.py
117 | pip install pytest pytest-timeout pytest-subtests pytest-asyncio fusepy google-cloud-storage > /dev/null
118 | pip install -e . > /dev/null
119 |
120 | echo '--- Preparing test environment on VM ---'
121 | export GCSFS_TEST_BUCKET='gcsfs-test-standard-${_SHORT_BUILD_ID}'
122 | export GCSFS_TEST_VERSIONED_BUCKET='gcsfs-test-versioned-${_SHORT_BUILD_ID}'
123 | export GCSFS_ZONAL_TEST_BUCKET='${_GCSFS_ZONAL_TEST_BUCKET}'
124 |
125 | export STORAGE_EMULATOR_HOST=https://storage.googleapis.com
126 | export GCSFS_TEST_PROJECT=${PROJECT_ID}
127 | export GCSFS_TEST_KMS_KEY=projects/${PROJECT_ID}/locations/${_REGION}/keyRings/${_GCSFS_KEY_RING_NAME}/cryptoKeys/${_GCSFS_KEY_NAME}
128 |
129 | echo '--- Running standard tests on VM ---'
130 | pytest -vv -s \
131 | --log-format='%(asctime)s %(levelname)s %(message)s' \
132 | --log-date-format='%H:%M:%S' \
133 | --color=no \
134 | gcsfs/ \
135 | --deselect gcsfs/tests/test_core.py::test_sign
136 |
137 | echo '--- Running Zonal tests on VM ---'
138 | export GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT="true"
139 | pytest -vv -s \
140 | --log-format='%(asctime)s %(levelname)s %(message)s' \
141 | --log-date-format='%H:%M:%S' \
142 | --color=no \
143 | gcsfs/tests/test_extended_gcsfs.py
144 | "
145 |
146 | # Execute the script on the VM via SSH.
147 | gcloud compute ssh gcsfs-test-vm-${_SHORT_BUILD_ID} --zone=${_ZONE} --internal-ip --ssh-key-file=/workspace/.ssh/google_compute_engine --command="$$VM_SCRIPT"
148 | waitFor:
149 | - "create-vm"
150 | - "create-standard-bucket"
151 | - "enable-bucket-versioning"
152 | - "generate-ssh-key"
153 |
154 | # --- Cleanup Steps ---
155 |
156 | # Step 5: Clean up the SSH key from the OS Login profile.
157 | # This step is crucial to prevent key accumulation.
158 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
159 | id: "cleanup-ssh-key"
160 | entrypoint: "bash"
161 | args:
162 | - "-c"
163 | - |
164 | echo "--- Removing SSH key from OS Login profile to prevent accumulation ---"
165 | gcloud compute os-login ssh-keys remove \
166 | --key-file=/workspace/gcb_ssh_key.pub || true
167 | waitFor:
168 | - "run-tests-on-vm"
169 |
170 | # Step 6: Delete the GCE VM.
171 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
172 | id: "delete-vm"
173 | entrypoint: "gcloud"
174 | args:
175 | - "compute"
176 | - "instances"
177 | - "delete"
178 | - "gcsfs-test-vm-${_SHORT_BUILD_ID}"
179 | - "--zone=${_ZONE}"
180 | - "--quiet"
181 | waitFor:
182 | - "cleanup-ssh-key"
183 |
184 | # Step 7: Delete the standard GCS bucket.
185 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
186 | id: "delete-standard-bucket"
187 | entrypoint: "gcloud"
188 | args:
189 | [
190 | "storage",
191 | "rm",
192 | "--recursive",
193 | "gs://gcsfs-test-standard-${_SHORT_BUILD_ID}",
194 | ]
195 | waitFor:
196 | - "run-tests-on-vm"
197 |
198 | # Step 8: Delete the versioned GCS bucket.
199 | - name: "gcr.io/google.com/cloudsdktool/cloud-sdk"
200 | id: "delete-versioned-bucket"
201 | entrypoint: "gcloud"
202 | args:
203 | [
204 | "storage",
205 | "rm",
206 | "--recursive",
207 | "gs://gcsfs-test-versioned-${_SHORT_BUILD_ID}",
208 | ]
209 | waitFor:
210 | - "run-tests-on-vm"
211 |
212 | timeout: "3600s" # 60 minutes
213 |
214 | options:
215 | logging: CLOUD_LOGGING_ONLY
216 | pool:
217 | name: "projects/${PROJECT_ID}/locations/us-central1/workerPools/cloud-build-worker-pool"
218 |
--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
1 | # Makefile for Sphinx documentation
2 | #
3 |
4 | # You can set these variables from the command line.
5 | SPHINXOPTS =
6 | SPHINXBUILD = sphinx-build
7 | PAPER =
8 | BUILDDIR = build
9 |
10 | # User-friendly check for sphinx-build
11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1)
12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/)
13 | endif
14 |
15 | # Internal variables.
16 | PAPEROPT_a4 = -D latex_paper_size=a4
17 | PAPEROPT_letter = -D latex_paper_size=letter
18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
19 | # the i18n builder cannot share the environment and doctrees with the others
20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source
21 |
22 | .PHONY: help
23 | help:
24 | 	@echo "Please use \`make <target>' where <target> is one of"
25 | @echo " html to make standalone HTML files"
26 | @echo " dirhtml to make HTML files named index.html in directories"
27 | @echo " singlehtml to make a single large HTML file"
28 | @echo " pickle to make pickle files"
29 | @echo " json to make JSON files"
30 | @echo " htmlhelp to make HTML files and a HTML help project"
31 | @echo " qthelp to make HTML files and a qthelp project"
32 | @echo " applehelp to make an Apple Help Book"
33 | @echo " devhelp to make HTML files and a Devhelp project"
34 | @echo " epub to make an epub"
35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter"
36 | @echo " latexpdf to make LaTeX files and run them through pdflatex"
37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx"
38 | @echo " text to make text files"
39 | @echo " man to make manual pages"
40 | @echo " texinfo to make Texinfo files"
41 | @echo " info to make Texinfo files and run them through makeinfo"
42 | @echo " gettext to make PO message catalogs"
43 | @echo " changes to make an overview of all changed/added/deprecated items"
44 | @echo " xml to make Docutils-native XML files"
45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes"
46 | @echo " linkcheck to check all external links for integrity"
47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)"
48 | @echo " coverage to run coverage check of the documentation (if enabled)"
49 |
50 | .PHONY: clean
51 | clean:
52 | rm -rf $(BUILDDIR)/*
53 |
54 | .PHONY: html
55 | html:
56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html
57 | @echo
58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html."
59 |
60 | .PHONY: dirhtml
61 | dirhtml:
62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml
63 | @echo
64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml."
65 |
66 | .PHONY: singlehtml
67 | singlehtml:
68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml
69 | @echo
70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml."
71 |
72 | .PHONY: pickle
73 | pickle:
74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle
75 | @echo
76 | @echo "Build finished; now you can process the pickle files."
77 |
78 | .PHONY: json
79 | json:
80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json
81 | @echo
82 | @echo "Build finished; now you can process the JSON files."
83 |
84 | .PHONY: htmlhelp
85 | htmlhelp:
86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp
87 | @echo
88 | @echo "Build finished; now you can run HTML Help Workshop with the" \
89 | ".hhp project file in $(BUILDDIR)/htmlhelp."
90 |
91 | .PHONY: qthelp
92 | qthelp:
93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp
94 | @echo
95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \
96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:"
97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GCSFs.qhcp"
98 | @echo "To view the help file:"
99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GCSFs.qhc"
100 |
101 | .PHONY: applehelp
102 | applehelp:
103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp
104 | @echo
105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp."
106 | @echo "N.B. You won't be able to view it unless you put it in" \
107 | "~/Library/Documentation/Help or install it in your application" \
108 | "bundle."
109 |
110 | .PHONY: devhelp
111 | devhelp:
112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp
113 | @echo
114 | @echo "Build finished."
115 | @echo "To view the help file:"
116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/GCSFs"
117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GCSFs"
118 | @echo "# devhelp"
119 |
120 | .PHONY: epub
121 | epub:
122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub
123 | @echo
124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub."
125 |
126 | .PHONY: latex
127 | latex:
128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
129 | @echo
130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex."
131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \
132 | "(use \`make latexpdf' here to do that automatically)."
133 |
134 | .PHONY: latexpdf
135 | latexpdf:
136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
137 | @echo "Running LaTeX files through pdflatex..."
138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf
139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
140 |
141 | .PHONY: latexpdfja
142 | latexpdfja:
143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex
144 | @echo "Running LaTeX files through platex and dvipdfmx..."
145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja
146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex."
147 |
148 | .PHONY: text
149 | text:
150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text
151 | @echo
152 | @echo "Build finished. The text files are in $(BUILDDIR)/text."
153 |
154 | .PHONY: man
155 | man:
156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man
157 | @echo
158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man."
159 |
160 | .PHONY: texinfo
161 | texinfo:
162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
163 | @echo
164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo."
165 | @echo "Run \`make' in that directory to run these through makeinfo" \
166 | "(use \`make info' here to do that automatically)."
167 |
168 | .PHONY: info
169 | info:
170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo
171 | @echo "Running Texinfo files through makeinfo..."
172 | make -C $(BUILDDIR)/texinfo info
173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo."
174 |
175 | .PHONY: gettext
176 | gettext:
177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale
178 | @echo
179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale."
180 |
181 | .PHONY: changes
182 | changes:
183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes
184 | @echo
185 | @echo "The overview file is in $(BUILDDIR)/changes."
186 |
187 | .PHONY: linkcheck
188 | linkcheck:
189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck
190 | @echo
191 | @echo "Link check complete; look for any errors in the above output " \
192 | "or in $(BUILDDIR)/linkcheck/output.txt."
193 |
194 | .PHONY: doctest
195 | doctest:
196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest
197 | @echo "Testing of doctests in the sources finished, look at the " \
198 | "results in $(BUILDDIR)/doctest/output.txt."
199 |
200 | .PHONY: coverage
201 | coverage:
202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage
203 | @echo "Testing of coverage in the sources finished, look at the " \
204 | "results in $(BUILDDIR)/coverage/python.txt."
205 |
206 | .PHONY: xml
207 | xml:
208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml
209 | @echo
210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml."
211 |
212 | .PHONY: pseudoxml
213 | pseudoxml:
214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml
215 | @echo
216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml."
217 |
--------------------------------------------------------------------------------
/docs/source/changelog.rst:
--------------------------------------------------------------------------------
1 | Changelog
2 | =========
3 |
4 | Note: in some releases, there are no changes, because we always guarantee
5 | releasing in step with fsspec.
6 |
7 | 2025.12.0
8 | ---------
9 |
10 | * Fix CI when run against real GCS buckets (#71)
11 | * Run extended tests when env var is set (#712)
12 | * Support py3.14 and drop 3.9 (#709)
13 | * Introduce ExtendedGcsFileSystem for Zonal Bucket gRPC Read Path (#707)
14 |
15 | 2025.10.0
16 | ---------
17 |
18 | * fix info() performance regression (#705)
19 | * add CoC (#703)
20 | * mkdir should not create bucket by default (#701)
21 | * add anonymous tracker to docs (#700)
22 |
23 | 2025.9.0
24 | --------
25 |
26 | * fix slow ls iterations (#697)
27 | * Ensure right error type for get() on nonexistent (#695)
28 |
29 | 2025.7.0
30 | --------
31 |
32 | * acknowledge Anaconda support (#691)
33 | * less refreshing for CI (#690)
34 |
35 | 2025.5.1
36 | --------
37 |
38 | * Fix token timezone comparison (#683, 688)
39 |
40 | 2025.5.0
41 | --------
42 |
43 | * Avoid deprecated utcnow (#680)
44 | * Add support for specifying Cloud KMS keys when creating files (#679)
45 | * Yet another fix for isdir (#676)
46 | * Create warning for appending mode 'a' operations (#675)
47 | * add userProject to batch deletion query (#673)
48 |
49 | 2025.3.2
50 | --------
51 |
52 | no changes
53 |
54 | 2025.3.1
55 | --------
56 |
57 | * Fix find with path not ending with "/" (#668)
58 | * remove "beta" note from doc (#666)
59 | * don't check expiry of creds that don't expire (#665)
60 |
61 | 2025.3.0
62 | --------
63 |
64 | * Improvements for credentials refresh under high load (#658)
65 |
66 | 2025.2.0
67 | --------
68 |
69 | * guess upload file MIME types (#655)
70 | * better shutdown cleanup (#657)
71 |
72 | 2024.12.0
73 | ---------
74 |
75 | * Exclusive write (#651)
76 | * Avoid IndexError on integer seconds (#649)
77 | * note on non-posixness (#648)
78 | * handle cache_timeout=0 (#646)
79 |
80 | 2024.10.0
81 | ---------
82 |
83 | * Remove race condition in credentials (#643)
84 | * fix md5 hash order logic (#640)
85 |
86 | 2024.9.0
87 | --------
88 |
89 | * In case error in a pure string (#631)
90 |
91 | 2024.6.1
92 | --------
93 |
94 | no changes
95 |
96 | 2024.6.0
97 | --------
98 |
99 | * Add seek(0) to request data to prevent issues on retries (#624)
100 |
101 | 2024.5.0
102 | --------
103 |
104 | * swap order of "gcs", "gs" protocols (#620)
105 | * fix get_file for relative lpath (#618)
106 |
107 | 2024.3.1
108 | --------
109 |
110 | * fix expiration= for sign() (#613)
111 | * do populate dircache in ls() (#612)
112 | * allow passing extra options to mkdir (#610)
113 | * credentials docs (#609)
114 | * retry in bulk rm (#608)
115 | * clean up loop on close (#606)
116 |
117 | 2024.2.0
118 | --------
119 |
120 | * doc for passing tokens (#603)
121 |
122 | 2023.12.2
123 | ---------
124 |
125 | no changes
126 |
127 | 2023.12.1
128 | ---------
129 |
130 | no changes
131 |
132 | 2023.12.0
133 | ---------
134 |
135 | * use same version when paginating list (#591)
136 | * fix double asterisk glob test (#589)
137 |
138 | 2023.10.0
139 | ---------
140 |
141 | * Fix for transactions of small files (#586)
142 |
143 | 2023.9.2
144 | --------
145 |
146 | * CI updates (#582)
147 |
148 | 2023.9.1
149 | --------
150 |
151 | * small fixes following #573 (#578)
152 |
153 | 2023.9.0
154 | --------
155 |
156 | * bulk operations edge cases (#576, 572)
157 | * inventory report based file listing (#573)
158 | * pickle HttpError (#571)
159 | * avoid warnings (#569)
160 | * maxdepth in find() (#566)
161 | * invalidate dircache (#564)
162 | * standard metadata field names (#563)
163 | * performance of building cache in find() (#561)
164 |
165 |
166 | 2023.6.0
167 | --------
168 |
169 | * allow raw/session token for auth (#554)
170 | * fix listings_expiry_time kwargs (#551)
171 | * allow setting fixed metadata on put/pipe (#550)
172 |
173 | 2023.5.0
174 | --------
175 |
176 | * Allow emulator host without protocol (#548)
177 | * Prevent upload retry from closing the file being sent (#540)
178 |
179 | 2023.4.0
180 | --------
181 |
182 | No changes
183 |
184 | 2023.3.0
185 | --------
186 |
187 | * Don't let find() mess up dircache (#531)
188 | * Drop py3.7 (#529)
189 | * Update docs (#528)
190 | * Make times UTC (#527)
191 | * Use BytesIO for large bodies (#525)
192 | * Fix: Don't append generation when it is absent (#523)
193 | * get/put/cp consistency tests (#521)
194 |
195 | 2023.1.0
196 | --------
197 |
198 | * Support create time (#516, 518)
199 | * defer async session creation (#513, 514)
200 | * support listing of file versions (#509)
201 | * fix ``sign`` following versioned split protocol (#513)
202 |
203 | 2022.11.0
204 | ---------
205 |
206 | * implement object versioning (#504)
207 |
208 | 2022.10.0
209 | ---------
210 |
211 | * bump fsspec to 2022.10.0 (#503)
212 |
213 | 2022.8.1
214 | --------
215 |
216 | * don't install prerelease aiohttp (#490)
217 |
218 | 2022.7.1
219 | --------
220 |
221 | * Try cloud auth by default (#479)
222 |
223 | 2022.5.0
224 | --------
225 |
226 | * invalidate listings cache for simple put/pipe (#474)
227 | * conform _mkdir and _cat_file to upstream (#471)
228 |
229 | 2022.3.0
230 | --------
231 |
232 | (note that this release happened in 2022.4, but we label it as 2022.3 to match
233 | fsspec)
234 |
235 | * bucket exists workaround (#464)
236 | * dirmarkers (#459)
237 | * check connection (#457)
238 | * browser connection now uses local server (#456)
239 | * bucket location (#455)
240 | * ensure auth is closed (#452)
241 |
242 | 2022.02.0
243 | ---------
244 |
245 | * fix list_buckets without cache (#449)
246 | * drop py36 (#445)
247 |
248 | 2022.01.0
249 | ---------
250 |
251 | * update refname for versions (#442)
252 |
253 | 2021.11.1
254 | ---------
255 |
256 | * don't touch cache when doing find with a prefix (#437)
257 |
258 | 2021.11.0
259 | ---------
260 |
261 | * move to fsspec org
262 | * add support for google fixed_key_metadata (#429)
263 | * deprecate `content_encoding` parameter of setxattrs method (#429)
264 | * use emulator for testing instead of vcrpy (#424)
265 |
266 | 2021.10.1
267 | ---------
268 |
269 | * url signing (#411)
270 | * default callback (#422)
271 |
272 | 2021.10.0
273 | ---------
274 |
275 | * min version for decorator
276 | * default callback in get (#422)
277 |
278 | 2021.09.0
279 | ---------
280 |
281 | * correctly recognise 404 (#419)
282 | * fix for .details due to upstream (#417)
283 | * callbacks in get/put (#416)
284 | * "%" in paths (#415)
285 |
286 | 2021.08.1
287 | ---------
288 |
289 | * don't retry 404s (#406)
290 |
291 | 2021.07.0
292 | ---------
293 |
294 | * fix find/glob with a prefix (#399)
295 |
296 | 2021.06.1
297 | ---------
298 |
299 | * kwargs to aiohttpClient session
300 | * graceful timeout when disconnecting at finalise (#397)
301 |
302 | 2021.06.0
303 | ---------
304 |
305 | * negative ranges in cat_file (#394)
306 |
307 | 2021.05.0
308 | ---------
309 |
310 | * no credentials bug fix (#390)
311 | * use googleapis.com (#388)
312 | * more retries (#387, 385, 380)
313 | * Code cleanup (#381)
314 | * license to match stated one (#378)
315 | * deps updated (#376)
316 |
317 | Version 2021.04.0
318 | -----------------
319 |
320 | * switch to calver and fsspec pin
321 |
322 | Version 0.8.0
323 | -------------
324 |
325 | * keep up with fsspec 0.9.0 async
326 | * one-shot find
327 | * consistency checkers
328 | * retries for intermittent issues
329 | * timeouts
330 | * partial cat
331 | * http error status
332 | * CI to GHA
333 |
334 | Version 0.7.0
335 | -------------
336 |
337 | * async operations via aiohttp
338 |
339 |
340 | Version 0.6.0
341 | -------------
342 |
343 | * **API-breaking**: Changed requester-pays handling for ``GCSFileSystem``.
344 |
345 | The ``user_project`` keyword has been removed, and has been replaced with
346 | the ``requester_pays`` keyword. If you're working with a ``requester_pays`` bucket
347 | you will need to explicitly pass ``requester_pays=True``. This will include your
348 | ``project`` ID in requests made to GCS.
349 |
350 | Version 0.5.3
351 | -------------
352 |
353 | * ``GCSFileSystem`` now validates that the ``project`` provided, if any, matches the
354 | Google default project when using ``token='google_default'`` to authenticate (:pr:`219`).
355 | * Fixed bug in ``GCSFileSystem.cat`` on objects in requester-pays buckets (:pr:`217`).
356 |
357 | Version 0.5.2
358 | -------------
359 |
360 | * Fixed bug in ``user_project`` fallback for default Google authentication (:pr:`213`)
361 |
362 | Version 0.5.1
363 | -------------
364 |
365 | * ``user_project`` now falls back to the ``project`` if provided (:pr:`208`)
366 |
367 | Version 0.5.0
368 | -------------
369 |
370 | * Added the ability to make requester-pays requests with the ``user_project`` parameter (:pr:`206`)
371 |
372 | Version 0.4.0
373 | -------------
374 |
375 | * Improved performance when serializing filesystem objects (:pr:`182`)
376 | * Fixed authorization errors when using ``gcsfs`` within multithreaded code (:pr:`183`, :pr:`192`)
377 | * Added contributing instructions (:pr:`185`)
378 | * Improved performance for :meth:`gcsfs.GCSFileSystem.info` (:pr:`187`)
379 | * Fixed bug in :meth:`gcsfs.GCSFileSystem.info` raising an error (:pr:`190`)
380 |
381 | .. raw:: html
382 |
383 |
385 |
--------------------------------------------------------------------------------
/docs/source/conf.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | #
3 | # GCSFs documentation build configuration file, created by
4 | # sphinx-quickstart on Mon Mar 21 15:20:01 2016.
5 | #
6 | # This file is execfile()d with the current directory set to its
7 | # containing dir.
8 | #
9 | # Note that not all possible configuration values are present in this
10 | # autogenerated file.
11 | #
12 | # All configuration values have a default; values that are commented out
13 | # serve to show the default.
14 |
15 | # If extensions (or modules to document with autodoc) are in another directory,
16 | # add these directories to sys.path here. If the directory is relative to the
17 | # documentation root, use os.path.abspath to make it absolute, like shown here.
18 | # sys.path.insert(0, os.path.abspath('.'))
19 |
20 | # -- General configuration ------------------------------------------------
21 |
22 | # If your documentation needs a minimal Sphinx version, state it here.
23 | # needs_sphinx = '1.0'
24 |
25 | # Add any Sphinx extension module names here, as strings. They can be
26 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
27 | # ones.
28 | extensions = [
29 | "sphinx.ext.autodoc",
30 | "sphinx.ext.todo",
31 | "sphinx.ext.ifconfig",
32 | "sphinx.ext.viewcode",
33 | "sphinx.ext.autosummary",
34 | "sphinx.ext.extlinks",
35 | "sphinx.ext.napoleon",
36 | ]
37 |
38 | # Add any paths that contain templates here, relative to this directory.
39 | templates_path = ["_templates"]
40 |
41 | # The suffix(es) of source filenames.
42 | # You can specify multiple suffix as a list of string:
43 | # source_suffix = ['.rst', '.md']
44 | source_suffix = ".rst"
45 |
46 | # The encoding of source files.
47 | # source_encoding = 'utf-8-sig'
48 |
49 | # The master toctree document.
50 | master_doc = "index"
51 |
52 | # General information about the project.
53 | project = "GCSFs"
54 | copyright = "2017, Continuum Analytics"
55 | author = "Continuum Analytics"
56 |
57 | # The version info for the project you're documenting, acts as replacement for
58 | # |version| and |release|, also used in various other places throughout the
59 | # built documents.
60 | #
61 | # The short X.Y version.
62 | import gcsfs
63 |
64 | version = gcsfs.__version__
65 | # The full version, including alpha/beta/rc tags.
66 | release = version
67 |
68 | # There are two options for replacing |today|: either, you set today to some
69 | # non-false value, then it is used:
70 | # today = ''
71 | # Else, today_fmt is used as the format for a strftime call.
72 | # today_fmt = '%B %d, %Y'
73 |
74 | # List of patterns, relative to source directory, that match files and
75 | # directories to ignore when looking for source files.
76 | exclude_patterns = []
77 |
78 | # The reST default role (used for this markup: `text`) to use for all
79 | # documents.
80 | # default_role = None
81 |
82 | # If true, '()' will be appended to :func: etc. cross-reference text.
83 | # add_function_parentheses = True
84 |
85 | # If true, the current module name will be prepended to all description
86 | # unit titles (such as .. function::).
87 | # add_module_names = True
88 |
89 | # If true, sectionauthor and moduleauthor directives will be shown in the
90 | # output. They are ignored by default.
91 | # show_authors = False
92 |
93 | # The name of the Pygments (syntax highlighting) style to use.
94 | pygments_style = "sphinx"
95 |
96 | # A list of ignored prefixes for module index sorting.
97 | # modindex_common_prefix = []
98 |
99 | # If true, keep warnings as "system message" paragraphs in the built documents.
100 | # keep_warnings = False
101 |
102 | # If true, `todo` and `todoList` produce output, else they produce nothing.
103 | todo_include_todos = False
104 |
105 |
106 | # -- Options for HTML output ----------------------------------------------
107 |
108 | html_theme = "sphinx_rtd_theme"
109 |
110 | # Theme options are theme-specific and customize the look and feel of a theme
111 | # further. For a list of options available for each theme, see the
112 | # documentation.
113 | # html_theme_options = {}
114 |
115 | # Add any paths that contain custom themes here, relative to this directory.
116 | # html_theme_path = []
117 |
118 | # The name for this set of Sphinx documents. If None, it defaults to
119 | # "<project> v<release> documentation".
120 | # html_title = None
121 |
122 | # A shorter title for the navigation bar. Default is the same as html_title.
123 | # html_short_title = None
124 |
125 | # The name of an image file (relative to this directory) to place at the top
126 | # of the sidebar.
127 | # html_logo = None
128 |
129 | # The name of an image file (within the static path) to use as favicon of the
130 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
131 | # pixels large.
132 | # html_favicon = None
133 |
134 | # Add any paths that contain custom static files (such as style sheets) here,
135 | # relative to this directory. They are copied after the builtin static files,
136 | # so a file named "default.css" will overwrite the builtin "default.css".
137 | html_static_path = ["_static"]
138 |
139 | # Custom CSS file to override read the docs default CSS.
140 | # Contains workaround for RTD not rendering colon between argument name and type
141 | html_css_files = ["custom.css"]
142 |
143 | # Add any extra paths that contain custom files (such as robots.txt or
144 | # .htaccess) here, relative to this directory. These files are copied
145 | # directly to the root of the documentation.
146 | # html_extra_path = []
147 |
148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
149 | # using the given strftime format.
150 | # html_last_updated_fmt = '%b %d, %Y'
151 |
152 | # If true, SmartyPants will be used to convert quotes and dashes to
153 | # typographically correct entities.
154 | # html_use_smartypants = True
155 |
156 | # Custom sidebar templates, maps document names to template names.
157 | # html_sidebars = {}
158 |
159 | # Additional templates that should be rendered to pages, maps page names to
160 | # template names.
161 | # html_additional_pages = {}
162 |
163 | # If false, no module index is generated.
164 | # html_domain_indices = True
165 |
166 | # If false, no index is generated.
167 | # html_use_index = True
168 |
169 | # If true, the index is split into individual pages for each letter.
170 | # html_split_index = False
171 |
172 | # If true, links to the reST sources are added to the pages.
173 | # html_show_sourcelink = True
174 |
175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
176 | # html_show_sphinx = True
177 |
178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
179 | # html_show_copyright = True
180 |
181 | # If true, an OpenSearch description file will be output, and all pages will
182 | # contain a <link> tag referring to it. The value of this option must be the
183 | # base URL from which the finished HTML is served.
184 | # html_use_opensearch = ''
185 |
186 | # This is the file name suffix for HTML files (e.g. ".xhtml").
187 | # html_file_suffix = None
188 |
189 | # Language to be used for generating the HTML full-text search index.
190 | # Sphinx supports the following languages:
191 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja'
192 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr'
193 | # html_search_language = 'en'
194 |
195 | # A dictionary with options for the search language support, empty by default.
196 | # Now only 'ja' uses this config value
197 | # html_search_options = {'type': 'default'}
198 |
199 | # The name of a javascript file (relative to the configuration directory) that
200 | # implements a search results scorer. If empty, the default will be used.
201 | # html_search_scorer = 'scorer.js'
202 |
203 | # Output file base name for HTML help builder.
204 | htmlhelp_basename = "GCSFSdoc"
205 |
206 | # -- Options for LaTeX output ---------------------------------------------
207 |
208 | latex_elements = {
209 | # The paper size ('letterpaper' or 'a4paper').
210 | #'papersize': 'letterpaper',
211 | # The font size ('10pt', '11pt' or '12pt').
212 | #'pointsize': '10pt',
213 | # Additional stuff for the LaTeX preamble.
214 | #'preamble': '',
215 | # Latex figure (float) alignment
216 | #'figure_align': 'htbp',
217 | }
218 |
219 | # Grouping the document tree into LaTeX files. List of tuples
220 | # (source start file, target name, title,
221 | # author, documentclass [howto, manual, or own class]).
222 | latex_documents = [
223 | (master_doc, "GCSFs.tex", "GCSFs Documentation", "Continuum Analytics", "manual")
224 | ]
225 |
226 | # The name of an image file (relative to this directory) to place at the top of
227 | # the title page.
228 | # latex_logo = None
229 |
230 | # For "manual" documents, if this is true, then toplevel headings are parts,
231 | # not chapters.
232 | # latex_use_parts = False
233 |
234 | # If true, show page references after internal links.
235 | # latex_show_pagerefs = False
236 |
237 | # If true, show URL addresses after external links.
238 | # latex_show_urls = False
239 |
240 | # Documents to append as an appendix to all manuals.
241 | # latex_appendices = []
242 |
243 | # If false, no module index is generated.
244 | # latex_domain_indices = True
245 |
246 |
247 | # -- Options for manual page output ---------------------------------------
248 |
249 | # One entry per manual page. List of tuples
250 | # (source start file, name, description, authors, manual section).
251 | man_pages = [(master_doc, "gcsfs", "GCSFs Documentation", [author], 1)]
252 |
253 | # If true, show URL addresses after external links.
254 | # man_show_urls = False
255 |
256 |
257 | # -- Options for Texinfo output -------------------------------------------
258 |
259 | # Grouping the document tree into Texinfo files. List of tuples
260 | # (source start file, target name, title, author,
261 | # dir menu entry, description, category)
262 | texinfo_documents = [
263 | (
264 | master_doc,
265 | "GCSFs",
266 | "GCSFs Documentation",
267 | author,
268 | "GCSFs",
269 | "One line description of project.",
270 | "Miscellaneous",
271 | )
272 | ]
273 |
274 | # Documents to append as an appendix to all manuals.
275 | # texinfo_appendices = []
276 |
277 | # If false, no module index is generated.
278 | # texinfo_domain_indices = True
279 |
280 | # How to display URL addresses: 'footnote', 'no', or 'inline'.
281 | # texinfo_show_urls = 'footnote'
282 |
283 | # If true, do not generate a @detailmenu in the "Top" node's menu.
284 | # texinfo_no_detailmenu = False
285 |
286 | extlinks = {"pr": ("https://github.com/fsspec/gcsfs/pull/%s", "PR #%s")}
287 |
--------------------------------------------------------------------------------
/gcsfs/extended_gcsfs.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from enum import Enum
3 |
4 | from fsspec import asyn
5 | from google.api_core import exceptions as api_exceptions
6 | from google.api_core import gapic_v1
7 | from google.api_core.client_info import ClientInfo
8 | from google.auth.credentials import AnonymousCredentials
9 | from google.cloud import storage_control_v2
10 | from google.cloud.storage._experimental.asyncio.async_grpc_client import AsyncGrpcClient
11 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
12 | AsyncMultiRangeDownloader,
13 | )
14 |
15 | from gcsfs import __version__ as version
16 | from gcsfs import zb_hns_utils
17 | from gcsfs.core import GCSFile, GCSFileSystem
18 | from gcsfs.zonal_file import ZonalFile
19 |
20 | logger = logging.getLogger("gcsfs")
21 |
22 | USER_AGENT = "python-gcsfs"
23 |
24 |
25 | class BucketType(Enum):
26 | ZONAL_HIERARCHICAL = "ZONAL_HIERARCHICAL"
27 | HIERARCHICAL = "HIERARCHICAL"
28 | NON_HIERARCHICAL = "NON_HIERARCHICAL"
29 | UNKNOWN = "UNKNOWN"
30 |
31 |
32 | gcs_file_types = {
33 | BucketType.ZONAL_HIERARCHICAL: ZonalFile,
34 | BucketType.NON_HIERARCHICAL: GCSFile,
35 | BucketType.HIERARCHICAL: GCSFile,
36 | BucketType.UNKNOWN: GCSFile,
37 | }
38 |
39 |
40 | class ExtendedGcsFileSystem(GCSFileSystem):
41 | """
42 | This class will be used when GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT env variable is set to true.
43 | ExtendedGcsFileSystem is a subclass of GCSFileSystem that adds new logic for bucket types
44 | including zonal and hierarchical. For buckets without special properties, it forwards requests
45 | to the parent class GCSFileSystem for default processing.
46 | """
47 |
48 | def __init__(self, *args, **kwargs):
49 | super().__init__(*args, **kwargs)
50 | self.grpc_client = None
51 | self._storage_control_client = None
52 | # Adds user-passed credentials to ExtendedGcsFileSystem to pass to gRPC/Storage Control clients.
53 | # We unwrap the nested credentials here because self.credentials is a GCSFS wrapper,
54 | # but the clients expect the underlying google.auth credentials object.
55 | self.credential = self.credentials.credentials
56 | # When token="anon", self.credentials.credentials is None. This is
57 | # often used for testing with emulators. However, the gRPC and storage
58 | # control clients require a credentials object for initialization.
59 | # We explicitly use AnonymousCredentials() to allow unauthenticated access.
60 | if self.credentials.token == "anon":
61 | self.credential = AnonymousCredentials()
62 | # initializing grpc and storage control client for Hierarchical and
63 | # zonal bucket operations
64 | self.grpc_client = asyn.sync(self.loop, self._create_grpc_client)
65 | self._storage_control_client = asyn.sync(
66 | self.loop, self._create_control_plane_client
67 | )
68 | self._storage_layout_cache = {}
69 |
70 | async def _create_grpc_client(self):
71 | if self.grpc_client is None:
72 | return AsyncGrpcClient(
73 | credentials=self.credential,
74 | client_info=ClientInfo(user_agent=f"{USER_AGENT}/{version}"),
75 | ).grpc_client
76 | else:
77 | return self.grpc_client
78 |
79 | async def _create_control_plane_client(self):
80 | # Initialize the storage control plane client for bucket
81 | # metadata operations
82 | client_info = gapic_v1.client_info.ClientInfo(
83 | user_agent=f"{USER_AGENT}/{version}"
84 | )
85 | return storage_control_v2.StorageControlAsyncClient(
86 | credentials=self.credential, client_info=client_info
87 | )
88 |
89 | async def _lookup_bucket_type(self, bucket):
90 | if bucket in self._storage_layout_cache:
91 | return self._storage_layout_cache[bucket]
92 | bucket_type = await self._get_bucket_type(bucket)
93 | # Don't cache the UNKNOWN type
94 | if bucket_type == BucketType.UNKNOWN:
95 | return BucketType.UNKNOWN
96 | self._storage_layout_cache[bucket] = bucket_type
97 | return self._storage_layout_cache[bucket]
98 |
99 | _sync_lookup_bucket_type = asyn.sync_wrapper(_lookup_bucket_type)
100 |
101 | async def _get_bucket_type(self, bucket):
102 | try:
103 | bucket_name_value = f"projects/_/buckets/{bucket}/storageLayout"
104 | response = await self._storage_control_client.get_storage_layout(
105 | name=bucket_name_value
106 | )
107 |
108 | if response.location_type == "zone":
109 | return BucketType.ZONAL_HIERARCHICAL
110 | else:
111 | # This should be updated to include HNS in the future
112 | return BucketType.NON_HIERARCHICAL
113 | except api_exceptions.NotFound:
114 | logger.warning(f"Error: Bucket {bucket} not found or you lack permissions.")
115 | return BucketType.UNKNOWN
116 | except Exception as e:
117 | logger.error(
118 | f"Could not determine bucket type for bucket name {bucket}: {e}"
119 | )
120 | # Default to UNKNOWN in case bucket type is not obtained
121 | return BucketType.UNKNOWN
122 |
123 | def _open(
124 | self,
125 | path,
126 | mode="rb",
127 | block_size=None,
128 | cache_options=None,
129 | acl=None,
130 | consistency=None,
131 | metadata=None,
132 | autocommit=True,
133 | fixed_key_metadata=None,
134 | generation=None,
135 | **kwargs,
136 | ):
137 | """
138 | Open a file.
139 | """
140 | bucket, _, _ = self.split_path(path)
141 | bucket_type = self._sync_lookup_bucket_type(bucket)
142 | return gcs_file_types[bucket_type](
143 | self,
144 | path,
145 | mode,
146 | block_size=block_size or self.default_block_size,
147 | cache_options=cache_options,
148 | consistency=consistency or self.consistency,
149 | metadata=metadata,
150 | acl=acl,
151 | autocommit=autocommit,
152 | fixed_key_metadata=fixed_key_metadata,
153 | generation=generation,
154 | **kwargs,
155 | )
156 |
157 | # Replacement method for _process_limits to support new params (offset and length) for MRD.
158 | async def _process_limits_to_offset_and_length(self, path, start, end):
159 | """
160 | Calculates the read offset and length from start and end parameters.
161 |
162 | Args:
163 | path (str): The path to the file.
164 | start (int | None): The starting byte position.
165 | end (int | None): The ending byte position.
166 |
167 | Returns:
168 | tuple: A tuple containing (offset, length).
169 |
170 | Raises:
171 | ValueError: If the calculated range is invalid.
172 | """
173 | size = None
174 |
175 | if start is None:
176 | offset = 0
177 | elif start < 0:
178 | size = (await self._info(path))["size"] if size is None else size
179 | offset = size + start
180 | else:
181 | offset = start
182 |
183 | if end is None:
184 | size = (await self._info(path))["size"] if size is None else size
185 | effective_end = size
186 | elif end < 0:
187 | size = (await self._info(path))["size"] if size is None else size
188 | effective_end = size + end
189 | else:
190 | effective_end = end
191 |
192 | if offset < 0:
193 | raise ValueError(f"Calculated start offset ({offset}) cannot be negative.")
194 | if effective_end < offset:
195 | raise ValueError(
196 | f"Calculated end position ({effective_end}) cannot be before start offset ({offset})."
197 | )
198 | elif effective_end == offset:
199 | length = 0 # Handle zero-length slice
200 | else:
201 | length = effective_end - offset # Normal case
202 | size = (await self._info(path))["size"] if size is None else size
203 | if effective_end > size:
204 | length = max(0, size - offset) # Clamp and ensure non-negative
205 |
206 | return offset, length
207 |
208 | sync_process_limits_to_offset_and_length = asyn.sync_wrapper(
209 | _process_limits_to_offset_and_length
210 | )
211 |
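# Worked example for _process_limits_to_offset_and_length above, assuming a
# hypothetical object of size 100 bytes; (start, end) maps to (offset, length):
#   start=None, end=None -> (0, 100)    whole object
#   start=-10,  end=None -> (90, 10)    last 10 bytes
#   start=10,   end=-10  -> (10, 80)    trim 10 bytes from each end
#   start=20,   end=20   -> (20, 0)     zero-length slice
#   start=90,   end=200  -> (90, 10)    end clamped to the object size
#   start=50,   end=40   -> ValueError  end before start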
212 | async def _is_zonal_bucket(self, bucket):
213 | bucket_type = await self._lookup_bucket_type(bucket)
214 | return bucket_type == BucketType.ZONAL_HIERARCHICAL
215 |
216 | async def _cat_file(self, path, start=None, end=None, mrd=None, **kwargs):
217 | """Fetch a file's contents as bytes, with an optimized path for Zonal buckets.
218 |
219 | This method overrides the parent `_cat_file` to read objects in Zonal buckets using gRPC.
220 |
221 | Args:
222 | path (str): The full GCS path to the file (e.g., "bucket/object").
223 | start (int, optional): The starting byte position to read from.
224 | end (int, optional): The ending byte position to read to.
225 | mrd (AsyncMultiRangeDownloader, optional): An existing multi-range
226 | downloader instance. If not provided, a new one will be created for Zonal buckets.
227 |
228 | Returns:
229 | bytes: The content of the file or file range.
230 | """
231 | mrd = mrd or kwargs.pop("mrd", None)
232 | mrd_created = False
233 |
234 | # A new MRD is required when a read is done directly by the
235 | # GCSFileSystem class without creating a GCSFile object first.
236 | if mrd is None:
237 | bucket, object_name, generation = self.split_path(path)
238 | # Fall back to default implementation if not a zonal bucket
239 | if not await self._is_zonal_bucket(bucket):
240 | return await super()._cat_file(path, start=start, end=end, **kwargs)
241 |
242 | mrd = await AsyncMultiRangeDownloader.create_mrd(
243 | self.grpc_client, bucket, object_name, generation
244 | )
245 | mrd_created = True
246 |
247 | offset, length = await self._process_limits_to_offset_and_length(
248 | path, start, end
249 | )
250 | try:
251 | return await zb_hns_utils.download_range(
252 | offset=offset, length=length, mrd=mrd
253 | )
254 | finally:
255 | # Explicit cleanup if we created the MRD
256 | if mrd_created:
257 | await mrd.close()
258 |
--------------------------------------------------------------------------------
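A minimal usage sketch for ExtendedGcsFileSystem above, assuming the
GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT flag is honoured elsewhere in the package and
using hypothetical project and bucket names:

import os

from gcsfs.extended_gcsfs import ExtendedGcsFileSystem

# Opt in to the experimental zonal/HNS code path (illustrative; the flag is
# read elsewhere in the package, not by this snippet).
os.environ["GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"] = "true"

fs = ExtendedGcsFileSystem(project="my-project", token="google_default")

# For zonal buckets, cat_file goes through the gRPC multi-range downloader;
# other bucket types fall back to the parent GCSFileSystem implementation.
first_kb = fs.cat_file("my-zonal-bucket/data/part-0000.json", start=0, end=1024)

# open() dispatches to ZonalFile or GCSFile depending on the bucket layout.
with fs.open("my-zonal-bucket/data/part-0000.json", "rb") as f:
    header = f.read(100)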
/gcsfs/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import logging
2 | import os
3 | import shlex
4 | import subprocess
5 | import time
6 |
7 | import fsspec
8 | import pytest
9 | import requests
10 | from google.cloud import storage
11 |
12 | from gcsfs import GCSFileSystem
13 | from gcsfs.tests.settings import TEST_BUCKET, TEST_VERSIONED_BUCKET, TEST_ZONAL_BUCKET
14 |
15 | files = {
16 | "test/accounts.1.json": (
17 | b'{"amount": 100, "name": "Alice"}\n'
18 | b'{"amount": 200, "name": "Bob"}\n'
19 | b'{"amount": 300, "name": "Charlie"}\n'
20 | b'{"amount": 400, "name": "Dennis"}\n'
21 | ),
22 | "test/accounts.2.json": (
23 | b'{"amount": 500, "name": "Alice"}\n'
24 | b'{"amount": 600, "name": "Bob"}\n'
25 | b'{"amount": 700, "name": "Charlie"}\n'
26 | b'{"amount": 800, "name": "Dennis"}\n'
27 | ),
28 | }
29 |
30 | csv_files = {
31 | "2014-01-01.csv": (
32 | b"name,amount,id\n" b"Alice,100,1\n" b"Bob,200,2\n" b"Charlie,300,3\n"
33 | ),
34 | "2014-01-02.csv": b"name,amount,id\n",
35 | "2014-01-03.csv": (
36 | b"name,amount,id\n" b"Dennis,400,4\n" b"Edith,500,5\n" b"Frank,600,6\n"
37 | ),
38 | }
39 | text_files = {
40 | "nested/file1": b"hello\n",
41 | "nested/file2": b"world",
42 | "nested/nested2/file1": b"hello\n",
43 | "nested/nested2/file2": b"world",
44 | }
45 | allfiles = dict(**files, **csv_files, **text_files)
46 | a = TEST_BUCKET + "/tmp/test/a"
47 | b = TEST_BUCKET + "/tmp/test/b"
48 | c = TEST_BUCKET + "/tmp/test/c"
49 | d = TEST_BUCKET + "/tmp/test/d"
50 |
51 | params = dict()
52 |
53 |
54 | def stop_docker(container):
55 | cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container)
56 | cid = subprocess.check_output(cmd).strip().decode()
57 | if cid:
58 | subprocess.call(["docker", "rm", "-f", "-v", cid])
59 |
60 |
61 | @pytest.fixture(scope="session")
62 | def docker_gcs():
63 | if "STORAGE_EMULATOR_HOST" in os.environ:
64 | # assume using real API or otherwise have a server already set up
65 | yield os.getenv("STORAGE_EMULATOR_HOST")
66 | return
67 | params["token"] = "anon"
68 | container = "gcsfs_test"
69 | cmd = (
70 | "docker run -d -p 4443:4443 --name gcsfs_test fsouza/fake-gcs-server:latest -scheme "
71 | "http -public-host 0.0.0.0:4443 -external-url http://localhost:4443 "
72 | "-backend memory"
73 | )
74 | stop_docker(container)
75 | subprocess.check_output(shlex.split(cmd))
76 | url = "http://0.0.0.0:4443"
77 | timeout = 10
78 | while True:
79 | try:
80 | r = requests.get(url + "/storage/v1/b")
81 | if r.ok:
82 | yield url
83 | break
84 | except Exception as e: # noqa: E722
85 | timeout -= 1
86 | if timeout < 0:
87 | raise SystemError from e
88 | time.sleep(1)
89 | stop_docker(container)
90 |
91 |
92 | @pytest.fixture(scope="session")
93 | def gcs_factory(docker_gcs):
94 | params["endpoint_url"] = docker_gcs
95 |
96 | def factory(**kwargs):
97 | GCSFileSystem.clear_instance_cache()
98 | return fsspec.filesystem("gcs", **params, **kwargs)
99 |
100 | return factory
101 |
102 |
103 | @pytest.fixture(scope="session")
104 | def buckets_to_delete():
105 | """
106 | Provides a session-scoped set to track the names of GCS buckets that are
107 | created by the test suite.
108 |
109 | When tests run, they may create new GCS buckets. If these buckets are not
110 | deleted, they will persist after the test run, leading to resource leakage.
111 | This set acts as a registry of buckets that the `final_cleanup` fixture
112 | should remove at the end of the entire test session.
113 | """
114 | return set()
115 |
116 |
117 | @pytest.fixture
118 | def gcs(gcs_factory, buckets_to_delete, populate=True):
119 | gcs = gcs_factory()
120 | try: # ensure we're empty.
121 | # Create the bucket if it doesn't exist, otherwise clean it.
122 | if not gcs.exists(TEST_BUCKET):
123 | gcs.mkdir(TEST_BUCKET)
124 | # By adding the bucket name to this set, we are marking it for
125 | # deletion at the end of the test session. This ensures that if
126 | # the test suite creates the bucket, it will also be responsible
127 | # for deleting it. If the bucket already existed, we assume it's
128 | # managed externally and should not be deleted by the tests.
129 | buckets_to_delete.add(TEST_BUCKET)
130 | else:
131 | try:
132 | gcs.rm(gcs.find(TEST_BUCKET))
133 | except Exception as e:
134 | logging.warning(f"Failed to empty bucket {TEST_BUCKET}: {e}")
135 |
136 | if populate:
137 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()})
138 | gcs.invalidate_cache()
139 | yield gcs
140 | finally:
141 | _cleanup_gcs(gcs)
142 |
143 |
144 | @pytest.fixture
145 | def extended_gcs_factory(gcs_factory, buckets_to_delete, populate=True):
146 | created_instances = []
147 |
148 | def factory(**kwargs):
149 | fs = _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate, **kwargs)
150 | created_instances.append(fs)
151 | return fs
152 |
153 | yield factory
154 |
155 | for fs in created_instances:
156 | _cleanup_gcs(fs)
157 |
158 |
159 | @pytest.fixture
160 | def extended_gcsfs(gcs_factory, buckets_to_delete, populate=True):
161 | extended_gcsfs = _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate)
162 | try:
163 | yield extended_gcsfs
164 | finally:
165 | _cleanup_gcs(extended_gcsfs)
166 |
167 |
168 | def _cleanup_gcs(gcs):
169 | """Clean the bucket contents, logging a warning on failure."""
170 | try:
171 | gcs.rm(gcs.find(TEST_BUCKET))
172 | except Exception as e:
173 | logging.warning(f"Failed to clean up GCS bucket {TEST_BUCKET}: {e}")
174 |
175 |
176 | @pytest.fixture(scope="session", autouse=True)
177 | def final_cleanup(gcs_factory, buckets_to_delete):
178 | """
179 | A session-scoped, auto-use fixture that deletes all buckets registered
180 | in the `buckets_to_delete` set after the entire test session is complete.
181 | """
182 | yield
183 | # This code runs after the entire test session finishes
184 |
185 | gcs = gcs_factory()
186 | for bucket in buckets_to_delete:
187 | # The cleanup logic attempts to delete every bucket that was
188 | # added to the set during the session. For real GCS, only delete if
189 | # created by the test suite.
190 | try:
191 | if gcs.exists(bucket):
192 | gcs.rm(bucket, recursive=True)
193 | logging.info(f"Cleaned up bucket: {bucket}")
194 | except Exception as e:
195 | logging.warning(f"Failed to perform final cleanup for bucket {bucket}: {e}")
196 |
197 |
198 | @pytest.fixture
199 | def gcs_versioned(gcs_factory, buckets_to_delete):
200 | gcs = gcs_factory()
201 | gcs.version_aware = True
202 | is_real_gcs = (
203 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
204 | )
205 | try: # ensure we're empty.
206 | # The versioned bucket might be created by `is_versioning_enabled`
207 | # in test_core_versioned.py. We must register it for cleanup only if
208 | # it was created by this test run.
209 | try:
210 | from gcsfs.tests.test_core_versioned import (
211 | _VERSIONED_BUCKET_CREATED_BY_TESTS,
212 | )
213 |
214 | if _VERSIONED_BUCKET_CREATED_BY_TESTS:
215 | buckets_to_delete.add(TEST_VERSIONED_BUCKET)
216 | except ImportError:
217 | pass # test_core_versioned is not being run
218 | if is_real_gcs:
219 | cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
220 | else:
221 | # For emulators, we delete and recreate the bucket for a clean state
222 | try:
223 | gcs.rm(TEST_VERSIONED_BUCKET, recursive=True)
224 | except FileNotFoundError:
225 | pass
226 | gcs.mkdir(TEST_VERSIONED_BUCKET, enable_versioning=True)
227 | buckets_to_delete.add(TEST_VERSIONED_BUCKET)
228 | gcs.invalidate_cache()
229 | yield gcs
230 | finally:
231 | # Ensure the bucket is empty after the test.
232 | try:
233 | if is_real_gcs:
234 | cleanup_versioned_bucket(gcs, TEST_VERSIONED_BUCKET)
235 | except Exception as e:
236 | logging.warning(
237 | f"Failed to clean up versioned bucket {TEST_VERSIONED_BUCKET} after test: {e}"
238 | )
239 |
240 |
241 | def cleanup_versioned_bucket(gcs, bucket_name, prefix=None):
242 | """
243 | Deletes all object versions in a bucket using the google-cloud-storage client,
244 | ensuring it uses the same credentials as the gcsfs instance.
245 | """
246 | # Define a retry policy for API calls to handle rate limiting.
247 | # This can retry on 429 Too Many Requests errors, which can happen
248 | # when deleting many object versions quickly.
249 | from google.api_core.retry import Retry
250 |
251 | retry_policy = Retry(
252 | initial=1.0, # Initial delay in seconds
253 | maximum=30.0, # Maximum delay in seconds
254 | multiplier=1.2, # Backoff factor
255 | )
256 |
257 | client = storage.Client(
258 | credentials=gcs.credentials.credentials, project=gcs.project
259 | )
260 |
261 | # List all blobs, including old versions
262 | blobs_to_delete = list(client.list_blobs(bucket_name, versions=True, prefix=prefix))
263 |
264 | if not blobs_to_delete:
265 | logging.info("No object versions to delete in %s.", bucket_name)
266 | return
267 |
268 | logging.info(
269 | "Deleting %d object versions from %s.", len(blobs_to_delete), bucket_name
270 | )
271 | time.sleep(2)
272 | for blob in blobs_to_delete:
273 | blob.delete(retry=retry_policy)
274 |
275 | logging.info("Successfully deleted %d object versions.", len(blobs_to_delete))
276 |
277 |
278 | def _create_extended_gcsfs(gcs_factory, buckets_to_delete, populate=True, **kwargs):
279 | is_real_gcs = (
280 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
281 | )
282 |
283 | extended_gcsfs = gcs_factory(**kwargs)
284 | # Only create/delete/populate the bucket if we are NOT using the real GCS endpoint.
285 | if not is_real_gcs:
286 | try:
287 | extended_gcsfs.rm(TEST_ZONAL_BUCKET, recursive=True)
288 | except FileNotFoundError:
289 | pass
290 | extended_gcsfs.mkdir(TEST_ZONAL_BUCKET)
291 | buckets_to_delete.add(TEST_ZONAL_BUCKET)
292 | if populate:
293 | extended_gcsfs.pipe(
294 | {TEST_ZONAL_BUCKET + "/" + k: v for k, v in allfiles.items()}
295 | )
296 | extended_gcsfs.invalidate_cache()
297 | return extended_gcsfs
298 |
--------------------------------------------------------------------------------
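The fixtures above point gcsfs at a local fake-gcs-server container through
endpoint_url and token="anon". A minimal sketch of the same connection outside
of pytest, assuming the container started by docker_gcs is already listening on
port 4443 and using a hypothetical bucket name:

import fsspec

fs = fsspec.filesystem(
    "gcs",
    endpoint_url="http://0.0.0.0:4443",  # the URL the docker_gcs fixture yields
    token="anon",  # matches params["token"] = "anon" in the fixture
)

fs.mkdir("scratch-bucket")  # hypothetical bucket name
fs.pipe("scratch-bucket/hello.txt", b"hello\n")
print(fs.cat("scratch-bucket/hello.txt"))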
/gcsfs/credentials.py:
--------------------------------------------------------------------------------
1 | import json
2 | import logging
3 | import os
4 | import pickle
5 | import textwrap
6 | import threading
7 | import warnings
8 | from datetime import datetime, timezone
9 |
10 | import google.auth as gauth
11 | import google.auth.compute_engine
12 | import google.auth.credentials
13 | import google.auth.exceptions
14 | import requests
15 | from google.auth.transport.requests import Request
16 | from google.oauth2 import service_account
17 | from google.oauth2.credentials import Credentials
18 | from google_auth_oauthlib.flow import InstalledAppFlow
19 |
20 | from gcsfs.retry import HttpError
21 |
22 | logger = logging.getLogger("gcsfs.credentials")
23 |
24 | tfile = os.path.join(os.path.expanduser("~"), ".gcs_tokens")
25 |
26 | not_secret = {
27 | "client_id": "586241054156-9kst7ltfj66svc342pcn43vp6ta3idin"
28 | ".apps.googleusercontent.com",
29 | "client_secret": "xto0LIFYX35mmHF9T1R2QBqT",
30 | }
31 |
32 | client_config = {
33 | "installed": {
34 | "client_id": not_secret["client_id"],
35 | "client_secret": not_secret["client_secret"],
36 | "auth_uri": "https://accounts.google.com/o/oauth2/auth",
37 | "token_uri": "https://accounts.google.com/o/oauth2/token",
38 | }
39 | }
40 |
41 |
42 | class GoogleCredentials:
43 | def __init__(self, project, access, token, check_credentials=None, on_google=True):
44 | self.scope = "https://www.googleapis.com/auth/devstorage." + access
45 | self.project = project
46 | self.access = access
47 | self.heads = {}
48 |
49 | self.credentials = None
50 | self.method = None
51 | self.lock = threading.Lock()
52 | self.token = token
53 | self.on_google = on_google
54 | self.connect(method=token)
55 |
56 | if check_credentials:
57 | warnings.warn(
58 | "The `check_credentials` argument is deprecated and will be removed in a future release.",
59 | DeprecationWarning,
60 | )
61 |
62 | @classmethod
63 | def load_tokens(cls):
64 | """Get "browser" tokens from disc"""
65 | try:
66 | with open(tfile, "rb") as f:
67 | tokens = pickle.load(f)
68 | except Exception:
69 | tokens = {}
70 | GoogleCredentials.tokens = tokens
71 |
72 | @staticmethod
73 | def _save_tokens():
74 | try:
75 | with open(tfile, "wb") as f:
76 | pickle.dump(GoogleCredentials.tokens, f, 2)
77 | except Exception as e:
78 | warnings.warn("Saving token cache failed: " + str(e))
79 |
80 | def _connect_google_default(self):
81 | credentials, project = gauth.default(scopes=[self.scope])
82 | msg = textwrap.dedent(
83 | """\
84 | User-provided project '{}' does not match the google default project '{}'. Either
85 |
86 | 1. Accept the google-default project by not passing a `project` to GCSFileSystem
87 | 2. Configure the default project to match the user-provided project (gcloud config set project)
88 | 3. Use an authorization method other than 'google_default' by providing 'token=...'
89 | """
90 | )
91 | if self.project and self.project != project:
92 | raise ValueError(msg.format(self.project, project))
93 | self.project = project
94 | self.credentials = credentials
95 |
96 | def _connect_cloud(self):
97 | if not self.on_google:
98 | raise ValueError
99 | self.credentials = gauth.compute_engine.Credentials()
100 | try:
101 | with requests.Session() as session:
102 | req = Request(session)
103 | self.credentials.refresh(req)
104 | except gauth.exceptions.RefreshError as error:
105 | raise ValueError("Invalid gcloud credentials") from error
106 |
107 | def _connect_cache(self):
108 | if len(self.tokens) == 0:
109 | raise ValueError("No cached tokens")
110 |
111 | project, access = self.project, self.access
112 | if (project, access) in self.tokens:
113 | credentials = self.tokens[(project, access)]
114 | self.credentials = credentials
115 |
116 | def _dict_to_credentials(self, token):
117 | """
118 | Convert old dict-style token.
119 |
120 | Does not preserve access token itself, assumes refresh required.
121 | """
122 | try:
123 | token = service_account.Credentials.from_service_account_info(
124 | token, scopes=[self.scope]
125 | )
126 | except: # noqa: E722
127 | # TODO: catch specific exceptions
128 | # According https://github.com/googleapis/python-cloud-core/blob/master/google/cloud/client.py
129 | # Scopes required for authenticating with a service. User authentication fails
130 | # with invalid_scope if scope is specified.
131 | token = Credentials(
132 | None,
133 | refresh_token=token["refresh_token"],
134 | client_secret=token["client_secret"],
135 | client_id=token["client_id"],
136 | token_uri="https://oauth2.googleapis.com/token",
137 | )
138 | return token
139 |
140 | def _connect_token(self, token):
141 | """
142 | Connect using a concrete token
143 |
144 | Parameters
145 | ----------
146 | token: str, dict or Credentials
147 | If a str and a valid file name, try to load as a Service file, or next as a JSON;
148 | if not a valid file name, assume it's a valid raw (non-renewable/session) token, and pass to Credentials. If
149 | dict, try to interpret as credentials; if Credentials, use directly.
150 | """
151 | if isinstance(token, str):
152 | if os.path.exists(token):
153 | try:
154 | # is this a "service" token?
155 | self._connect_service(token)
156 | return
157 | except: # noqa: E722
158 | # TODO: catch specific exceptions
159 | # some other kind of token file
160 | # will raise exception if is not json
161 | with open(token) as data:
162 | token = json.load(data)
163 | else:
164 | token = Credentials(token)
165 | if isinstance(token, dict):
166 | credentials = self._dict_to_credentials(token)
167 | elif isinstance(token, google.auth.credentials.Credentials):
168 | credentials = token
169 | else:
170 | raise ValueError("Token format not understood")
171 | self.credentials = credentials
172 | if self.credentials.valid:
173 | self.credentials.apply(self.heads)
174 |
175 | def _credentials_valid(self, refresh_buffer):
176 | return (
177 | self.credentials.valid
178 | # In addition to checking current validity, we ensure that there is
179 | # not a near-future expiry to avoid errors when expiration hits.
180 | and (
181 | (
182 | self.credentials.expiry
183 | and (
184 | self.credentials.expiry.replace(tzinfo=timezone.utc)
185 | - datetime.now(timezone.utc)
186 | ).total_seconds()
187 | > refresh_buffer
188 | )
189 | or not self.credentials.expiry
190 | )
191 | )
192 |
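# Worked example of the validity check above, assuming the default
# refresh_buffer=300 used by maybe_refresh() below: credentials that are still
# valid but expire in 120 seconds fail the check (120 <= 300), so
# maybe_refresh() refreshes them early; credentials with no expiry at all
# always pass and are never refreshed here.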
193 | def maybe_refresh(self, refresh_buffer=300):
194 | """
195 | Check and refresh credentials if needed
196 | """
197 | if self.credentials is None:
198 | return # anon
199 |
200 | if self._credentials_valid(refresh_buffer):
201 | return # still good, with buffer
202 |
203 | with requests.Session() as session:
204 | req = Request(session)
205 | with self.lock:
206 | if self._credentials_valid(refresh_buffer):
207 | return # repeat check to avoid race conditions
208 |
209 | logger.debug("GCS refresh")
210 | try:
211 | self.credentials.refresh(req)
212 | except gauth.exceptions.RefreshError as error:
213 | # Re-raise as HttpError with a 401 code and the expected message
214 | raise HttpError(
215 | {"code": 401, "message": "Invalid Credentials"}
216 | ) from error
217 |
218 | # https://github.com/fsspec/filesystem_spec/issues/565
219 | self.credentials.apply(self.heads)
220 |
221 | def apply(self, out):
222 | """Insert credential headers in-place to a dictionary"""
223 | self.maybe_refresh()
224 | if self.credentials is not None:
225 | self.credentials.apply(out)
226 |
227 | def _connect_service(self, fn):
228 | # raises exception if the file does not match expectation
229 | credentials = service_account.Credentials.from_service_account_file(
230 | fn, scopes=[self.scope]
231 | )
232 | self.credentials = credentials
233 |
234 | def _connect_anon(self):
235 | self.credentials = None
236 |
237 | def _connect_browser(self):
238 | flow = InstalledAppFlow.from_client_config(client_config, [self.scope])
239 | credentials = flow.run_local_server()
240 | self.tokens[(self.project, self.access)] = credentials
241 | self._save_tokens()
242 | self.credentials = credentials
243 |
244 | def connect(self, method=None):
245 | """
246 | Establish session token. A new token will be requested if the current
247 | one is within 100s of expiry.
248 |
249 | Parameters
250 | ----------
251 | method: str (google_default|cache|cloud|token|anon|browser) or None
252 | Type of authorisation to implement - calls `_connect_*` methods.
253 | If None, will try sequence of methods.
254 | """
255 | if method not in [
256 | "google_default",
257 | "cache",
258 | "cloud",
259 | "token",
260 | "anon",
261 | None,
262 | ]:
263 | self._connect_token(method)
264 | elif method is None:
265 | for meth in ["google_default", "cache", "cloud", "anon"]:
266 | try:
267 | self.connect(method=meth)
268 | logger.debug("Connected with method %s", meth)
269 | break
270 | except (google.auth.exceptions.GoogleAuthError, ValueError) as e:
271 | # GoogleAuthError is the base class for all authentication
272 | # errors
273 | logger.debug(
274 | 'Connection with method "%s" failed' % meth, exc_info=e
275 | )
276 | # Reset credentials if they were set but the authentication failed
277 | # (reverts to 'anon' behavior)
278 | self.credentials = None
279 | else:
280 | # Since the 'anon' connection method should always succeed,
281 | # getting here means something has gone terribly wrong.
282 | raise RuntimeError("All connection methods have failed!")
283 | else:
284 | self.__getattribute__("_connect_" + method)()
285 | self.method = method
286 |
--------------------------------------------------------------------------------
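A short sketch of the token forms handled by _connect_token above, as they are
usually supplied through GCSFileSystem; the file path, project name and dict
contents are hypothetical placeholders:

from gcsfs import GCSFileSystem

# Named methods dispatched by connect():
fs_default = GCSFileSystem(project="my-project", token="google_default")
fs_anon = GCSFileSystem(token="anon")

# A string that is an existing file is first tried as a service-account file
# (_connect_service), then as JSON; any other string is treated as a raw token.
fs_sa = GCSFileSystem(token="/path/to/service-account.json")

# A dict is converted via _dict_to_credentials(); a google.auth Credentials
# object is used directly.
creds_dict = {
    "client_id": "my-client-id",
    "client_secret": "my-client-secret",
    "refresh_token": "my-refresh-token",
}
fs_dict = GCSFileSystem(token=creds_dict)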
/gcsfs/tests/test_extended_gcsfs.py:
--------------------------------------------------------------------------------
1 | import contextlib
2 | import io
3 | import os
4 | from itertools import chain
5 | from unittest import mock
6 |
7 | import pytest
8 | from google.cloud.storage._experimental.asyncio.async_multi_range_downloader import (
9 | AsyncMultiRangeDownloader,
10 | )
11 | from google.cloud.storage.exceptions import DataCorruption
12 |
13 | from gcsfs.checkers import ConsistencyChecker, MD5Checker, SizeChecker
14 | from gcsfs.extended_gcsfs import BucketType
15 | from gcsfs.tests.conftest import csv_files, files, text_files
16 | from gcsfs.tests.settings import TEST_ZONAL_BUCKET
17 |
18 | file = "test/accounts.1.json"
19 | file_path = f"{TEST_ZONAL_BUCKET}/{file}"
20 | json_data = files[file]
21 | lines = io.BytesIO(json_data).readlines()
22 | file_size = len(json_data)
23 |
24 | REQUIRED_ENV_VAR = "GCSFS_EXPERIMENTAL_ZB_HNS_SUPPORT"
25 |
26 | a = TEST_ZONAL_BUCKET + "/tmp/test/a"
27 | b = TEST_ZONAL_BUCKET + "/tmp/test/b"
28 | c = TEST_ZONAL_BUCKET + "/tmp/test/c"
29 |
30 | # Tests in this file run only when this condition is True.
31 | should_run = os.getenv(REQUIRED_ENV_VAR, "false").lower() in (
32 | "true",
33 | "1",
34 | )
35 | pytestmark = pytest.mark.skipif(
36 | not should_run, reason=f"Skipping tests: {REQUIRED_ENV_VAR} env variable is not set"
37 | )
38 |
39 |
40 | @pytest.fixture
41 | def gcs_bucket_mocks():
42 | """A factory fixture for mocking bucket functionality for different bucket types."""
43 |
44 | @contextlib.contextmanager
45 | def _gcs_bucket_mocks_factory(file_data, bucket_type_val):
46 | """Creates mocks for a given file content and bucket type."""
47 | is_real_gcs = (
48 | os.environ.get("STORAGE_EMULATOR_HOST") == "https://storage.googleapis.com"
49 | )
50 | if is_real_gcs:
51 | yield None
52 | return
53 | patch_target_lookup_bucket_type = (
54 | "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._lookup_bucket_type"
55 | )
56 | patch_target_sync_lookup_bucket_type = (
57 | "gcsfs.extended_gcsfs.ExtendedGcsFileSystem._sync_lookup_bucket_type"
58 | )
59 | patch_target_create_mrd = (
60 | "google.cloud.storage._experimental.asyncio.async_multi_range_downloader"
61 | ".AsyncMultiRangeDownloader.create_mrd"
62 | )
63 | patch_target_gcsfs_cat_file = "gcsfs.core.GCSFileSystem._cat_file"
64 |
65 | async def download_side_effect(read_requests, **kwargs):
66 | if read_requests and len(read_requests) == 1:
67 | param_offset, param_length, buffer_arg = read_requests[0]
68 | if hasattr(buffer_arg, "write"):
69 | buffer_arg.write(
70 | file_data[param_offset : param_offset + param_length]
71 | )
72 | return [mock.Mock(error=None)]
73 |
74 | mock_downloader = mock.Mock(spec=AsyncMultiRangeDownloader)
75 | mock_downloader.download_ranges = mock.AsyncMock(
76 | side_effect=download_side_effect
77 | )
78 |
79 | mock_create_mrd = mock.AsyncMock(return_value=mock_downloader)
80 | with (
81 | mock.patch(
82 | patch_target_sync_lookup_bucket_type, return_value=bucket_type_val
83 | ) as mock_sync_lookup_bucket_type,
84 | mock.patch(
85 | patch_target_lookup_bucket_type,
86 | return_value=bucket_type_val,
87 | ),
88 | mock.patch(patch_target_create_mrd, mock_create_mrd),
89 | mock.patch(
90 | patch_target_gcsfs_cat_file, new_callable=mock.AsyncMock
91 | ) as mock_cat_file,
92 | ):
93 | mocks = {
94 | "sync_lookup_bucket_type": mock_sync_lookup_bucket_type,
95 | "create_mrd": mock_create_mrd,
96 | "downloader": mock_downloader,
97 | "cat_file": mock_cat_file,
98 | }
99 | yield mocks
100 | # Common assertion for all tests using this mock
101 | mock_cat_file.assert_not_called()
102 |
103 | return _gcs_bucket_mocks_factory
104 |
105 |
106 | read_block_params = [
107 | # Read specific chunk
108 | pytest.param(3, 10, None, json_data[3 : 3 + 10], id="offset=3, length=10"),
109 | # Read from beginning up to length
110 | pytest.param(0, 5, None, json_data[0:5], id="offset=0, length=5"),
111 | # Read from offset to end (simulate large length)
112 | pytest.param(15, 5000, None, json_data[15:], id="offset=15, length=large"),
113 | # Read beyond end of file (should return empty bytes)
114 | pytest.param(file_size + 10, 5, None, b"", id="offset>size, length=5"),
115 | # Read exactly at the end (zero length)
116 | pytest.param(file_size, 10, None, b"", id="offset=size, length=10"),
117 | # Read with delimiter
118 | pytest.param(1, 35, b"\n", lines[1], id="offset=1, length=35, delimiter=newline"),
119 | pytest.param(0, 30, b"\n", lines[0], id="offset=0, length=30, delimiter=newline"),
120 | pytest.param(
121 | 0, 35, b"\n", lines[0] + lines[1], id="offset=0, length=35, delimiter=newline"
122 | ),
123 | ]
124 |
125 |
126 | def test_read_block_zb(extended_gcsfs, gcs_bucket_mocks, subtests):
127 | for param in read_block_params:
128 | with subtests.test(id=param.id):
129 | offset, length, delimiter, expected_data = param.values
130 | path = file_path
131 |
132 | with gcs_bucket_mocks(
133 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL
134 | ) as mocks:
135 | result = extended_gcsfs.read_block(path, offset, length, delimiter)
136 |
137 | assert result == expected_data
138 | if mocks:
139 | mocks["sync_lookup_bucket_type"].assert_called_once_with(
140 | TEST_ZONAL_BUCKET
141 | )
142 | if expected_data:
143 | mocks["downloader"].download_ranges.assert_called_with(
144 | [(offset, mock.ANY, mock.ANY)]
145 | )
146 | else:
147 | mocks["downloader"].download_ranges.assert_not_called()
148 |
149 |
150 | @pytest.mark.parametrize("bucket_type_val", list(BucketType))
151 | def test_open_uses_correct_blocksize_and_consistency_for_all_bucket_types(
152 | extended_gcs_factory, gcs_bucket_mocks, bucket_type_val
153 | ):
154 | csv_file = "2014-01-01.csv"
155 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}"
156 | csv_data = csv_files[csv_file]
157 |
158 | custom_filesystem_block_size = 100 * 1024 * 1024
159 | extended_gcsfs = extended_gcs_factory(
160 | block_size=custom_filesystem_block_size, consistency="md5"
161 | )
162 |
163 | with gcs_bucket_mocks(csv_data, bucket_type_val=bucket_type_val):
164 | with extended_gcsfs.open(csv_file_path, "rb") as f:
165 | assert f.blocksize == custom_filesystem_block_size
166 | assert isinstance(f.checker, MD5Checker)
167 |
168 | file_block_size = 1024 * 1024
169 | with extended_gcsfs.open(
170 | csv_file_path, "rb", block_size=file_block_size, consistency="size"
171 | ) as f:
172 | assert f.blocksize == file_block_size
173 | assert isinstance(f.checker, SizeChecker)
174 |
175 |
176 | @pytest.mark.parametrize("bucket_type_val", list(BucketType))
177 | def test_open_uses_default_blocksize_and_consistency_from_fs(
178 | extended_gcsfs, gcs_bucket_mocks, bucket_type_val
179 | ):
180 | csv_file = "2014-01-01.csv"
181 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}"
182 | csv_data = csv_files[csv_file]
183 |
184 | with gcs_bucket_mocks(csv_data, bucket_type_val=bucket_type_val):
185 | with extended_gcsfs.open(csv_file_path, "rb") as f:
186 | assert f.blocksize == extended_gcsfs.default_block_size
187 | assert type(f.checker) is ConsistencyChecker
188 |
189 |
190 | def test_read_small_zb(extended_gcsfs, gcs_bucket_mocks):
191 | csv_file = "2014-01-01.csv"
192 | csv_file_path = f"{TEST_ZONAL_BUCKET}/{csv_file}"
193 | csv_data = csv_files[csv_file]
194 |
195 | with gcs_bucket_mocks(
196 | csv_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL
197 | ) as mocks:
198 | with extended_gcsfs.open(csv_file_path, "rb", block_size=10) as f:
199 | out = []
200 | i = 1
201 | while True:
202 | i += 1
203 | data = f.read(3)
204 | if data == b"":
205 | break
206 | out.append(data)
207 | assert extended_gcsfs.cat(csv_file_path) == b"".join(out)
208 | # cache drop
209 | assert len(f.cache.cache) < len(out)
210 | if mocks:
211 | mocks["sync_lookup_bucket_type"].assert_called_once_with(
212 | TEST_ZONAL_BUCKET
213 | )
214 |
215 |
216 | def test_readline_zb(extended_gcsfs, gcs_bucket_mocks):
217 | all_items = chain.from_iterable(
218 | [files.items(), csv_files.items(), text_files.items()]
219 | )
220 | for k, data in all_items:
221 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL):
222 | with extended_gcsfs.open("/".join([TEST_ZONAL_BUCKET, k]), "rb") as f:
223 | result = f.readline()
224 | expected = data.split(b"\n")[0] + (b"\n" if data.count(b"\n") else b"")
225 | assert result == expected
226 |
227 |
228 | def test_readline_from_cache_zb(extended_gcsfs, gcs_bucket_mocks):
229 | data = b"a,b\n11,22\n3,4"
230 | if not extended_gcsfs.on_google:
231 | with mock.patch.object(
232 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN
233 | ):
234 | with extended_gcsfs.open(a, "wb") as f:
235 | f.write(data)
236 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL):
237 | with extended_gcsfs.open(a, "rb") as f:
238 | result = f.readline()
239 | assert result == b"a,b\n"
240 | assert f.loc == 4
241 | assert f.cache.cache == data
242 |
243 | result = f.readline()
244 | assert result == b"11,22\n"
245 | assert f.loc == 10
246 | assert f.cache.cache == data
247 |
248 | result = f.readline()
249 | assert result == b"3,4"
250 | assert f.loc == 13
251 | assert f.cache.cache == data
252 |
253 |
254 | def test_readline_empty_zb(extended_gcsfs, gcs_bucket_mocks):
255 | data = b""
256 | if not extended_gcsfs.on_google:
257 | with mock.patch.object(
258 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN
259 | ):
260 | with extended_gcsfs.open(b, "wb") as f:
261 | f.write(data)
262 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL):
263 | with extended_gcsfs.open(b, "rb") as f:
264 | result = f.readline()
265 | assert result == data
266 |
267 |
268 | def test_readline_blocksize_zb(extended_gcsfs, gcs_bucket_mocks):
269 | data = b"ab\n" + b"a" * (2**18) + b"\nab"
270 | if not extended_gcsfs.on_google:
271 | with mock.patch.object(
272 | extended_gcsfs, "_sync_lookup_bucket_type", return_value=BucketType.UNKNOWN
273 | ):
274 | with extended_gcsfs.open(c, "wb") as f:
275 | f.write(data)
276 | with gcs_bucket_mocks(data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL):
277 | with extended_gcsfs.open(c, "rb", block_size=2**18) as f:
278 | result = f.readline()
279 | expected = b"ab\n"
280 | assert result == expected
281 |
282 | result = f.readline()
283 | expected = b"a" * (2**18) + b"\n"
284 | assert result == expected
285 |
286 | result = f.readline()
287 | expected = b"ab"
288 | assert result == expected
289 |
290 |
291 | @pytest.mark.parametrize(
292 | "start,end,exp_offset,exp_length,exp_exc",
293 | [
294 | (None, None, 0, file_size, None), # full file
295 | (-10, None, file_size - 10, 10, None), # start negative
296 | (10, -10, 10, file_size - 20, None), # end negative
297 | (20, 20, 20, 0, None), # zero-length slice
298 | (50, 40, None, None, ValueError), # end before start -> raises
299 | (-200, None, None, None, ValueError), # offset negative -> raises
300 | (file_size - 10, 200, file_size - 10, 10, None), # end > size clamps
301 | (
302 | file_size + 10,
303 | file_size + 20,
304 | file_size + 10,
305 | 0,
306 | None,
307 | ), # offset > size -> empty
308 | ],
309 | )
310 | def test_process_limits_parametrized(
311 | extended_gcsfs, start, end, exp_offset, exp_length, exp_exc
312 | ):
313 | if exp_exc is not None:
314 | with pytest.raises(exp_exc):
315 | extended_gcsfs.sync_process_limits_to_offset_and_length(
316 | file_path, start, end
317 | )
318 | else:
319 | offset, length = extended_gcsfs.sync_process_limits_to_offset_and_length(
320 | file_path, start, end
321 | )
322 | assert offset == exp_offset
323 | assert length == exp_length
324 |
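# Illustrative sketch (not part of gcsfs): the parametrized cases above imply
# roughly the following mapping from (start, end) read limits to a byte
# (offset, length) pair. The helper name below is hypothetical and takes the
# object size explicitly; the real method,
# extended_gcsfs.sync_process_limits_to_offset_and_length, takes a path and
# resolves the size itself.
def _limits_to_offset_and_length_sketch(size, start, end):
    # None means "from the beginning" / "to the end"; negatives count from EOF.
    start = 0 if start is None else (size + start if start < 0 else start)
    end = size if end is None else (size + end if end < 0 else end)
    if start < 0 or end < start:
        raise ValueError("invalid read limits")
    end = min(end, size)                # end past EOF clamps to the file size
    return start, max(end - start, 0)   # offset past EOF yields a zero length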
325 |
326 | @pytest.mark.parametrize(
327 | "exception_to_raise",
328 | [ValueError, DataCorruption, Exception],
329 | )
330 | def test_mrd_exception_handling(extended_gcsfs, gcs_bucket_mocks, exception_to_raise):
331 | """
332 | Tests that _cat_file correctly propagates exceptions from mrd.download_ranges.
333 | """
334 | with gcs_bucket_mocks(
335 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL
336 | ) as mocks:
337 | if extended_gcsfs.on_google:
338 | pytest.skip("Cannot mock exceptions on real GCS")
339 |
340 | # Configure the mock to raise a specified exception
341 | if exception_to_raise is DataCorruption:
342 | # The first argument is 'response', the message is in '*args'
343 | mocks["downloader"].download_ranges.side_effect = exception_to_raise(
344 | None, "Test exception raised"
345 | )
346 | else:
347 | mocks["downloader"].download_ranges.side_effect = exception_to_raise(
348 | "Test exception raised"
349 | )
350 |
351 | with pytest.raises(exception_to_raise, match="Test exception raised"):
352 | extended_gcsfs.read_block(file_path, 0, 10)
353 |
354 | mocks["downloader"].download_ranges.assert_called_once()
355 |
356 |
357 | def test_mrd_stream_cleanup(extended_gcsfs, gcs_bucket_mocks):
358 | """
359 |     Tests that the mrd stream is properly closed when the file is closed.
360 | """
361 | with gcs_bucket_mocks(
362 | json_data, bucket_type_val=BucketType.ZONAL_HIERARCHICAL
363 | ) as mocks:
364 | if not extended_gcsfs.on_google:
365 |
366 | def close_side_effect():
367 | mocks["downloader"].is_stream_open = False
368 |
369 | mocks["downloader"].close.side_effect = close_side_effect
370 |
371 | with extended_gcsfs.open(file_path, "rb") as f:
372 | assert f.mrd is not None
373 |
374 | assert True is f.closed
375 | assert False is f.mrd.is_stream_open
376 |
--------------------------------------------------------------------------------
/gcsfs/_version.py:
--------------------------------------------------------------------------------
1 | # This file helps to compute a version number in source trees obtained from
2 | # git-archive tarball (such as those provided by GitHub's download-from-tag
3 | # feature). Distribution tarballs (built by setup.py sdist) and build
4 | # directories (produced by setup.py build) will contain a much shorter file
5 | # that just contains the computed version number.
6 |
7 | # This file is released into the public domain.
8 | # Generated by versioneer-0.29
9 | # https://github.com/python-versioneer/python-versioneer
10 |
11 | """Git implementation of _version.py."""
12 |
13 | import errno
14 | import functools
15 | import os
16 | import re
17 | import subprocess
18 | import sys
19 | from typing import Any, Callable, Dict, List, Optional, Tuple
20 |
21 |
22 | def get_keywords() -> Dict[str, str]:
23 | """Get the keywords needed to look up the version information."""
24 | # these strings will be replaced by git during git-archive.
25 | # setup.py/versioneer.py will grep for the variable names, so they must
26 | # each be defined on a line of their own. _version.py will just call
27 | # get_keywords().
28 | git_refnames = " (HEAD -> main)"
29 | git_full = "4d4f04f51ccd0cdc43ef59da76aacfb3ed73db47"
30 | git_date = "2025-12-16 21:07:31 +0530"
31 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date}
32 | return keywords
33 |
34 |
35 | class VersioneerConfig:
36 | """Container for Versioneer configuration parameters."""
37 |
38 | VCS: str
39 | style: str
40 | tag_prefix: str
41 | parentdir_prefix: str
42 | versionfile_source: str
43 | verbose: bool
44 |
45 |
46 | def get_config() -> VersioneerConfig:
47 | """Create, populate and return the VersioneerConfig() object."""
48 | # these strings are filled in when 'setup.py versioneer' creates
49 | # _version.py
50 | cfg = VersioneerConfig()
51 | cfg.VCS = "git"
52 | cfg.style = "pep440"
53 | cfg.tag_prefix = ""
54 | cfg.parentdir_prefix = "None"
55 | cfg.versionfile_source = "gcsfs/_version.py"
56 | cfg.verbose = False
57 | return cfg
58 |
59 |
60 | class NotThisMethod(Exception):
61 | """Exception raised if a method is not valid for the current scenario."""
62 |
63 |
64 | LONG_VERSION_PY: Dict[str, str] = {}
65 | HANDLERS: Dict[str, Dict[str, Callable]] = {}
66 |
67 |
68 | def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator
69 | """Create decorator to mark a method as the handler of a VCS."""
70 |
71 | def decorate(f: Callable) -> Callable:
72 | """Store f in HANDLERS[vcs][method]."""
73 | if vcs not in HANDLERS:
74 | HANDLERS[vcs] = {}
75 | HANDLERS[vcs][method] = f
76 | return f
77 |
78 | return decorate
79 |
80 |
81 | def run_command(
82 | commands: List[str],
83 | args: List[str],
84 | cwd: Optional[str] = None,
85 | verbose: bool = False,
86 | hide_stderr: bool = False,
87 | env: Optional[Dict[str, str]] = None,
88 | ) -> Tuple[Optional[str], Optional[int]]:
89 | """Call the given command(s)."""
90 | assert isinstance(commands, list)
91 | process = None
92 |
93 | popen_kwargs: Dict[str, Any] = {}
94 | if sys.platform == "win32":
95 | # This hides the console window if pythonw.exe is used
96 | startupinfo = subprocess.STARTUPINFO()
97 | startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
98 | popen_kwargs["startupinfo"] = startupinfo
99 |
100 | for command in commands:
101 | try:
102 | dispcmd = str([command] + args)
103 | # remember shell=False, so use git.cmd on windows, not just git
104 | process = subprocess.Popen(
105 | [command] + args,
106 | cwd=cwd,
107 | env=env,
108 | stdout=subprocess.PIPE,
109 | stderr=(subprocess.PIPE if hide_stderr else None),
110 | **popen_kwargs,
111 | )
112 | break
113 | except OSError as e:
114 | if e.errno == errno.ENOENT:
115 | continue
116 | if verbose:
117 | print("unable to run %s" % dispcmd)
118 | print(e)
119 | return None, None
120 | else:
121 | if verbose:
122 | print("unable to find command, tried %s" % (commands,))
123 | return None, None
124 | stdout = process.communicate()[0].strip().decode()
125 | if process.returncode != 0:
126 | if verbose:
127 | print("unable to run %s (error)" % dispcmd)
128 | print("stdout was %s" % stdout)
129 | return None, process.returncode
130 | return stdout, process.returncode
131 |
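# Worked example (illustrative; not emitted by versioneer): run_command() tries
# each candidate binary in turn and returns a (stdout, returncode) tuple, or
# (None, None) when none of the binaries can be executed, e.g.
#
#     out, rc = run_command(["git"], ["rev-parse", "--short", "HEAD"], cwd=".")
#     # out -> e.g. "4d4f04f", rc -> 0 inside a git checkout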
132 |
133 | def versions_from_parentdir(
134 | parentdir_prefix: str,
135 | root: str,
136 | verbose: bool,
137 | ) -> Dict[str, Any]:
138 | """Try to determine the version from the parent directory name.
139 |
140 | Source tarballs conventionally unpack into a directory that includes both
141 | the project name and a version string. We will also support searching up
142 | two directory levels for an appropriately named parent directory
143 | """
144 | rootdirs = []
145 |
146 | for _ in range(3):
147 | dirname = os.path.basename(root)
148 | if dirname.startswith(parentdir_prefix):
149 | return {
150 | "version": dirname[len(parentdir_prefix) :],
151 | "full-revisionid": None,
152 | "dirty": False,
153 | "error": None,
154 | "date": None,
155 | }
156 | rootdirs.append(root)
157 | root = os.path.dirname(root) # up a level
158 |
159 | if verbose:
160 | print(
161 | "Tried directories %s but none started with prefix %s"
162 | % (str(rootdirs), parentdir_prefix)
163 | )
164 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix")
165 |
166 |
167 | @register_vcs_handler("git", "get_keywords")
168 | def git_get_keywords(versionfile_abs: str) -> Dict[str, str]:
169 | """Extract version information from the given file."""
170 | # the code embedded in _version.py can just fetch the value of these
171 | # keywords. When used from setup.py, we don't want to import _version.py,
172 | # so we do it with a regexp instead. This function is not used from
173 | # _version.py.
174 | keywords: Dict[str, str] = {}
175 | try:
176 | with open(versionfile_abs, "r") as fobj:
177 | for line in fobj:
178 | if line.strip().startswith("git_refnames ="):
179 | mo = re.search(r'=\s*"(.*)"', line)
180 | if mo:
181 | keywords["refnames"] = mo.group(1)
182 | if line.strip().startswith("git_full ="):
183 | mo = re.search(r'=\s*"(.*)"', line)
184 | if mo:
185 | keywords["full"] = mo.group(1)
186 | if line.strip().startswith("git_date ="):
187 | mo = re.search(r'=\s*"(.*)"', line)
188 | if mo:
189 | keywords["date"] = mo.group(1)
190 | except OSError:
191 | pass
192 | return keywords
193 |
194 |
195 | @register_vcs_handler("git", "keywords")
196 | def git_versions_from_keywords(
197 | keywords: Dict[str, str],
198 | tag_prefix: str,
199 | verbose: bool,
200 | ) -> Dict[str, Any]:
201 | """Get version information from git keywords."""
202 | if "refnames" not in keywords:
203 | raise NotThisMethod("Short version file found")
204 | date = keywords.get("date")
205 | if date is not None:
206 | # Use only the last line. Previous lines may contain GPG signature
207 | # information.
208 | date = date.splitlines()[-1]
209 |
210 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant
211 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601
212 | # -like" string, which we must then edit to make compliant), because
213 | # it's been around since git-1.5.3, and it's too difficult to
214 | # discover which version we're using, or to work around using an
215 | # older one.
216 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
217 | refnames = keywords["refnames"].strip()
218 | if refnames.startswith("$Format"):
219 | if verbose:
220 | print("keywords are unexpanded, not using")
221 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball")
222 | refs = {r.strip() for r in refnames.strip("()").split(",")}
223 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of
224 | # just "foo-1.0". If we see a "tag: " prefix, prefer those.
225 | TAG = "tag: "
226 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)}
227 | if not tags:
228 | # Either we're using git < 1.8.3, or there really are no tags. We use
229 | # a heuristic: assume all version tags have a digit. The old git %d
230 | # expansion behaves like git log --decorate=short and strips out the
231 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish
232 | # between branches and tags. By ignoring refnames without digits, we
233 | # filter out many common branch names like "release" and
234 | # "stabilization", as well as "HEAD" and "master".
235 | tags = {r for r in refs if re.search(r"\d", r)}
236 | if verbose:
237 | print("discarding '%s', no digits" % ",".join(refs - tags))
238 | if verbose:
239 | print("likely tags: %s" % ",".join(sorted(tags)))
240 | for ref in sorted(tags):
241 | # sorting will prefer e.g. "2.0" over "2.0rc1"
242 | if ref.startswith(tag_prefix):
243 | r = ref[len(tag_prefix) :]
244 | # Filter out refs that exactly match prefix or that don't start
245 | # with a number once the prefix is stripped (mostly a concern
246 | # when prefix is '')
247 | if not re.match(r"\d", r):
248 | continue
249 | if verbose:
250 | print("picking %s" % r)
251 | return {
252 | "version": r,
253 | "full-revisionid": keywords["full"].strip(),
254 | "dirty": False,
255 | "error": None,
256 | "date": date,
257 | }
258 | # no suitable tags, so version is "0+unknown", but full hex is still there
259 | if verbose:
260 | print("no suitable tags, using unknown + full revision id")
261 | return {
262 | "version": "0+unknown",
263 | "full-revisionid": keywords["full"].strip(),
264 | "dirty": False,
265 | "error": "no suitable tags",
266 | "date": None,
267 | }
268 |
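# Worked example (illustrative; not generated text): for an expanded git-archive
# keyword such as refnames = " (HEAD -> main, tag: 2024.6.1)" with tag_prefix ""
# this function picks the tag "2024.6.1" and returns
# {"version": "2024.6.1", "full-revisionid": <full hash>, "dirty": False, ...};
# with no matching tag it falls back to "0+unknown".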
269 |
270 | @register_vcs_handler("git", "pieces_from_vcs")
271 | def git_pieces_from_vcs(
272 | tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command
273 | ) -> Dict[str, Any]:
274 | """Get version from 'git describe' in the root of the source tree.
275 |
276 | This only gets called if the git-archive 'subst' keywords were *not*
277 | expanded, and _version.py hasn't already been rewritten with a short
278 | version string, meaning we're inside a checked out source tree.
279 | """
280 | GITS = ["git"]
281 | if sys.platform == "win32":
282 | GITS = ["git.cmd", "git.exe"]
283 |
284 | # GIT_DIR can interfere with correct operation of Versioneer.
285 | # It may be intended to be passed to the Versioneer-versioned project,
286 | # but that should not change where we get our version from.
287 | env = os.environ.copy()
288 | env.pop("GIT_DIR", None)
289 | runner = functools.partial(runner, env=env)
290 |
291 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose)
292 | if rc != 0:
293 | if verbose:
294 | print("Directory %s not under git control" % root)
295 | raise NotThisMethod("'git rev-parse --git-dir' returned error")
296 |
297 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty]
298 | # if there isn't one, this yields HEX[-dirty] (no NUM)
299 | describe_out, rc = runner(
300 | GITS,
301 | [
302 | "describe",
303 | "--tags",
304 | "--dirty",
305 | "--always",
306 | "--long",
307 | "--match",
308 | f"{tag_prefix}[[:digit:]]*",
309 | ],
310 | cwd=root,
311 | )
312 | # --long was added in git-1.5.5
313 | if describe_out is None:
314 | raise NotThisMethod("'git describe' failed")
315 | describe_out = describe_out.strip()
316 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root)
317 | if full_out is None:
318 | raise NotThisMethod("'git rev-parse' failed")
319 | full_out = full_out.strip()
320 |
321 | pieces: Dict[str, Any] = {}
322 | pieces["long"] = full_out
323 | pieces["short"] = full_out[:7] # maybe improved later
324 | pieces["error"] = None
325 |
326 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root)
327 | # --abbrev-ref was added in git-1.6.3
328 | if rc != 0 or branch_name is None:
329 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error")
330 | branch_name = branch_name.strip()
331 |
332 | if branch_name == "HEAD":
333 | # If we aren't exactly on a branch, pick a branch which represents
334 | # the current commit. If all else fails, we are on a branchless
335 | # commit.
336 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root)
337 | # --contains was added in git-1.5.4
338 | if rc != 0 or branches is None:
339 | raise NotThisMethod("'git branch --contains' returned error")
340 | branches = branches.split("\n")
341 |
342 | # Remove the first line if we're running detached
343 | if "(" in branches[0]:
344 | branches.pop(0)
345 |
346 | # Strip off the leading "* " from the list of branches.
347 | branches = [branch[2:] for branch in branches]
348 | if "master" in branches:
349 | branch_name = "master"
350 | elif not branches:
351 | branch_name = None
352 | else:
353 | # Pick the first branch that is returned. Good or bad.
354 | branch_name = branches[0]
355 |
356 | pieces["branch"] = branch_name
357 |
358 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty]
359 | # TAG might have hyphens.
360 | git_describe = describe_out
361 |
362 | # look for -dirty suffix
363 | dirty = git_describe.endswith("-dirty")
364 | pieces["dirty"] = dirty
365 | if dirty:
366 | git_describe = git_describe[: git_describe.rindex("-dirty")]
367 |
368 | # now we have TAG-NUM-gHEX or HEX
369 |
370 | if "-" in git_describe:
371 | # TAG-NUM-gHEX
372 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe)
373 | if not mo:
374 | # unparsable. Maybe git-describe is misbehaving?
375 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out
376 | return pieces
377 |
378 | # tag
379 | full_tag = mo.group(1)
380 | if not full_tag.startswith(tag_prefix):
381 | if verbose:
382 | fmt = "tag '%s' doesn't start with prefix '%s'"
383 | print(fmt % (full_tag, tag_prefix))
384 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % (
385 | full_tag,
386 | tag_prefix,
387 | )
388 | return pieces
389 | pieces["closest-tag"] = full_tag[len(tag_prefix) :]
390 |
391 | # distance: number of commits since tag
392 | pieces["distance"] = int(mo.group(2))
393 |
394 | # commit: short hex revision ID
395 | pieces["short"] = mo.group(3)
396 |
397 | else:
398 | # HEX: no tags
399 | pieces["closest-tag"] = None
400 | out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root)
401 | pieces["distance"] = len(out.split()) # total number of commits
402 |
403 | # commit date: see ISO-8601 comment in git_versions_from_keywords()
404 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
405 | # Use only the last line. Previous lines may contain GPG signature
406 | # information.
407 | date = date.splitlines()[-1]
408 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1)
409 |
410 | return pieces
411 |
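# Worked example (illustrative; not generated text): a describe output such as
# "2024.6.1-3-g4d4f04f-dirty" (with tag_prefix "") is split by the regex above
# into closest-tag "2024.6.1", distance 3, short "4d4f04f", and dirty True,
# while a bare "4d4f04f" (no tags) leaves closest-tag as None and sets distance
# to the total number of commits reachable from HEAD.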
412 |
413 | def plus_or_dot(pieces: Dict[str, Any]) -> str:
414 | """Return a + if we don't already have one, else return a ."""
415 | if "+" in pieces.get("closest-tag", ""):
416 | return "."
417 | return "+"
418 |
419 |
420 | def render_pep440(pieces: Dict[str, Any]) -> str:
421 | """Build up version string, with post-release "local version identifier".
422 |
423 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you
424 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty
425 |
426 | Exceptions:
427 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty]
428 | """
429 | if pieces["closest-tag"]:
430 | rendered = pieces["closest-tag"]
431 | if pieces["distance"] or pieces["dirty"]:
432 | rendered += plus_or_dot(pieces)
433 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
434 | if pieces["dirty"]:
435 | rendered += ".dirty"
436 | else:
437 | # exception #1
438 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
439 | if pieces["dirty"]:
440 | rendered += ".dirty"
441 | return rendered
442 |
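# Worked example (illustrative; not generated text):
#
#     >>> render_pep440({"closest-tag": "2024.6.1", "distance": 3,
#     ...                "short": "4d4f04f", "dirty": False})
#     '2024.6.1+3.g4d4f04f'
#     >>> render_pep440({"closest-tag": "2024.6.1", "distance": 0,
#     ...                "short": "4d4f04f", "dirty": True})
#     '2024.6.1+0.g4d4f04f.dirty'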
443 |
444 | def render_pep440_branch(pieces: Dict[str, Any]) -> str:
445 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] .
446 |
447 | The ".dev0" means not master branch. Note that .dev0 sorts backwards
448 | (a feature branch will appear "older" than the master branch).
449 |
450 | Exceptions:
451 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty]
452 | """
453 | if pieces["closest-tag"]:
454 | rendered = pieces["closest-tag"]
455 | if pieces["distance"] or pieces["dirty"]:
456 | if pieces["branch"] != "master":
457 | rendered += ".dev0"
458 | rendered += plus_or_dot(pieces)
459 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"])
460 | if pieces["dirty"]:
461 | rendered += ".dirty"
462 | else:
463 | # exception #1
464 | rendered = "0"
465 | if pieces["branch"] != "master":
466 | rendered += ".dev0"
467 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"])
468 | if pieces["dirty"]:
469 | rendered += ".dirty"
470 | return rendered
471 |
472 |
473 | def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]:
474 | """Split pep440 version string at the post-release segment.
475 |
476 | Returns the release segments before the post-release and the
477 |     post-release version number (or None if no post-release segment is present).
478 | """
479 | vc = str.split(ver, ".post")
480 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None
481 |
482 |
483 | def render_pep440_pre(pieces: Dict[str, Any]) -> str:
484 | """TAG[.postN.devDISTANCE] -- No -dirty.
485 |
486 | Exceptions:
487 | 1: no tags. 0.post0.devDISTANCE
488 | """
489 | if pieces["closest-tag"]:
490 | if pieces["distance"]:
491 | # update the post release segment
492 | tag_version, post_version = pep440_split_post(pieces["closest-tag"])
493 | rendered = tag_version
494 | if post_version is not None:
495 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
496 | else:
497 | rendered += ".post0.dev%d" % (pieces["distance"])
498 | else:
499 | # no commits, use the tag as the version
500 | rendered = pieces["closest-tag"]
501 | else:
502 | # exception #1
503 | rendered = "0.post0.dev%d" % pieces["distance"]
504 | return rendered
505 |
506 |
507 | def render_pep440_post(pieces: Dict[str, Any]) -> str:
508 | """TAG[.postDISTANCE[.dev0]+gHEX] .
509 |
510 | The ".dev0" means dirty. Note that .dev0 sorts backwards
511 | (a dirty tree will appear "older" than the corresponding clean one),
512 | but you shouldn't be releasing software with -dirty anyways.
513 |
514 | Exceptions:
515 | 1: no tags. 0.postDISTANCE[.dev0]
516 | """
517 | if pieces["closest-tag"]:
518 | rendered = pieces["closest-tag"]
519 | if pieces["distance"] or pieces["dirty"]:
520 | rendered += ".post%d" % pieces["distance"]
521 | if pieces["dirty"]:
522 | rendered += ".dev0"
523 | rendered += plus_or_dot(pieces)
524 | rendered += "g%s" % pieces["short"]
525 | else:
526 | # exception #1
527 | rendered = "0.post%d" % pieces["distance"]
528 | if pieces["dirty"]:
529 | rendered += ".dev0"
530 | rendered += "+g%s" % pieces["short"]
531 | return rendered
532 |
533 |
534 | def render_pep440_post_branch(pieces: Dict[str, Any]) -> str:
535 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] .
536 |
537 | The ".dev0" means not master branch.
538 |
539 | Exceptions:
540 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty]
541 | """
542 | if pieces["closest-tag"]:
543 | rendered = pieces["closest-tag"]
544 | if pieces["distance"] or pieces["dirty"]:
545 | rendered += ".post%d" % pieces["distance"]
546 | if pieces["branch"] != "master":
547 | rendered += ".dev0"
548 | rendered += plus_or_dot(pieces)
549 | rendered += "g%s" % pieces["short"]
550 | if pieces["dirty"]:
551 | rendered += ".dirty"
552 | else:
553 | # exception #1
554 | rendered = "0.post%d" % pieces["distance"]
555 | if pieces["branch"] != "master":
556 | rendered += ".dev0"
557 | rendered += "+g%s" % pieces["short"]
558 | if pieces["dirty"]:
559 | rendered += ".dirty"
560 | return rendered
561 |
562 |
563 | def render_pep440_old(pieces: Dict[str, Any]) -> str:
564 | """TAG[.postDISTANCE[.dev0]] .
565 |
566 | The ".dev0" means dirty.
567 |
568 | Exceptions:
569 | 1: no tags. 0.postDISTANCE[.dev0]
570 | """
571 | if pieces["closest-tag"]:
572 | rendered = pieces["closest-tag"]
573 | if pieces["distance"] or pieces["dirty"]:
574 | rendered += ".post%d" % pieces["distance"]
575 | if pieces["dirty"]:
576 | rendered += ".dev0"
577 | else:
578 | # exception #1
579 | rendered = "0.post%d" % pieces["distance"]
580 | if pieces["dirty"]:
581 | rendered += ".dev0"
582 | return rendered
583 |
584 |
585 | def render_git_describe(pieces: Dict[str, Any]) -> str:
586 | """TAG[-DISTANCE-gHEX][-dirty].
587 |
588 | Like 'git describe --tags --dirty --always'.
589 |
590 | Exceptions:
591 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
592 | """
593 | if pieces["closest-tag"]:
594 | rendered = pieces["closest-tag"]
595 | if pieces["distance"]:
596 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
597 | else:
598 | # exception #1
599 | rendered = pieces["short"]
600 | if pieces["dirty"]:
601 | rendered += "-dirty"
602 | return rendered
603 |
604 |
605 | def render_git_describe_long(pieces: Dict[str, Any]) -> str:
606 | """TAG-DISTANCE-gHEX[-dirty].
607 |
608 |     Like 'git describe --tags --dirty --always --long'.
609 | The distance/hash is unconditional.
610 |
611 | Exceptions:
612 | 1: no tags. HEX[-dirty] (note: no 'g' prefix)
613 | """
614 | if pieces["closest-tag"]:
615 | rendered = pieces["closest-tag"]
616 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"])
617 | else:
618 | # exception #1
619 | rendered = pieces["short"]
620 | if pieces["dirty"]:
621 | rendered += "-dirty"
622 | return rendered
623 |
624 |
625 | def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]:
626 | """Render the given version pieces into the requested style."""
627 | if pieces["error"]:
628 | return {
629 | "version": "unknown",
630 | "full-revisionid": pieces.get("long"),
631 | "dirty": None,
632 | "error": pieces["error"],
633 | "date": None,
634 | }
635 |
636 | if not style or style == "default":
637 | style = "pep440" # the default
638 |
639 | if style == "pep440":
640 | rendered = render_pep440(pieces)
641 | elif style == "pep440-branch":
642 | rendered = render_pep440_branch(pieces)
643 | elif style == "pep440-pre":
644 | rendered = render_pep440_pre(pieces)
645 | elif style == "pep440-post":
646 | rendered = render_pep440_post(pieces)
647 | elif style == "pep440-post-branch":
648 | rendered = render_pep440_post_branch(pieces)
649 | elif style == "pep440-old":
650 | rendered = render_pep440_old(pieces)
651 | elif style == "git-describe":
652 | rendered = render_git_describe(pieces)
653 | elif style == "git-describe-long":
654 | rendered = render_git_describe_long(pieces)
655 | else:
656 | raise ValueError("unknown style '%s'" % style)
657 |
658 | return {
659 | "version": rendered,
660 | "full-revisionid": pieces["long"],
661 | "dirty": pieces["dirty"],
662 | "error": None,
663 | "date": pieces.get("date"),
664 | }
665 |
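# Worked example (illustrative; not generated text): with pieces containing
# closest-tag "2024.6.1", distance 3, short "4d4f04f" and a clean tree, the
# styles differ only in notation, e.g. "pep440" -> "2024.6.1+3.g4d4f04f",
# "pep440-post" -> "2024.6.1.post3+g4d4f04f", and
# "git-describe" -> "2024.6.1-3-g4d4f04f".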
666 |
667 | def get_versions() -> Dict[str, Any]:
668 | """Get version information or return default if unable to do so."""
669 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have
670 | # __file__, we can work backwards from there to the root. Some
671 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which
672 | # case we can only use expanded keywords.
673 |
674 | cfg = get_config()
675 | verbose = cfg.verbose
676 |
677 | try:
678 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose)
679 | except NotThisMethod:
680 | pass
681 |
682 | try:
683 | root = os.path.realpath(__file__)
684 | # versionfile_source is the relative path from the top of the source
685 | # tree (where the .git directory might live) to this file. Invert
686 | # this to find the root from __file__.
687 | for _ in cfg.versionfile_source.split("/"):
688 | root = os.path.dirname(root)
689 | except NameError:
690 | return {
691 | "version": "0+unknown",
692 | "full-revisionid": None,
693 | "dirty": None,
694 | "error": "unable to find root of source tree",
695 | "date": None,
696 | }
697 |
698 | try:
699 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose)
700 | return render(pieces, cfg.style)
701 | except NotThisMethod:
702 | pass
703 |
704 | try:
705 | if cfg.parentdir_prefix:
706 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose)
707 | except NotThisMethod:
708 | pass
709 |
710 | return {
711 | "version": "0+unknown",
712 | "full-revisionid": None,
713 | "dirty": None,
714 | "error": "unable to compute version",
715 | "date": None,
716 | }
717 |
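# Typical consumption (a sketch of the standard versioneer wiring, not text
# from this file): the package __init__ usually exposes the computed version,
# e.g.
#
#     from gcsfs._version import get_versions
#
#     __version__ = get_versions()["version"]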
--------------------------------------------------------------------------------