├── .coveragerc ├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .isort.cfg ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── docs ├── Makefile ├── environment.yml ├── make.bat └── source │ ├── _static │ └── custom.css │ ├── api.rst │ ├── changelog.rst │ ├── conf.py │ ├── developer.rst │ ├── fuse.rst │ └── index.rst ├── environment_gcsfs.yaml ├── gcsfs ├── __init__.py ├── _version.py ├── checkers.py ├── cli │ ├── __init__.py │ └── gcsfuse.py ├── core.py ├── credentials.py ├── dask_link.py ├── inventory_report.py ├── mapping.py ├── retry.py └── tests │ ├── __init__.py │ ├── conftest.py │ ├── derived │ ├── __init__.py │ ├── gcsfs_fixtures.py │ └── gcsfs_test.py │ ├── fake-secret.json │ ├── fake-service-account-credentials.json │ ├── settings.py │ ├── test_checkers.py │ ├── test_core.py │ ├── test_credentials.py │ ├── test_fuse.py │ ├── test_inventory_report.py │ ├── test_inventory_report_listing.py │ ├── test_manyopens.py │ ├── test_mapping.py │ ├── test_retry.py │ └── utils.py ├── requirements.txt ├── setup.cfg ├── setup.py └── versioneer.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | include = 3 | gcsfs/* 4 | 5 | omit = 6 | gcsfs/tests/test* 7 | 8 | [report] 9 | show_missing = True 10 | 11 | [html] 12 | directory = coverage_html_report 13 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | gcsfs/_version.py export-subst 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push, pull_request, workflow_dispatch] 4 | 5 | defaults: 6 | run: 7 | shell: bash -l -eo pipefail {0} 8 | 9 | jobs: 10 | test: 11 | name: Python ${{ matrix.python-version }} 12 | runs-on: ubuntu-latest 13 | timeout-minutes: 30 14 | strategy: 15 | fail-fast: false 16 | matrix: 17 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 18 | 19 | steps: 20 | - name: Checkout source 21 | uses: actions/checkout@v4 22 | 23 | - name: Setup conda 24 | uses: conda-incubator/setup-miniconda@v3 25 | with: 26 | environment-file: environment_gcsfs.yaml 27 | python-version: ${{ matrix.python-version }} 28 | activate-environment: gcsfs_test 29 | 30 | - name: Conda info 31 | run: | 32 | conda list 33 | conda --version 34 | 35 | - name: Install 36 | run: | 37 | pip install -e . 
38 | - name: Run tests 39 | run: | 40 | export GOOGLE_APPLICATION_CREDENTIALS=$(pwd)/gcsfs/tests/fake-secret.json 41 | pytest -vv -s \ 42 | --log-format="%(asctime)s %(levelname)s %(message)s" \ 43 | --log-date-format="%H:%M:%S" \ 44 | gcsfs/ 45 | 46 | lint: 47 | name: lint 48 | runs-on: ubuntu-latest 49 | steps: 50 | - uses: actions/checkout@v4 51 | - uses: actions/setup-python@v4 52 | with: 53 | python-version: "3.11" 54 | - uses: pre-commit/action@v3.0.0 55 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # dask 2 | dask-worker-space/ 3 | 4 | # private notebooks 5 | private/ 6 | 7 | # Pyenv stuff 8 | .python-version 9 | 10 | # Byte-compiled / optimized / DLL files 11 | __pycache__/ 12 | *.py[cod] 13 | 14 | # C extensions 15 | *.so 16 | 17 | # Distribution / packaging 18 | .Python 19 | env/ 20 | build/ 21 | develop-eggs/ 22 | dist/ 23 | downloads/ 24 | pip-wheel-metadata/ 25 | eggs/ 26 | .eggs/ 27 | lib/ 28 | lib64/ 29 | parts/ 30 | sdist/ 31 | var/ 32 | *.egg-info/ 33 | .installed.cfg 34 | *.egg 35 | 36 | # PyInstaller 37 | # Usually these files are written by a python script from a template 38 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 39 | *.manifest 40 | *.spec 41 | 42 | # Installer logs 43 | pip-log.txt 44 | pip-delete-this-directory.txt 45 | 46 | # Unit test / coverage reports 47 | htmlcov/ 48 | .tox/ 49 | .coverage 50 | .coverage.* 51 | .cache 52 | nosetests.xml 53 | coverage.xml 54 | *.cover 55 | junit/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | 64 | # Sphinx documentation 65 | docs/_build/ 66 | 67 | # PyBuilder 68 | target/ 69 | 70 | # DotEnv configuration 71 | .env 72 | 73 | # Database 74 | *.db 75 | *.rdb 76 | 77 | # Pycharm 78 | .idea 79 | 80 | # VS Code 81 | .vscode/ 82 | 83 | # Spyder 84 | .spyproject/ 85 | 86 | # Jupyter NB Checkpoints 87 | .ipynb_checkpoints/ 88 | 89 | # exclude data from source control by default 90 | /data/ 91 | 92 | # Mac OS-specific storage files 93 | .DS_Store 94 | 95 | # vim 96 | *.swp 97 | *.swo 98 | 99 | # Mypy cache 100 | .mypy_cache/ 101 | 102 | #Pytest cache 103 | .pytest_cache/ 104 | 105 | libs/*.whl 106 | -------------------------------------------------------------------------------- /.isort.cfg: -------------------------------------------------------------------------------- 1 | [settings] 2 | known_third_party = aiohttp,click,decorator,fsspec,fuse,google,google_auth_oauthlib,pytest,requests,setuptools 3 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | # See https://pre-commit.com for more information 2 | # See https://pre-commit.com/hooks.html for more hooks 3 | exclude: versioneer.py 4 | repos: 5 | - repo: https://github.com/pre-commit/pre-commit-hooks 6 | rev: v4.4.0 7 | hooks: 8 | - id: end-of-file-fixer 9 | - id: requirements-txt-fixer 10 | - id: trailing-whitespace 11 | - repo: https://github.com/psf/black 12 | rev: 22.10.0 13 | hooks: 14 | - id: black 15 | args: 16 | - --target-version=py37 17 | - repo: https://github.com/pycqa/flake8 18 | rev: 6.0.0 19 | hooks: 20 | - id: flake8 21 | - repo: https://github.com/asottile/seed-isort-config 22 | rev: v2.2.0 23 | hooks: 24 | - id: seed-isort-config 25 | - repo: https://github.com/pre-commit/mirrors-isort 26 | rev: v5.7.0 27 | hooks: 
28 | - id: isort 29 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: miniconda3-4.7 7 | 8 | conda: 9 | environment: docs/environment.yml 10 | 11 | python: 12 | install: 13 | - method: pip 14 | path: . 15 | 16 | sphinx: 17 | configuration: docs/source/conf.py 18 | fail_on_warning: true 19 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | gcsfs is a community-maintained project. We welcome contributions in the form of bug reports, documentation, code, design proposals, and more. 2 | 3 | ## Project specific notes 4 | 5 | For testing, remote API calls are by default run against a local emulator, [fake-gcs-server](https://github.com/fsouza/fake-gcs-server). See the developer docs for more information: https://gcsfs.readthedocs.io/en/latest/developer.html. 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | BSD 3-Clause License 2 | 3 | Copyright (c) 2014-2018, Anaconda, Inc. and contributors 4 | All rights reserved. 5 | 6 | Redistribution and use in source and binary forms, with or without 7 | modification, are permitted provided that the following conditions are met: 8 | 9 | * Redistributions of source code must retain the above copyright notice, this 10 | list of conditions and the following disclaimer. 11 | 12 | * Redistributions in binary form must reproduce the above copyright notice, 13 | this list of conditions and the following disclaimer in the documentation 14 | and/or other materials provided with the distribution. 15 | 16 | * Neither the name of the copyright holder nor the names of its 17 | contributors may be used to endorse or promote products derived from 18 | this software without specific prior written permission. 19 | 20 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 21 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 23 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 24 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 26 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 27 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 28 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include gcsfs *.py 2 | recursive-include docs *.rst 3 | 4 | include setup.py 5 | include README.rst 6 | include LICENSE.txt 7 | include MANIFEST.in 8 | include requirements.txt 9 | 10 | prune docs/_build 11 | include versioneer.py 12 | include gcsfs/_version.py 13 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | gcsfs 2 | ===== 3 | 4 | |Build Status| |Doc Status| 5 | 6 | Pythonic file-system for Google Cloud Storage 7 | 8 | 9 | For documentation, go to readthedocs_. 10 | 11 | .. _readthedocs: http://gcsfs.readthedocs.io/en/latest/ 12 | 13 | .. |Build Status| image:: https://github.com/fsspec/gcsfs/workflows/CI/badge.svg 14 | :target: https://github.com/fsspec/gcsfs/actions 15 | :alt: Build Status 16 | .. |Doc Status| image:: https://readthedocs.org/projects/gcsfs/badge/?version=latest 17 | :target: https://gcsfs.readthedocs.io/en/latest/?badge=latest 18 | :alt: Documentation Status 19 | -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make <target>' where <target> is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 
90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/GCSFs.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/GCSFs.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/GCSFs" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/GCSFs" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 
180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 217 | -------------------------------------------------------------------------------- /docs/environment.yml: -------------------------------------------------------------------------------- 1 | name: gcsfs 2 | channels: 3 | - defaults 4 | dependencies: 5 | - python=3.9 6 | - docutils<0.17 7 | - sphinx 8 | - sphinx_rtd_theme 9 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^<target^>` where ^<target^> is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. 
coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\GCSFs.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\GCSFs.qhc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 
157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 
260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /docs/source/_static/custom.css: -------------------------------------------------------------------------------- 1 | .classifier:before { 2 | font-style: normal; 3 | margin: 0.5em; 4 | content: ":"; 5 | } 6 | -------------------------------------------------------------------------------- /docs/source/api.rst: -------------------------------------------------------------------------------- 1 | API 2 | === 3 | 4 | .. currentmodule:: gcsfs.core 5 | 6 | .. autosummary:: 7 | GCSFileSystem 8 | GCSFileSystem.cat 9 | GCSFileSystem.du 10 | GCSFileSystem.exists 11 | GCSFileSystem.get 12 | GCSFileSystem.glob 13 | GCSFileSystem.info 14 | GCSFileSystem.ls 15 | GCSFileSystem.mkdir 16 | GCSFileSystem.mv 17 | GCSFileSystem.open 18 | GCSFileSystem.put 19 | GCSFileSystem.read_block 20 | GCSFileSystem.rm 21 | GCSFileSystem.tail 22 | GCSFileSystem.touch 23 | GCSFileSystem.get_mapper 24 | 25 | .. autosummary:: 26 | GCSFile 27 | GCSFile.close 28 | GCSFile.flush 29 | GCSFile.info 30 | GCSFile.read 31 | GCSFile.seek 32 | GCSFile.tell 33 | GCSFile.write 34 | 35 | .. currentmodule:: gcsfs.mapping 36 | 37 | .. currentmodule:: gcsfs.core 38 | 39 | .. autoclass:: GCSFileSystem 40 | :members: 41 | :inherited-members: 42 | 43 | .. autoclass:: GCSFile 44 | :members: 45 | :inherited-members: 46 | 47 | .. currentmodule:: gcsfs.mapping 48 | -------------------------------------------------------------------------------- /docs/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | 4 | Note: in some releases, there are no changes, because we always guarantee 5 | releasing in step with fsspec. 
6 | 7 | 2025.5.1 8 | -------- 9 | 10 | * Fix token timezone comparison (#683, 688) 11 | 12 | 2025.5.0 13 | -------- 14 | 15 | * Avoid deprecated utcnow (#680) 16 | * Add support for specifying Cloud KMS keys when creating files (#679) 17 | * Yet another fix for isdir (#676) 18 | * Create warning for appending mode 'a' operations (#675) 19 | * add userProject to batch deletion query (#673) 20 | 21 | 2025.3.2 22 | -------- 23 | 24 | no changes 25 | 26 | 2025.3.1 27 | -------- 28 | 29 | * Fix find with path not ending with "/" (#668) 30 | * remove "beta" note from doc (#666) 31 | * don't check expiry of creds that don't expire (#665) 32 | 33 | 2025.3.0 34 | -------- 35 | 36 | * Improvements for credentials refresh under high load (#658) 37 | 38 | 2025.2.0 39 | -------- 40 | 41 | * guess upload file MIME types (#655) 42 | * better shutdown cleanup (#657) 43 | 44 | 2024.12.0 45 | --------- 46 | 47 | * Exclusive write (#651) 48 | * Avoid IndexError on integer seconds (#649) 49 | * note on non-posixness (#648) 50 | * handle cache_timeout=0 (#646) 51 | 52 | 2024.10.0 53 | --------- 54 | 55 | * Remove race condition in credentials (#643) 56 | * fix md5 hash order logic (#640) 57 | 58 | 2024.9.0 59 | -------- 60 | 61 | * In case error in a pure string (#631) 62 | 63 | 2024.6.1 64 | -------- 65 | 66 | no changes 67 | 68 | 2024.6.0 69 | -------- 70 | 71 | * Add seek(0) to request data to prevent issues on retries (#624) 72 | 73 | 2024.5.0 74 | -------- 75 | 76 | * swap order of "gcs", "gs" protocols (#620) 77 | * fix get_file for relative lpath (#618) 78 | 79 | 2024.3.1 80 | -------- 81 | 82 | * fix expiration= for sign() (#613) 83 | * do populate dircache in ls() (#612) 84 | * allow passing extra options to mkdir (#610) 85 | * credentials docs (#609) 86 | * retry in bulk rm (#608) 87 | * clean up loop on close (#606) 88 | 89 | 2024.2.0 90 | -------- 91 | 92 | * doc for passing tokens (#603) 93 | 94 | 2023.12.2 95 | --------- 96 | 97 | no changes 98 | 99 | 2023.12.1 100 | --------- 101 | 102 | no changes 103 | 104 | 2023.12.0 105 | --------- 106 | 107 | * use same version when paginating list (#591) 108 | * fix double asterisk glob test (#589) 109 | 110 | 2023.10.0 111 | --------- 112 | 113 | * Fix for transactions of small files (#586) 114 | 115 | 2023.9.2 116 | -------- 117 | 118 | * CI updates (#582) 119 | 120 | 2023.9.1 121 | -------- 122 | 123 | * small fixes following #573 (#578) 124 | 125 | 2023.9.0 126 | -------- 127 | 128 | * bulk operations edge cases (#576, 572) 129 | * inventory report based file listing (#573) 130 | * pickle HttpError (#571) 131 | * avoid warnings (#569) 132 | * maxdepth in find() (#566) 133 | * invalidate dircache (#564) 134 | * standard metadata field names (#563) 135 | * performance of building cache in find() (#561) 136 | 137 | 138 | 2023.6.0 139 | -------- 140 | 141 | * allow raw/session token for auth (#554) 142 | * fix listings_expiry_time kwargs (#551) 143 | * allow setting fixed metadata on put/pipe (#550) 144 | 145 | 2023.5.0 146 | -------- 147 | 148 | * Allow emulator host without protocol (#548) 149 | * Prevent upload retry from closing the file being sent (#540) 150 | 151 | 2023.4.0 152 | -------- 153 | 154 | No changes 155 | 156 | 2023.3.0 157 | -------- 158 | 159 | * Don't let find() mess up dircache (#531) 160 | * Drop py3.7 (#529) 161 | * Update docs (#528) 162 | * Make times UTC (#527) 163 | * Use BytesIO for large bodies (#525) 164 | * Fix: Don't append generation when it is absent (#523) 165 | * get/put/cp consistency tests (#521) 166 | 167 | 2023.1.0 
168 | -------- 169 | 170 | * Support create time (#516, 518) 171 | * defer async session creation (#513, 514) 172 | * support listing of file versions (#509) 173 | * fix ``sign`` following versioned split protocol (#513) 174 | 175 | 2022.11.0 176 | --------- 177 | 178 | * implement object versioning (#504) 179 | 180 | 2022.10.0 181 | --------- 182 | 183 | * bump fsspec to 2022.10.0 (#503) 184 | 185 | 2022.8.1 186 | -------- 187 | 188 | * don't install prerelease aiohttp (#490) 189 | 190 | 2022.7.1 191 | -------- 192 | 193 | * Try cloud auth by default (#479) 194 | 195 | 2022.5.0 196 | -------- 197 | 198 | * invalidate listings cache for simple put/pipe (#474) 199 | * conform _mkdir and _cat_file to upstream (#471) 200 | 201 | 2022.3.0 202 | -------- 203 | 204 | (note that this release happened in 2022.4, but we label it as 2022.3 to match 205 | fsspec) 206 | 207 | * bucket exists workaround (#464) 208 | * dirmarkers (#459) 209 | * check connection (#457) 210 | * browser connection now uses local server (#456) 211 | * bucket location (#455) 212 | * ensure auth is closed (#452) 213 | 214 | 2022.02.0 215 | --------- 216 | 217 | * fix list_buckets without cache (#449) 218 | * drop py36 (#445) 219 | 220 | 2022.01.0 221 | --------- 222 | 223 | * update refname for versions (#442) 224 | 225 | 2021.11.1 226 | --------- 227 | 228 | * don't touch cache when doing find with a prefix (#437) 229 | 230 | 2021.11.0 231 | --------- 232 | 233 | * move to fsspec org 234 | * add support for google fixed_key_metadata (#429) 235 | * deprecate `content_encoding` parameter of setxattrs method (#429) 236 | * use emulator for testing instead of vcrpy (#424) 237 | 238 | 2021.10.1 239 | --------- 240 | 241 | * url signing (#411) 242 | * default callback (#422) 243 | 244 | 2021.10.0 245 | --------- 246 | 247 | * min version for decorator 248 | * default callback in get (#422) 249 | 250 | 2021.09.0 251 | --------- 252 | 253 | * correctly recognise 404 (#419) 254 | * fix for .details due to upstream (#417) 255 | * callbacks in get/put (#416) 256 | * "%" in paths (#415) 257 | 258 | 2021.08.1 259 | --------- 260 | 261 | * don't retry 404s (#406) 262 | 263 | 2021.07.0 264 | --------- 265 | 266 | * fix find/glob with a prefix (#399) 267 | 268 | 2021.06.1 269 | --------- 270 | 271 | * kwargs to aiohttpClient session 272 | * graceful timeout when disconnecting at finalise (#397) 273 | 274 | 2021.06.0 275 | --------- 276 | 277 | * negative ranges in cat_file (#394) 278 | 279 | 2021.05.0 280 | --------- 281 | 282 | * no credentials bug fix (#390) 283 | * use googleapis.com (#388) 284 | * more retries (#387, 385, 380) 285 | * Code cleanup (#381) 286 | * license to match stated one (#378) 287 | * deps updated (#376) 288 | 289 | Version 2021.04.0 290 | ----------------- 291 | 292 | * switch to calver and fsspec pin 293 | 294 | Version 0.8.0 295 | ------------- 296 | 297 | * keep up with fsspec 0.9.0 async 298 | * one-shot find 299 | * consistency checkers 300 | * retries for intermittent issues 301 | * timeouts 302 | * partial cat 303 | * http error status 304 | * CI to GHA 305 | 306 | Version 0.7.0 307 | ------------- 308 | 309 | * async operations via aiohttp 310 | 311 | 312 | Version 0.6.0 313 | ------------- 314 | 315 | * **API-breaking**: Changed requester-pays handling for ``GCSFileSystem``. 316 | 317 | The ``user_project`` keyword has been removed, and has been replaced with 318 | the ``requester_pays`` keyword. If you're working with a ``requester_pays`` bucket 319 | you will need to explicitly pass ``requester_pays=True``. 
This will include your 320 | ``project`` ID in requests made to GCS. 321 | 322 | Version 0.5.3 323 | ------------- 324 | 325 | * ``GCSFileSystem`` now validates that the ``project`` provided, if any, matches the 326 | Google default project when using ``token='google_default'`` to authenticate (:pr:`219`). 327 | * Fixed bug in ``GCSFileSystem.cat`` on objects in requester-pays buckets (:pr:`217`). 328 | 329 | Version 0.5.2 330 | ------------- 331 | 332 | * Fixed bug in ``user_project`` fallback for default Google authentication (:pr:`213`) 333 | 334 | Version 0.5.1 335 | ------------- 336 | 337 | * ``user_project`` now falls back to the ``project`` if provided (:pr:`208`) 338 | 339 | Version 0.5.0 340 | ------------- 341 | 342 | * Added the ability to make requester-pays requests with the ``user_project`` parameter (:pr:`206`) 343 | 344 | Version 0.4.0 345 | ------------- 346 | 347 | * Improved performance when serializing filesystem objects (:pr:`182`) 348 | * Fixed authorization errors when using ``gcsfs`` within multithreaded code (:pr:`183`, :pr:`192`) 349 | * Added contributing instructions (:pr:`185`) 350 | * Improved performance for :meth:`gcsfs.GCSFileSystem.info` (:pr:`187`) 351 | * Fixed bug in :meth:`gcsfs.GCSFileSystem.info` raising an error (:pr:`190`) 352 | -------------------------------------------------------------------------------- /docs/source/conf.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # 3 | # GCSFs documentation build configuration file, created by 4 | # sphinx-quickstart on Mon Mar 21 15:20:01 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # sys.path.insert(0, os.path.abspath('.')) 19 | 20 | # -- General configuration ------------------------------------------------ 21 | 22 | # If your documentation needs a minimal Sphinx version, state it here. 23 | # needs_sphinx = '1.0' 24 | 25 | # Add any Sphinx extension module names here, as strings. They can be 26 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 27 | # ones. 28 | extensions = [ 29 | "sphinx.ext.autodoc", 30 | "sphinx.ext.todo", 31 | "sphinx.ext.ifconfig", 32 | "sphinx.ext.viewcode", 33 | "sphinx.ext.autosummary", 34 | "sphinx.ext.extlinks", 35 | "sphinx.ext.napoleon", 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ["_templates"] 40 | 41 | # The suffix(es) of source filenames. 42 | # You can specify multiple suffixes as a list of strings: 43 | # source_suffix = ['.rst', '.md'] 44 | source_suffix = ".rst" 45 | 46 | # The encoding of source files. 47 | # source_encoding = 'utf-8-sig' 48 | 49 | # The master toctree document. 50 | master_doc = "index" 51 | 52 | # General information about the project. 
53 | project = "GCSFs" 54 | copyright = "2017, Continuum Analytics" 55 | author = "Continuum Analytics" 56 | 57 | # The version info for the project you're documenting, acts as replacement for 58 | # |version| and |release|, also used in various other places throughout the 59 | # built documents. 60 | # 61 | # The short X.Y version. 62 | import gcsfs 63 | 64 | version = gcsfs.__version__ 65 | # The full version, including alpha/beta/rc tags. 66 | release = version 67 | 68 | # There are two options for replacing |today|: either, you set today to some 69 | # non-false value, then it is used: 70 | # today = '' 71 | # Else, today_fmt is used as the format for a strftime call. 72 | # today_fmt = '%B %d, %Y' 73 | 74 | # List of patterns, relative to source directory, that match files and 75 | # directories to ignore when looking for source files. 76 | exclude_patterns = [] 77 | 78 | # The reST default role (used for this markup: `text`) to use for all 79 | # documents. 80 | # default_role = None 81 | 82 | # If true, '()' will be appended to :func: etc. cross-reference text. 83 | # add_function_parentheses = True 84 | 85 | # If true, the current module name will be prepended to all description 86 | # unit titles (such as .. function::). 87 | # add_module_names = True 88 | 89 | # If true, sectionauthor and moduleauthor directives will be shown in the 90 | # output. They are ignored by default. 91 | # show_authors = False 92 | 93 | # The name of the Pygments (syntax highlighting) style to use. 94 | pygments_style = "sphinx" 95 | 96 | # A list of ignored prefixes for module index sorting. 97 | # modindex_common_prefix = [] 98 | 99 | # If true, keep warnings as "system message" paragraphs in the built documents. 100 | # keep_warnings = False 101 | 102 | # If true, `todo` and `todoList` produce output, else they produce nothing. 103 | todo_include_todos = False 104 | 105 | 106 | # -- Options for HTML output ---------------------------------------------- 107 | 108 | html_theme = "sphinx_rtd_theme" 109 | 110 | # Theme options are theme-specific and customize the look and feel of a theme 111 | # further. For a list of options available for each theme, see the 112 | # documentation. 113 | # html_theme_options = {} 114 | 115 | # Add any paths that contain custom themes here, relative to this directory. 116 | # html_theme_path = [] 117 | 118 | # The name for this set of Sphinx documents. If None, it defaults to 119 | # " v documentation". 120 | # html_title = None 121 | 122 | # A shorter title for the navigation bar. Default is the same as html_title. 123 | # html_short_title = None 124 | 125 | # The name of an image file (relative to this directory) to place at the top 126 | # of the sidebar. 127 | # html_logo = None 128 | 129 | # The name of an image file (within the static path) to use as favicon of the 130 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 131 | # pixels large. 132 | # html_favicon = None 133 | 134 | # Add any paths that contain custom static files (such as style sheets) here, 135 | # relative to this directory. They are copied after the builtin static files, 136 | # so a file named "default.css" will overwrite the builtin "default.css". 137 | html_static_path = ["_static"] 138 | 139 | # Custom CSS file to override read the docs default CSS. 
140 | # Contains workaround for RTD not rendering colon between argument name and type 141 | html_css_files = ["custom.css"] 142 | 143 | # Add any extra paths that contain custom files (such as robots.txt or 144 | # .htaccess) here, relative to this directory. These files are copied 145 | # directly to the root of the documentation. 146 | # html_extra_path = [] 147 | 148 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 149 | # using the given strftime format. 150 | # html_last_updated_fmt = '%b %d, %Y' 151 | 152 | # If true, SmartyPants will be used to convert quotes and dashes to 153 | # typographically correct entities. 154 | # html_use_smartypants = True 155 | 156 | # Custom sidebar templates, maps document names to template names. 157 | # html_sidebars = {} 158 | 159 | # Additional templates that should be rendered to pages, maps page names to 160 | # template names. 161 | # html_additional_pages = {} 162 | 163 | # If false, no module index is generated. 164 | # html_domain_indices = True 165 | 166 | # If false, no index is generated. 167 | # html_use_index = True 168 | 169 | # If true, the index is split into individual pages for each letter. 170 | # html_split_index = False 171 | 172 | # If true, links to the reST sources are added to the pages. 173 | # html_show_sourcelink = True 174 | 175 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 176 | # html_show_sphinx = True 177 | 178 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 179 | # html_show_copyright = True 180 | 181 | # If true, an OpenSearch description file will be output, and all pages will 182 | # contain a tag referring to it. The value of this option must be the 183 | # base URL from which the finished HTML is served. 184 | # html_use_opensearch = '' 185 | 186 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 187 | # html_file_suffix = None 188 | 189 | # Language to be used for generating the HTML full-text search index. 190 | # Sphinx supports the following languages: 191 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' 192 | # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' 193 | # html_search_language = 'en' 194 | 195 | # A dictionary with options for the search language support, empty by default. 196 | # Now only 'ja' uses this config value 197 | # html_search_options = {'type': 'default'} 198 | 199 | # The name of a javascript file (relative to the configuration directory) that 200 | # implements a search results scorer. If empty, the default will be used. 201 | # html_search_scorer = 'scorer.js' 202 | 203 | # Output file base name for HTML help builder. 204 | htmlhelp_basename = "GCSFSdoc" 205 | 206 | # -- Options for LaTeX output --------------------------------------------- 207 | 208 | latex_elements = { 209 | # The paper size ('letterpaper' or 'a4paper'). 210 | #'papersize': 'letterpaper', 211 | # The font size ('10pt', '11pt' or '12pt'). 212 | #'pointsize': '10pt', 213 | # Additional stuff for the LaTeX preamble. 214 | #'preamble': '', 215 | # Latex figure (float) alignment 216 | #'figure_align': 'htbp', 217 | } 218 | 219 | # Grouping the document tree into LaTeX files. List of tuples 220 | # (source start file, target name, title, 221 | # author, documentclass [howto, manual, or own class]). 
222 | latex_documents = [ 223 | (master_doc, "GCSFs.tex", "GCSFs Documentation", "Continuum Analytics", "manual") 224 | ] 225 | 226 | # The name of an image file (relative to this directory) to place at the top of 227 | # the title page. 228 | # latex_logo = None 229 | 230 | # For "manual" documents, if this is true, then toplevel headings are parts, 231 | # not chapters. 232 | # latex_use_parts = False 233 | 234 | # If true, show page references after internal links. 235 | # latex_show_pagerefs = False 236 | 237 | # If true, show URL addresses after external links. 238 | # latex_show_urls = False 239 | 240 | # Documents to append as an appendix to all manuals. 241 | # latex_appendices = [] 242 | 243 | # If false, no module index is generated. 244 | # latex_domain_indices = True 245 | 246 | 247 | # -- Options for manual page output --------------------------------------- 248 | 249 | # One entry per manual page. List of tuples 250 | # (source start file, name, description, authors, manual section). 251 | man_pages = [(master_doc, "gcsfs", "GCSFs Documentation", [author], 1)] 252 | 253 | # If true, show URL addresses after external links. 254 | # man_show_urls = False 255 | 256 | 257 | # -- Options for Texinfo output ------------------------------------------- 258 | 259 | # Grouping the document tree into Texinfo files. List of tuples 260 | # (source start file, target name, title, author, 261 | # dir menu entry, description, category) 262 | texinfo_documents = [ 263 | ( 264 | master_doc, 265 | "GCSFs", 266 | "GCSFs Documentation", 267 | author, 268 | "GCSFs", 269 | "One line description of project.", 270 | "Miscellaneous", 271 | ) 272 | ] 273 | 274 | # Documents to append as an appendix to all manuals. 275 | # texinfo_appendices = [] 276 | 277 | # If false, no module index is generated. 278 | # texinfo_domain_indices = True 279 | 280 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 281 | # texinfo_show_urls = 'footnote' 282 | 283 | # If true, do not generate a @detailmenu in the "Top" node's menu. 284 | # texinfo_no_detailmenu = False 285 | 286 | extlinks = {"pr": ("https://github.com/fsspec/gcsfs/pull/%s", "PR #%s")} 287 | -------------------------------------------------------------------------------- /docs/source/developer.rst: -------------------------------------------------------------------------------- 1 | For Developers 2 | ============== 3 | 4 | We welcome contributions to gcsfs! 5 | 6 | Please file issues and requests on github_ and we welcome pull requests. 7 | 8 | .. _github: https://github.com/fsspec/gcsfs/issues 9 | 10 | Testing 11 | ------- 12 | 13 | The testing framework supports using your own GCS-compliant endpoint, by 14 | setting the "STORAGE_EMULATOR_HOST" environment variable. If this is 15 | not set, then an emulator will be spun up using ``docker`` and 16 | `fake-gcs-server`_. This emulator has almost all the functionality of 17 | real GCS. A small number of tests run differently or are skipped. 18 | 19 | If you want to actually test against real GCS, then you should set 20 | STORAGE_EMULATOR_HOST to "https://storage.googleapis.com" and also 21 | provide appropriate GCSFS_TEST_BUCKET and GCSFS_TEST_PROJECT, as well 22 | as setting your default google credentials (or providing them via the 23 | fsspec config). 24 | 25 | .. 
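For example, to point the test suite at real GCS rather than the emulator, a shell setup along these lines should work (``my-test-bucket`` and ``my-test-project`` are placeholders for your own bucket and project; the pytest invocation mirrors the one in the CI workflow):

.. code-block:: bash

    # use real GCS instead of the fake-gcs-server emulator
    export STORAGE_EMULATOR_HOST="https://storage.googleapis.com"
    # bucket and project used by the tests (placeholders)
    export GCSFS_TEST_BUCKET=my-test-bucket
    export GCSFS_TEST_PROJECT=my-test-project
    pytest -vv gcsfs/

.. 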
_fake-gcs-server: https://github.com/fsouza/fake-gcs-server 26 | -------------------------------------------------------------------------------- /docs/source/fuse.rst: -------------------------------------------------------------------------------- 1 | GCSFS and FUSE 2 | ============== 3 | 4 | Warning, this functionality is **experimental**. 5 | 6 | FUSE_ is a mechanism to mount user-level filesystems in unix-like 7 | systems (linux, osx, etc.). GCSFS is able to use FUSE to present remote 8 | data/keys as if they were a directory on your local file-system. This 9 | allows for standard shell command manipulation, and loading of data 10 | by libraries that can only handle local file-paths (e.g., netCDF/HDF5). 11 | 12 | .. _FUSE: https://github.com/libfuse/libfuse 13 | 14 | Requirements 15 | ------------- 16 | 17 | In addition to a standard installation of GCSFS, you also need: 18 | 19 | - libfuse as a system install. The way to install this will depend 20 | on your OS. Examples include ``sudo apt-get install fuse``, 21 | ``sudo yum install fuse`` and download from osxfuse_. 22 | 23 | - fusepy_, which can be installed via conda or pip 24 | 25 | - pandas, which can also be installed via conda or pip (this library is 26 | used only for its timestring parsing). 27 | 28 | .. _osxfuse: https://osxfuse.github.io/ 29 | .. _fusepy: https://github.com/fusepy/fusepy 30 | 31 | Usage 32 | ----- 33 | 34 | FUSE functionality is available via the ``fsspec.fuse`` module. See the 35 | docstrings for further details. 36 | 37 | .. code-block:: python 38 | 39 | gcs = gcsfs.GCSFileSystem(..) 40 | from fsspec.fuse import run 41 | run(gcs, "bucket/path", "local/path", foreground=True, threads=False) 42 | 43 | Caveats 44 | ------- 45 | 46 | This functionality is experimental. The command usage may change, and you should 47 | expect exceptions. 48 | 49 | Furthermore: 50 | 51 | - although mutation operations tentatively work, you should not at the moment 52 | depend on gcsfuse as a reliable system that won't lose your data. 53 | 54 | - permissions on GCS are complicated, so all files will be shown as fully-open 55 | 0o777, regardless of state. If a read fails, you likely don't have the right 56 | permissions. 57 | -------------------------------------------------------------------------------- /docs/source/index.rst: -------------------------------------------------------------------------------- 1 | GCSFS 2 | ===== 3 | 4 | A pythonic file-system interface to `Google Cloud Storage`_. 5 | 6 | Please file issues and requests on github_ and we welcome pull requests. 7 | 8 | .. _github: https://github.com/fsspec/gcsfs/issues 9 | 10 | 11 | This package depends on fsspec_, and inherits many useful behaviours from there, 12 | including integration with Dask, and the facility for key-value dict-like 13 | objects of the type used by zarr. 14 | 15 | .. _fsspec: https://filesystem-spec.readthedocs.io/en/latest/ 16 | 17 | Installation 18 | ------------ 19 | 20 | The GCSFS library can be installed using ``conda``: 21 | 22 | .. code-block:: bash 23 | 24 | conda install -c conda-forge gcsfs 25 | 26 | or ``pip``: 27 | 28 | .. code-block:: bash 29 | 30 | pip install gcsfs 31 | 32 | or by cloning the repository: 33 | 34 | .. code-block:: bash 35 | 36 | git clone https://github.com/fsspec/gcsfs/ 37 | cd gcsfs/ 38 | pip install . 39 | 40 | Examples 41 | -------- 42 | 43 | Locate and read a file: 44 | 45 | .. 
code-block:: python 46 | 47 | >>> import gcsfs 48 | >>> fs = gcsfs.GCSFileSystem(project='my-google-project') 49 | >>> fs.ls('my-bucket') 50 | ['my-file.txt'] 51 | >>> with fs.open('my-bucket/my-file.txt', 'rb') as f: 52 | ... print(f.read()) 53 | b'Hello, world' 54 | 55 | (see also :meth:`~gcsfs.core.GCSFileSystem.walk` and :meth:`~gcsfs.core.GCSFileSystem.glob`) 56 | 57 | Read with delimited blocks: 58 | 59 | .. code-block:: python 60 | 61 | >>> fs.read_block(path, offset=1000, length=10, delimiter=b'\n') 62 | b'A whole line of text\n' 63 | 64 | Write with blocked caching: 65 | 66 | .. code-block:: python 67 | 68 | >>> with fs.open('mybucket/new-file', 'wb') as f: 69 | ... f.write(2*2**20 * b'a') 70 | ... f.write(2*2**20 * b'a') # data is flushed and file closed 71 | >>> fs.du('mybucket/new-file') 72 | {'mybucket/new-file': 4194304} 73 | 74 | Because GCSFS faithfully copies the Python file interface it can be used 75 | smoothly with other projects that consume the file interface like ``gzip`` or 76 | ``pandas``. 77 | 78 | .. code-block:: python 79 | 80 | >>> with fs.open('mybucket/my-file.csv.gz', 'rb') as f: 81 | ... g = gzip.GzipFile(fileobj=f) # Decompress data with gzip 82 | ... df = pd.read_csv(g) # Read CSV file with Pandas 83 | 84 | Credentials 85 | ----------- 86 | 87 | Several modes of authentication are supported: 88 | 89 | - if ``token=None`` (default), GCSFS will attempt to use your default gcloud 90 | credentials, or attempt to get credentials from the google metadata 91 | service, or fall back to anonymous access. This will work for most 92 | users without further action. Note that the default project may also 93 | be found, but it is often best to supply this anyway (only affects bucket- 94 | level operations). 95 | 96 | - if ``token='cloud'``, we assume we are running within google (compute 97 | or container engine) and fetch the credentials automatically from the 98 | metadata service. 99 | 100 | - if ``token=dict(...)`` or ``token=<filepath>``, you may supply a token 101 | generated by the gcloud_ utility. This can be 102 | 103 | - a python dictionary 104 | 105 | - the path to a file containing the JSON returned by logging in with the 106 | gcloud CLI tool (e.g., 107 | ``~/.config/gcloud/application_default_credentials.json`` or 108 | ``~/.config/gcloud/legacy_credentials/<your legacy 109 | email>/adc.json``) 110 | 111 | - the path to a service account key 112 | 113 | - a google.auth.credentials.Credentials_ object 114 | 115 | Note that ``~`` will not be automatically expanded to the user home 116 | directory, and must be manually expanded with a utility like 117 | ``os.path.expanduser()``. 118 | 119 | - you can also generate tokens via OAuth2 in the browser using ``token='browser'``, 120 | which gcsfs then caches in a special file, ~/.gcs_tokens, and can subsequently be accessed with ``token='cache'``. 121 | 122 | - anonymous-only access can be selected using ``token='anon'``, e.g. to access 123 | public resources such as 'anaconda-public-data'. 124 | 125 | .. _google.auth.credentials.Credentials: https://google-auth.readthedocs.io/en/master/reference/google.auth.credentials.html#google.auth.credentials.Credentials 126 | 127 | The acquired session tokens are *not* preserved when serializing the instances, so 128 | it is safe to pass them to worker processes on other machines if using in a 129 | distributed computation context. If credentials are given by a file path, however, 130 | then this file must exist on every machine. 
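Putting the modes above together, a short illustrative sketch (the project name and the token-file path are placeholders):

.. code-block:: python

    >>> import gcsfs
    >>> # token=None (default): try gcloud credentials, then the metadata
    >>> # service, then fall back to anonymous access
    >>> fs = gcsfs.GCSFileSystem(project='my-google-project')
    >>> # anonymous access only, e.g. for public buckets
    >>> fs = gcsfs.GCSFileSystem(token='anon')
    >>> # a token file path; "~" is not expanded automatically,
    >>> # so expand it yourself
    >>> import os.path
    >>> fs = gcsfs.GCSFileSystem(
    ...     project='my-google-project',
    ...     token=os.path.expanduser(
    ...         '~/.config/gcloud/application_default_credentials.json'))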
131 | 132 | 133 | Integration 134 | ----------- 135 | 136 | The libraries ``intake``, ``pandas`` and ``dask`` accept URLs with the prefix 137 | "gcs://", and will use gcsfs to complete the IO operation in question. The 138 | IO functions take an argument ``storage_options``, which will be passed 139 | to ``GCSFileSystem``, for example: 140 | 141 | .. code-block:: python 142 | 143 | df = pd.read_excel("gcs://bucket/path/file.xls", 144 | storage_options={"token": "anon"}) 145 | 146 | This provides a way to pass any credentials or other necessary 147 | arguments that gcsfs needs. 148 | 149 | 150 | Async 151 | ----- 152 | 153 | ``gcsfs`` is implemented using ``aiohttp``, and offers async functionality. 154 | A number of methods of ``GCSFileSystem`` are ``async``; for each of these, 155 | there is also a synchronous version with the same name, but lacking the "_" 156 | prefix. 157 | 158 | If you wish to call ``gcsfs`` from async code, then you should pass 159 | ``asynchronous=True, loop=loop`` to the constructor (the latter is optional, 160 | if you wish to use both async and sync methods). You must also explicitly 161 | await the client creation before making any GCS call. 162 | 163 | .. code-block:: python 164 | 165 | async def run_program(): 166 | gcs = GCSFileSystem(asynchronous=True) 167 | print(await gcs._ls("")) 168 | 169 | asyncio.run(run_program()) # or call from your async code 170 | 171 | Concurrent async operations are also used internally for bulk operations 172 | such as ``pipe/cat``, ``get/put``, ``cp/mv/rm``. The async calls are 173 | hidden behind a synchronisation layer, so are designed to be called 174 | from normal code. If you are *not* 175 | using async-style programming, you do not need to know about how this 176 | works, but you might find the implementation interesting. 177 | 178 | For every synchronous function there is an asynchronous one prefixed by ``_``, but 179 | the ``open`` operation does not support async operation. If you need to open 180 | a file asynchronously, it is better to download it asynchronously to a 181 | temporary location and work with it from there. 182 | 183 | Proxy 184 | ----- 185 | 186 | ``gcsfs`` uses ``aiohttp`` for calls to the storage api, which by default 187 | ignores ``HTTP_PROXY/HTTPS_PROXY`` environment variables. To read 188 | proxy settings from the environment provide ``session_kwargs`` as follows: 189 | 190 | .. code-block:: python 191 | 192 | fs = GCSFileSystem(project='my-google-project', session_kwargs={'trust_env': True}) 193 | 194 | For further reference check `aiohttp proxy support`_. 195 | 196 | .. _aiohttp proxy support: https://docs.aiohttp.org/en/stable/client_advanced.html#proxy-support 197 | 198 | 199 | Contents 200 | ======== 201 | 202 | .. toctree:: 203 | :maxdepth: 2 204 | 205 | api 206 | developer 207 | fuse 208 | changelog 209 | 210 | .. _Google Cloud Storage: https://cloud.google.com/storage/docs/ 211 | 212 | .. _gcloud: https://cloud.google.com/sdk/docs/ 213 | 214 | .. _dask: http://dask.pydata.org/en/latest/remote-data-services.html 215 | 216 | .. 
_zarr: http://zarr.readthedocs.io/en/latest/tutorial.html#storage-alternatives 217 | 218 | Indices and tables 219 | ================== 220 | 221 | * :ref:`genindex` 222 | * :ref:`modindex` 223 | * :ref:`search` 224 | -------------------------------------------------------------------------------- /environment_gcsfs.yaml: -------------------------------------------------------------------------------- 1 | name: gcsfs_test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - python==3.11 6 | - aiohttp 7 | - crcmod 8 | - decorator 9 | - fsspec 10 | - google-api-core 11 | - google-api-python-client 12 | - google-auth 13 | - google-auth-oauthlib 14 | - google-cloud-core 15 | - google-cloud-storage 16 | - pytest 17 | - pytest-timeout 18 | - pytest-asyncio 19 | - requests 20 | - ujson 21 | - pip: 22 | - git+https://github.com/fsspec/filesystem_spec 23 | -------------------------------------------------------------------------------- /gcsfs/__init__.py: -------------------------------------------------------------------------------- 1 | from ._version import get_versions 2 | 3 | __version__ = get_versions()["version"] 4 | del get_versions 5 | from .core import GCSFileSystem 6 | from .mapping import GCSMap 7 | 8 | __all__ = ["GCSFileSystem", "GCSMap"] 9 | 10 | from . import _version 11 | 12 | __version__ = _version.get_versions()["version"] 13 | -------------------------------------------------------------------------------- /gcsfs/_version.py: -------------------------------------------------------------------------------- 1 | # This file helps to compute a version number in source trees obtained from 2 | # git-archive tarball (such as those provided by githubs download-from-tag 3 | # feature). Distribution tarballs (built by setup.py sdist) and build 4 | # directories (produced by setup.py build) will contain a much shorter file 5 | # that just contains the computed version number. 6 | 7 | # This file is released into the public domain. 8 | # Generated by versioneer-0.29 9 | # https://github.com/python-versioneer/python-versioneer 10 | 11 | """Git implementation of _version.py.""" 12 | 13 | import errno 14 | import functools 15 | import os 16 | import re 17 | import subprocess 18 | import sys 19 | from typing import Any, Callable, Dict, List, Optional, Tuple 20 | 21 | 22 | def get_keywords() -> Dict[str, str]: 23 | """Get the keywords needed to look up the version information.""" 24 | # these strings will be replaced by git during git-archive. 25 | # setup.py/versioneer.py will grep for the variable names, so they must 26 | # each be defined on a line of their own. _version.py will just call 27 | # get_keywords(). 
28 | git_refnames = " (HEAD -> main)" 29 | git_full = "7872bd7a931fb4285d5762ff5d861b8653fc7b70" 30 | git_date = "2025-06-10 11:00:39 -0400" 31 | keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} 32 | return keywords 33 | 34 | 35 | class VersioneerConfig: 36 | """Container for Versioneer configuration parameters.""" 37 | 38 | VCS: str 39 | style: str 40 | tag_prefix: str 41 | parentdir_prefix: str 42 | versionfile_source: str 43 | verbose: bool 44 | 45 | 46 | def get_config() -> VersioneerConfig: 47 | """Create, populate and return the VersioneerConfig() object.""" 48 | # these strings are filled in when 'setup.py versioneer' creates 49 | # _version.py 50 | cfg = VersioneerConfig() 51 | cfg.VCS = "git" 52 | cfg.style = "pep440" 53 | cfg.tag_prefix = "" 54 | cfg.parentdir_prefix = "None" 55 | cfg.versionfile_source = "gcsfs/_version.py" 56 | cfg.verbose = False 57 | return cfg 58 | 59 | 60 | class NotThisMethod(Exception): 61 | """Exception raised if a method is not valid for the current scenario.""" 62 | 63 | 64 | LONG_VERSION_PY: Dict[str, str] = {} 65 | HANDLERS: Dict[str, Dict[str, Callable]] = {} 66 | 67 | 68 | def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator 69 | """Create decorator to mark a method as the handler of a VCS.""" 70 | 71 | def decorate(f: Callable) -> Callable: 72 | """Store f in HANDLERS[vcs][method].""" 73 | if vcs not in HANDLERS: 74 | HANDLERS[vcs] = {} 75 | HANDLERS[vcs][method] = f 76 | return f 77 | 78 | return decorate 79 | 80 | 81 | def run_command( 82 | commands: List[str], 83 | args: List[str], 84 | cwd: Optional[str] = None, 85 | verbose: bool = False, 86 | hide_stderr: bool = False, 87 | env: Optional[Dict[str, str]] = None, 88 | ) -> Tuple[Optional[str], Optional[int]]: 89 | """Call the given command(s).""" 90 | assert isinstance(commands, list) 91 | process = None 92 | 93 | popen_kwargs: Dict[str, Any] = {} 94 | if sys.platform == "win32": 95 | # This hides the console window if pythonw.exe is used 96 | startupinfo = subprocess.STARTUPINFO() 97 | startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW 98 | popen_kwargs["startupinfo"] = startupinfo 99 | 100 | for command in commands: 101 | try: 102 | dispcmd = str([command] + args) 103 | # remember shell=False, so use git.cmd on windows, not just git 104 | process = subprocess.Popen( 105 | [command] + args, 106 | cwd=cwd, 107 | env=env, 108 | stdout=subprocess.PIPE, 109 | stderr=(subprocess.PIPE if hide_stderr else None), 110 | **popen_kwargs, 111 | ) 112 | break 113 | except OSError as e: 114 | if e.errno == errno.ENOENT: 115 | continue 116 | if verbose: 117 | print("unable to run %s" % dispcmd) 118 | print(e) 119 | return None, None 120 | else: 121 | if verbose: 122 | print("unable to find command, tried %s" % (commands,)) 123 | return None, None 124 | stdout = process.communicate()[0].strip().decode() 125 | if process.returncode != 0: 126 | if verbose: 127 | print("unable to run %s (error)" % dispcmd) 128 | print("stdout was %s" % stdout) 129 | return None, process.returncode 130 | return stdout, process.returncode 131 | 132 | 133 | def versions_from_parentdir( 134 | parentdir_prefix: str, 135 | root: str, 136 | verbose: bool, 137 | ) -> Dict[str, Any]: 138 | """Try to determine the version from the parent directory name. 139 | 140 | Source tarballs conventionally unpack into a directory that includes both 141 | the project name and a version string. 
We will also support searching up 142 | two directory levels for an appropriately named parent directory 143 | """ 144 | rootdirs = [] 145 | 146 | for _ in range(3): 147 | dirname = os.path.basename(root) 148 | if dirname.startswith(parentdir_prefix): 149 | return { 150 | "version": dirname[len(parentdir_prefix) :], 151 | "full-revisionid": None, 152 | "dirty": False, 153 | "error": None, 154 | "date": None, 155 | } 156 | rootdirs.append(root) 157 | root = os.path.dirname(root) # up a level 158 | 159 | if verbose: 160 | print( 161 | "Tried directories %s but none started with prefix %s" 162 | % (str(rootdirs), parentdir_prefix) 163 | ) 164 | raise NotThisMethod("rootdir doesn't start with parentdir_prefix") 165 | 166 | 167 | @register_vcs_handler("git", "get_keywords") 168 | def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: 169 | """Extract version information from the given file.""" 170 | # the code embedded in _version.py can just fetch the value of these 171 | # keywords. When used from setup.py, we don't want to import _version.py, 172 | # so we do it with a regexp instead. This function is not used from 173 | # _version.py. 174 | keywords: Dict[str, str] = {} 175 | try: 176 | with open(versionfile_abs, "r") as fobj: 177 | for line in fobj: 178 | if line.strip().startswith("git_refnames ="): 179 | mo = re.search(r'=\s*"(.*)"', line) 180 | if mo: 181 | keywords["refnames"] = mo.group(1) 182 | if line.strip().startswith("git_full ="): 183 | mo = re.search(r'=\s*"(.*)"', line) 184 | if mo: 185 | keywords["full"] = mo.group(1) 186 | if line.strip().startswith("git_date ="): 187 | mo = re.search(r'=\s*"(.*)"', line) 188 | if mo: 189 | keywords["date"] = mo.group(1) 190 | except OSError: 191 | pass 192 | return keywords 193 | 194 | 195 | @register_vcs_handler("git", "keywords") 196 | def git_versions_from_keywords( 197 | keywords: Dict[str, str], 198 | tag_prefix: str, 199 | verbose: bool, 200 | ) -> Dict[str, Any]: 201 | """Get version information from git keywords.""" 202 | if "refnames" not in keywords: 203 | raise NotThisMethod("Short version file found") 204 | date = keywords.get("date") 205 | if date is not None: 206 | # Use only the last line. Previous lines may contain GPG signature 207 | # information. 208 | date = date.splitlines()[-1] 209 | 210 | # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant 211 | # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 212 | # -like" string, which we must then edit to make compliant), because 213 | # it's been around since git-1.5.3, and it's too difficult to 214 | # discover which version we're using, or to work around using an 215 | # older one. 216 | date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 217 | refnames = keywords["refnames"].strip() 218 | if refnames.startswith("$Format"): 219 | if verbose: 220 | print("keywords are unexpanded, not using") 221 | raise NotThisMethod("unexpanded keywords, not a git-archive tarball") 222 | refs = {r.strip() for r in refnames.strip("()").split(",")} 223 | # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of 224 | # just "foo-1.0". If we see a "tag: " prefix, prefer those. 225 | TAG = "tag: " 226 | tags = {r[len(TAG) :] for r in refs if r.startswith(TAG)} 227 | if not tags: 228 | # Either we're using git < 1.8.3, or there really are no tags. We use 229 | # a heuristic: assume all version tags have a digit. 
The old git %d 230 | # expansion behaves like git log --decorate=short and strips out the 231 | # refs/heads/ and refs/tags/ prefixes that would let us distinguish 232 | # between branches and tags. By ignoring refnames without digits, we 233 | # filter out many common branch names like "release" and 234 | # "stabilization", as well as "HEAD" and "master". 235 | tags = {r for r in refs if re.search(r"\d", r)} 236 | if verbose: 237 | print("discarding '%s', no digits" % ",".join(refs - tags)) 238 | if verbose: 239 | print("likely tags: %s" % ",".join(sorted(tags))) 240 | for ref in sorted(tags): 241 | # sorting will prefer e.g. "2.0" over "2.0rc1" 242 | if ref.startswith(tag_prefix): 243 | r = ref[len(tag_prefix) :] 244 | # Filter out refs that exactly match prefix or that don't start 245 | # with a number once the prefix is stripped (mostly a concern 246 | # when prefix is '') 247 | if not re.match(r"\d", r): 248 | continue 249 | if verbose: 250 | print("picking %s" % r) 251 | return { 252 | "version": r, 253 | "full-revisionid": keywords["full"].strip(), 254 | "dirty": False, 255 | "error": None, 256 | "date": date, 257 | } 258 | # no suitable tags, so version is "0+unknown", but full hex is still there 259 | if verbose: 260 | print("no suitable tags, using unknown + full revision id") 261 | return { 262 | "version": "0+unknown", 263 | "full-revisionid": keywords["full"].strip(), 264 | "dirty": False, 265 | "error": "no suitable tags", 266 | "date": None, 267 | } 268 | 269 | 270 | @register_vcs_handler("git", "pieces_from_vcs") 271 | def git_pieces_from_vcs( 272 | tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command 273 | ) -> Dict[str, Any]: 274 | """Get version from 'git describe' in the root of the source tree. 275 | 276 | This only gets called if the git-archive 'subst' keywords were *not* 277 | expanded, and _version.py hasn't already been rewritten with a short 278 | version string, meaning we're inside a checked out source tree. 279 | """ 280 | GITS = ["git"] 281 | if sys.platform == "win32": 282 | GITS = ["git.cmd", "git.exe"] 283 | 284 | # GIT_DIR can interfere with correct operation of Versioneer. 285 | # It may be intended to be passed to the Versioneer-versioned project, 286 | # but that should not change where we get our version from. 
287 | env = os.environ.copy() 288 | env.pop("GIT_DIR", None) 289 | runner = functools.partial(runner, env=env) 290 | 291 | _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) 292 | if rc != 0: 293 | if verbose: 294 | print("Directory %s not under git control" % root) 295 | raise NotThisMethod("'git rev-parse --git-dir' returned error") 296 | 297 | # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] 298 | # if there isn't one, this yields HEX[-dirty] (no NUM) 299 | describe_out, rc = runner( 300 | GITS, 301 | [ 302 | "describe", 303 | "--tags", 304 | "--dirty", 305 | "--always", 306 | "--long", 307 | "--match", 308 | f"{tag_prefix}[[:digit:]]*", 309 | ], 310 | cwd=root, 311 | ) 312 | # --long was added in git-1.5.5 313 | if describe_out is None: 314 | raise NotThisMethod("'git describe' failed") 315 | describe_out = describe_out.strip() 316 | full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) 317 | if full_out is None: 318 | raise NotThisMethod("'git rev-parse' failed") 319 | full_out = full_out.strip() 320 | 321 | pieces: Dict[str, Any] = {} 322 | pieces["long"] = full_out 323 | pieces["short"] = full_out[:7] # maybe improved later 324 | pieces["error"] = None 325 | 326 | branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) 327 | # --abbrev-ref was added in git-1.6.3 328 | if rc != 0 or branch_name is None: 329 | raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") 330 | branch_name = branch_name.strip() 331 | 332 | if branch_name == "HEAD": 333 | # If we aren't exactly on a branch, pick a branch which represents 334 | # the current commit. If all else fails, we are on a branchless 335 | # commit. 336 | branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) 337 | # --contains was added in git-1.5.4 338 | if rc != 0 or branches is None: 339 | raise NotThisMethod("'git branch --contains' returned error") 340 | branches = branches.split("\n") 341 | 342 | # Remove the first line if we're running detached 343 | if "(" in branches[0]: 344 | branches.pop(0) 345 | 346 | # Strip off the leading "* " from the list of branches. 347 | branches = [branch[2:] for branch in branches] 348 | if "master" in branches: 349 | branch_name = "master" 350 | elif not branches: 351 | branch_name = None 352 | else: 353 | # Pick the first branch that is returned. Good or bad. 354 | branch_name = branches[0] 355 | 356 | pieces["branch"] = branch_name 357 | 358 | # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] 359 | # TAG might have hyphens. 360 | git_describe = describe_out 361 | 362 | # look for -dirty suffix 363 | dirty = git_describe.endswith("-dirty") 364 | pieces["dirty"] = dirty 365 | if dirty: 366 | git_describe = git_describe[: git_describe.rindex("-dirty")] 367 | 368 | # now we have TAG-NUM-gHEX or HEX 369 | 370 | if "-" in git_describe: 371 | # TAG-NUM-gHEX 372 | mo = re.search(r"^(.+)-(\d+)-g([0-9a-f]+)$", git_describe) 373 | if not mo: 374 | # unparsable. Maybe git-describe is misbehaving? 
375 | pieces["error"] = "unable to parse git-describe output: '%s'" % describe_out 376 | return pieces 377 | 378 | # tag 379 | full_tag = mo.group(1) 380 | if not full_tag.startswith(tag_prefix): 381 | if verbose: 382 | fmt = "tag '%s' doesn't start with prefix '%s'" 383 | print(fmt % (full_tag, tag_prefix)) 384 | pieces["error"] = "tag '%s' doesn't start with prefix '%s'" % ( 385 | full_tag, 386 | tag_prefix, 387 | ) 388 | return pieces 389 | pieces["closest-tag"] = full_tag[len(tag_prefix) :] 390 | 391 | # distance: number of commits since tag 392 | pieces["distance"] = int(mo.group(2)) 393 | 394 | # commit: short hex revision ID 395 | pieces["short"] = mo.group(3) 396 | 397 | else: 398 | # HEX: no tags 399 | pieces["closest-tag"] = None 400 | out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) 401 | pieces["distance"] = len(out.split()) # total number of commits 402 | 403 | # commit date: see ISO-8601 comment in git_versions_from_keywords() 404 | date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() 405 | # Use only the last line. Previous lines may contain GPG signature 406 | # information. 407 | date = date.splitlines()[-1] 408 | pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) 409 | 410 | return pieces 411 | 412 | 413 | def plus_or_dot(pieces: Dict[str, Any]) -> str: 414 | """Return a + if we don't already have one, else return a .""" 415 | if "+" in pieces.get("closest-tag", ""): 416 | return "." 417 | return "+" 418 | 419 | 420 | def render_pep440(pieces: Dict[str, Any]) -> str: 421 | """Build up version string, with post-release "local version identifier". 422 | 423 | Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you 424 | get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty 425 | 426 | Exceptions: 427 | 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] 428 | """ 429 | if pieces["closest-tag"]: 430 | rendered = pieces["closest-tag"] 431 | if pieces["distance"] or pieces["dirty"]: 432 | rendered += plus_or_dot(pieces) 433 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 434 | if pieces["dirty"]: 435 | rendered += ".dirty" 436 | else: 437 | # exception #1 438 | rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 439 | if pieces["dirty"]: 440 | rendered += ".dirty" 441 | return rendered 442 | 443 | 444 | def render_pep440_branch(pieces: Dict[str, Any]) -> str: 445 | """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . 446 | 447 | The ".dev0" means not master branch. Note that .dev0 sorts backwards 448 | (a feature branch will appear "older" than the master branch). 449 | 450 | Exceptions: 451 | 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] 452 | """ 453 | if pieces["closest-tag"]: 454 | rendered = pieces["closest-tag"] 455 | if pieces["distance"] or pieces["dirty"]: 456 | if pieces["branch"] != "master": 457 | rendered += ".dev0" 458 | rendered += plus_or_dot(pieces) 459 | rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) 460 | if pieces["dirty"]: 461 | rendered += ".dirty" 462 | else: 463 | # exception #1 464 | rendered = "0" 465 | if pieces["branch"] != "master": 466 | rendered += ".dev0" 467 | rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) 468 | if pieces["dirty"]: 469 | rendered += ".dirty" 470 | return rendered 471 | 472 | 473 | def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: 474 | """Split pep440 version string at the post-release segment. 
475 | 476 | Returns the release segments before the post-release and the 477 | post-release version number (or -1 if no post-release segment is present). 478 | """ 479 | vc = str.split(ver, ".post") 480 | return vc[0], int(vc[1] or 0) if len(vc) == 2 else None 481 | 482 | 483 | def render_pep440_pre(pieces: Dict[str, Any]) -> str: 484 | """TAG[.postN.devDISTANCE] -- No -dirty. 485 | 486 | Exceptions: 487 | 1: no tags. 0.post0.devDISTANCE 488 | """ 489 | if pieces["closest-tag"]: 490 | if pieces["distance"]: 491 | # update the post release segment 492 | tag_version, post_version = pep440_split_post(pieces["closest-tag"]) 493 | rendered = tag_version 494 | if post_version is not None: 495 | rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) 496 | else: 497 | rendered += ".post0.dev%d" % (pieces["distance"]) 498 | else: 499 | # no commits, use the tag as the version 500 | rendered = pieces["closest-tag"] 501 | else: 502 | # exception #1 503 | rendered = "0.post0.dev%d" % pieces["distance"] 504 | return rendered 505 | 506 | 507 | def render_pep440_post(pieces: Dict[str, Any]) -> str: 508 | """TAG[.postDISTANCE[.dev0]+gHEX] . 509 | 510 | The ".dev0" means dirty. Note that .dev0 sorts backwards 511 | (a dirty tree will appear "older" than the corresponding clean one), 512 | but you shouldn't be releasing software with -dirty anyways. 513 | 514 | Exceptions: 515 | 1: no tags. 0.postDISTANCE[.dev0] 516 | """ 517 | if pieces["closest-tag"]: 518 | rendered = pieces["closest-tag"] 519 | if pieces["distance"] or pieces["dirty"]: 520 | rendered += ".post%d" % pieces["distance"] 521 | if pieces["dirty"]: 522 | rendered += ".dev0" 523 | rendered += plus_or_dot(pieces) 524 | rendered += "g%s" % pieces["short"] 525 | else: 526 | # exception #1 527 | rendered = "0.post%d" % pieces["distance"] 528 | if pieces["dirty"]: 529 | rendered += ".dev0" 530 | rendered += "+g%s" % pieces["short"] 531 | return rendered 532 | 533 | 534 | def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: 535 | """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . 536 | 537 | The ".dev0" means not master branch. 538 | 539 | Exceptions: 540 | 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] 541 | """ 542 | if pieces["closest-tag"]: 543 | rendered = pieces["closest-tag"] 544 | if pieces["distance"] or pieces["dirty"]: 545 | rendered += ".post%d" % pieces["distance"] 546 | if pieces["branch"] != "master": 547 | rendered += ".dev0" 548 | rendered += plus_or_dot(pieces) 549 | rendered += "g%s" % pieces["short"] 550 | if pieces["dirty"]: 551 | rendered += ".dirty" 552 | else: 553 | # exception #1 554 | rendered = "0.post%d" % pieces["distance"] 555 | if pieces["branch"] != "master": 556 | rendered += ".dev0" 557 | rendered += "+g%s" % pieces["short"] 558 | if pieces["dirty"]: 559 | rendered += ".dirty" 560 | return rendered 561 | 562 | 563 | def render_pep440_old(pieces: Dict[str, Any]) -> str: 564 | """TAG[.postDISTANCE[.dev0]] . 565 | 566 | The ".dev0" means dirty. 567 | 568 | Exceptions: 569 | 1: no tags. 0.postDISTANCE[.dev0] 570 | """ 571 | if pieces["closest-tag"]: 572 | rendered = pieces["closest-tag"] 573 | if pieces["distance"] or pieces["dirty"]: 574 | rendered += ".post%d" % pieces["distance"] 575 | if pieces["dirty"]: 576 | rendered += ".dev0" 577 | else: 578 | # exception #1 579 | rendered = "0.post%d" % pieces["distance"] 580 | if pieces["dirty"]: 581 | rendered += ".dev0" 582 | return rendered 583 | 584 | 585 | def render_git_describe(pieces: Dict[str, Any]) -> str: 586 | """TAG[-DISTANCE-gHEX][-dirty]. 
587 | 588 | Like 'git describe --tags --dirty --always'. 589 | 590 | Exceptions: 591 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 592 | """ 593 | if pieces["closest-tag"]: 594 | rendered = pieces["closest-tag"] 595 | if pieces["distance"]: 596 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 597 | else: 598 | # exception #1 599 | rendered = pieces["short"] 600 | if pieces["dirty"]: 601 | rendered += "-dirty" 602 | return rendered 603 | 604 | 605 | def render_git_describe_long(pieces: Dict[str, Any]) -> str: 606 | """TAG-DISTANCE-gHEX[-dirty]. 607 | 608 | Like 'git describe --tags --dirty --always -long'. 609 | The distance/hash is unconditional. 610 | 611 | Exceptions: 612 | 1: no tags. HEX[-dirty] (note: no 'g' prefix) 613 | """ 614 | if pieces["closest-tag"]: 615 | rendered = pieces["closest-tag"] 616 | rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) 617 | else: 618 | # exception #1 619 | rendered = pieces["short"] 620 | if pieces["dirty"]: 621 | rendered += "-dirty" 622 | return rendered 623 | 624 | 625 | def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: 626 | """Render the given version pieces into the requested style.""" 627 | if pieces["error"]: 628 | return { 629 | "version": "unknown", 630 | "full-revisionid": pieces.get("long"), 631 | "dirty": None, 632 | "error": pieces["error"], 633 | "date": None, 634 | } 635 | 636 | if not style or style == "default": 637 | style = "pep440" # the default 638 | 639 | if style == "pep440": 640 | rendered = render_pep440(pieces) 641 | elif style == "pep440-branch": 642 | rendered = render_pep440_branch(pieces) 643 | elif style == "pep440-pre": 644 | rendered = render_pep440_pre(pieces) 645 | elif style == "pep440-post": 646 | rendered = render_pep440_post(pieces) 647 | elif style == "pep440-post-branch": 648 | rendered = render_pep440_post_branch(pieces) 649 | elif style == "pep440-old": 650 | rendered = render_pep440_old(pieces) 651 | elif style == "git-describe": 652 | rendered = render_git_describe(pieces) 653 | elif style == "git-describe-long": 654 | rendered = render_git_describe_long(pieces) 655 | else: 656 | raise ValueError("unknown style '%s'" % style) 657 | 658 | return { 659 | "version": rendered, 660 | "full-revisionid": pieces["long"], 661 | "dirty": pieces["dirty"], 662 | "error": None, 663 | "date": pieces.get("date"), 664 | } 665 | 666 | 667 | def get_versions() -> Dict[str, Any]: 668 | """Get version information or return default if unable to do so.""" 669 | # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have 670 | # __file__, we can work backwards from there to the root. Some 671 | # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which 672 | # case we can only use expanded keywords. 673 | 674 | cfg = get_config() 675 | verbose = cfg.verbose 676 | 677 | try: 678 | return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) 679 | except NotThisMethod: 680 | pass 681 | 682 | try: 683 | root = os.path.realpath(__file__) 684 | # versionfile_source is the relative path from the top of the source 685 | # tree (where the .git directory might live) to this file. Invert 686 | # this to find the root from __file__. 
687 | for _ in cfg.versionfile_source.split("/"): 688 | root = os.path.dirname(root) 689 | except NameError: 690 | return { 691 | "version": "0+unknown", 692 | "full-revisionid": None, 693 | "dirty": None, 694 | "error": "unable to find root of source tree", 695 | "date": None, 696 | } 697 | 698 | try: 699 | pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) 700 | return render(pieces, cfg.style) 701 | except NotThisMethod: 702 | pass 703 | 704 | try: 705 | if cfg.parentdir_prefix: 706 | return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) 707 | except NotThisMethod: 708 | pass 709 | 710 | return { 711 | "version": "0+unknown", 712 | "full-revisionid": None, 713 | "dirty": None, 714 | "error": "unable to compute version", 715 | "date": None, 716 | } 717 | -------------------------------------------------------------------------------- /gcsfs/checkers.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from base64 import b64encode 3 | from hashlib import md5 4 | from typing import Optional 5 | 6 | from .retry import ChecksumError 7 | 8 | try: 9 | import crcmod 10 | except ImportError: 11 | crcmod = None 12 | 13 | 14 | class ConsistencyChecker: 15 | def __init__(self): 16 | pass 17 | 18 | def update(self, data: bytes): 19 | pass 20 | 21 | def validate_json_response(self, gcs_object): 22 | pass 23 | 24 | def validate_headers(self, headers): 25 | pass 26 | 27 | def validate_http_response(self, r): 28 | pass 29 | 30 | 31 | class MD5Checker(ConsistencyChecker): 32 | def __init__(self): 33 | self.md = md5() 34 | 35 | def update(self, data): 36 | self.md.update(data) 37 | 38 | def validate_json_response(self, gcs_object): 39 | mdback = gcs_object["md5Hash"] 40 | if b64encode(self.md.digest()) != mdback.encode(): 41 | raise ChecksumError("MD5 checksum failed") 42 | 43 | def validate_headers(self, headers): 44 | if headers is not None and "X-Goog-Hash" in headers: 45 | 46 | dig = [ 47 | bit.split("=")[1] 48 | for bit in headers["X-Goog-Hash"].split(",") 49 | if bit and bit.strip().startswith("md5=") 50 | ] 51 | if dig: 52 | if b64encode(self.md.digest()).decode().rstrip("=") != dig[0]: 53 | raise ChecksumError("Checksum failure") 54 | else: 55 | raise NotImplementedError( 56 | "No md5 checksum available to do consistency check. GCS does " 57 | "not provide md5 sums for composite objects." 58 | ) 59 | 60 | def validate_http_response(self, r): 61 | return self.validate_headers(r.headers) 62 | 63 | 64 | class SizeChecker(ConsistencyChecker): 65 | def __init__(self): 66 | self.size = 0 67 | 68 | def update(self, data: bytes): 69 | self.size += len(data) 70 | 71 | def validate_json_response(self, gcs_object): 72 | assert int(gcs_object["size"]) == self.size, "Size mismatch" 73 | 74 | def validate_http_response(self, r): 75 | assert r.content_length == self.size 76 | 77 | 78 | class Crc32cChecker(ConsistencyChecker): 79 | def __init__(self): 80 | self.crc32c = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 81 | 82 | def update(self, data: bytes): 83 | self.crc32c.update(data) 84 | 85 | def validate_json_response(self, gcs_object): 86 | # docs for gcs_object: https://cloud.google.com/storage/docs/json_api/v1/objects 87 | digest = self.crc32c.digest() 88 | digest_b64 = base64.b64encode(digest).decode() 89 | expected = gcs_object["crc32c"] 90 | 91 | if digest_b64 != expected: 92 | raise ChecksumError(f'Expected "{expected}". 
Got "{digest_b64}"') 93 | 94 | def validate_headers(self, headers): 95 | if headers is not None: 96 | hasher = headers.get("X-Goog-Hash", "") 97 | crc = [h.split("=", 1)[1] for h in hasher.split(",") if "crc32c" in h] 98 | if not crc: 99 | raise NotImplementedError("No crc32c checksum was provided by google!") 100 | if crc[0] != b64encode(self.crc32c.digest()).decode(): 101 | raise ChecksumError() 102 | 103 | def validate_http_response(self, r): 104 | return self.validate_headers(r.headers) 105 | 106 | 107 | def get_consistency_checker(consistency: Optional[str]) -> ConsistencyChecker: 108 | if consistency == "size": 109 | return SizeChecker() 110 | elif consistency == "md5": 111 | return MD5Checker() 112 | elif consistency == "crc32c": 113 | if crcmod is None: 114 | raise ImportError( 115 | "The python package `crcmod` is required for `consistency='crc32c'`. " 116 | "This can be installed with `pip install gcsfs[crc]`" 117 | ) 118 | else: 119 | return Crc32cChecker() 120 | else: 121 | return ConsistencyChecker() 122 | -------------------------------------------------------------------------------- /gcsfs/cli/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsspec/gcsfs/7872bd7a931fb4285d5762ff5d861b8653fc7b70/gcsfs/cli/__init__.py -------------------------------------------------------------------------------- /gcsfs/cli/gcsfuse.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | import click 4 | from fuse import FUSE 5 | 6 | from gcsfs.gcsfuse import GCSFS 7 | 8 | 9 | @click.command() 10 | @click.argument("bucket", type=str, required=True) 11 | @click.argument("mount_point", type=str, required=True) 12 | @click.option( 13 | "--token", 14 | type=str, 15 | required=False, 16 | default=None, 17 | help="Token to use for authentication", 18 | ) 19 | @click.option( 20 | "--project-id", type=str, required=False, default="", help="Billing Project ID" 21 | ) 22 | @click.option( 23 | "--foreground/--background", 24 | default=True, 25 | help="Run in the foreground or as a background process", 26 | ) 27 | @click.option( 28 | "--threads/--no-threads", default=True, help="Whether to run with threads" 29 | ) 30 | @click.option( 31 | "--cache_files", type=int, default=10, help="Number of open files to cache" 32 | ) 33 | @click.option( 34 | "-v", 35 | "--verbose", 36 | count=True, 37 | help="Set logging level. '-v' for 'gcsfuse' logging." 
38 | "'-v -v' for complete debug logging.", 39 | ) 40 | def main( 41 | bucket, mount_point, token, project_id, foreground, threads, cache_files, verbose 42 | ): 43 | """Mount a Google Cloud Storage (GCS) bucket to a local directory""" 44 | 45 | if verbose == 1: 46 | logging.basicConfig(level=logging.INFO) 47 | logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG) 48 | if verbose > 1: 49 | logging.basicConfig(level=logging.DEBUG) 50 | 51 | fmt = "%(asctime)s %(name)-12s %(levelname)-8s %(message)s" 52 | if verbose == 1: 53 | logging.basicConfig(level=logging.INFO, format=fmt) 54 | logging.getLogger("gcsfs.gcsfuse").setLevel(logging.DEBUG) 55 | if verbose > 1: 56 | logging.basicConfig(level=logging.DEBUG, format=fmt) 57 | 58 | print(f"Mounting bucket {bucket} to directory {mount_point}") 59 | print("foreground:", foreground, ", nothreads:", not threads) 60 | FUSE( 61 | GCSFS(bucket, token=token, project=project_id, nfiles=cache_files), 62 | mount_point, 63 | nothreads=not threads, 64 | foreground=foreground, 65 | ) 66 | 67 | 68 | if __name__ == "__main__": 69 | main() 70 | -------------------------------------------------------------------------------- /gcsfs/credentials.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | import os 4 | import pickle 5 | import textwrap 6 | import threading 7 | import warnings 8 | from datetime import datetime, timezone 9 | 10 | import google.auth as gauth 11 | import google.auth.compute_engine 12 | import google.auth.credentials 13 | import google.auth.exceptions 14 | import requests 15 | from google.auth.transport.requests import Request 16 | from google.oauth2 import service_account 17 | from google.oauth2.credentials import Credentials 18 | from google_auth_oauthlib.flow import InstalledAppFlow 19 | 20 | from gcsfs.retry import HttpError 21 | 22 | logger = logging.getLogger("gcsfs.credentials") 23 | 24 | tfile = os.path.join(os.path.expanduser("~"), ".gcs_tokens") 25 | 26 | not_secret = { 27 | "client_id": "586241054156-9kst7ltfj66svc342pcn43vp6ta3idin" 28 | ".apps.googleusercontent.com", 29 | "client_secret": "xto0LIFYX35mmHF9T1R2QBqT", 30 | } 31 | 32 | client_config = { 33 | "installed": { 34 | "client_id": not_secret["client_id"], 35 | "client_secret": not_secret["client_secret"], 36 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 37 | "token_uri": "https://accounts.google.com/o/oauth2/token", 38 | } 39 | } 40 | 41 | 42 | class GoogleCredentials: 43 | def __init__(self, project, access, token, check_credentials=None, on_google=True): 44 | self.scope = "https://www.googleapis.com/auth/devstorage." 
+ access 45 | self.project = project 46 | self.access = access 47 | self.heads = {} 48 | 49 | self.credentials = None 50 | self.method = None 51 | self.lock = threading.Lock() 52 | self.token = token 53 | self.on_google = on_google 54 | self.connect(method=token) 55 | 56 | if check_credentials: 57 | warnings.warn( 58 | "The `check_credentials` argument is deprecated and will be removed in a future release.", 59 | DeprecationWarning, 60 | ) 61 | 62 | @classmethod 63 | def load_tokens(cls): 64 | """Get "browser" tokens from disc""" 65 | try: 66 | with open(tfile, "rb") as f: 67 | tokens = pickle.load(f) 68 | except Exception: 69 | tokens = {} 70 | GoogleCredentials.tokens = tokens 71 | 72 | @staticmethod 73 | def _save_tokens(): 74 | try: 75 | with open(tfile, "wb") as f: 76 | pickle.dump(GoogleCredentials.tokens, f, 2) 77 | except Exception as e: 78 | warnings.warn("Saving token cache failed: " + str(e)) 79 | 80 | def _connect_google_default(self): 81 | credentials, project = gauth.default(scopes=[self.scope]) 82 | msg = textwrap.dedent( 83 | """\ 84 | User-provided project '{}' does not match the google default project '{}'. Either 85 | 86 | 1. Accept the google-default project by not passing a `project` to GCSFileSystem 87 | 2. Configure the default project to match the user-provided project (gcloud config set project) 88 | 3. Use an authorization method other than 'google_default' by providing 'token=...' 89 | """ 90 | ) 91 | if self.project and self.project != project: 92 | raise ValueError(msg.format(self.project, project)) 93 | self.project = project 94 | self.credentials = credentials 95 | 96 | def _connect_cloud(self): 97 | if not self.on_google: 98 | raise ValueError 99 | self.credentials = gauth.compute_engine.Credentials() 100 | try: 101 | with requests.Session() as session: 102 | req = Request(session) 103 | self.credentials.refresh(req) 104 | except gauth.exceptions.RefreshError as error: 105 | raise ValueError("Invalid gcloud credentials") from error 106 | 107 | def _connect_cache(self): 108 | if len(self.tokens) == 0: 109 | raise ValueError("No cached tokens") 110 | 111 | project, access = self.project, self.access 112 | if (project, access) in self.tokens: 113 | credentials = self.tokens[(project, access)] 114 | self.credentials = credentials 115 | 116 | def _dict_to_credentials(self, token): 117 | """ 118 | Convert old dict-style token. 119 | 120 | Does not preserve access token itself, assumes refresh required. 121 | """ 122 | try: 123 | token = service_account.Credentials.from_service_account_info( 124 | token, scopes=[self.scope] 125 | ) 126 | except: # noqa: E722 127 | # TODO: catch specific exceptions 128 | # According https://github.com/googleapis/python-cloud-core/blob/master/google/cloud/client.py 129 | # Scopes required for authenticating with a service. User authentication fails 130 | # with invalid_scope if scope is specified. 131 | token = Credentials( 132 | None, 133 | refresh_token=token["refresh_token"], 134 | client_secret=token["client_secret"], 135 | client_id=token["client_id"], 136 | token_uri="https://oauth2.googleapis.com/token", 137 | ) 138 | return token 139 | 140 | def _connect_token(self, token): 141 | """ 142 | Connect using a concrete token 143 | 144 | Parameters 145 | ---------- 146 | token: str, dict or Credentials 147 | If a str and a valid file name, try to load as a Service file, or next as a JSON; 148 | if not a valid file name, assume it's a valid raw (non-renewable/session) token, and pass to Credentials. 
If 149 | dict, try to interpret as credentials; if Credentials, use directly. 150 | """ 151 | if isinstance(token, str): 152 | if os.path.exists(token): 153 | try: 154 | # is this a "service" token? 155 | self._connect_service(token) 156 | return 157 | except: # noqa: E722 158 | # TODO: catch specific exceptions 159 | # some other kind of token file 160 | # will raise exception if is not json 161 | with open(token) as data: 162 | token = json.load(data) 163 | else: 164 | token = Credentials(token) 165 | if isinstance(token, dict): 166 | credentials = self._dict_to_credentials(token) 167 | elif isinstance(token, google.auth.credentials.Credentials): 168 | credentials = token 169 | else: 170 | raise ValueError("Token format not understood") 171 | self.credentials = credentials 172 | if self.credentials.valid: 173 | self.credentials.apply(self.heads) 174 | 175 | def _credentials_valid(self, refresh_buffer): 176 | return ( 177 | self.credentials.valid 178 | # In addition to checking current validity, we ensure that there is 179 | # not a near-future expiry to avoid errors when expiration hits. 180 | and ( 181 | ( 182 | self.credentials.expiry 183 | and ( 184 | self.credentials.expiry.replace(tzinfo=timezone.utc) 185 | - datetime.now(timezone.utc) 186 | ).total_seconds() 187 | > refresh_buffer 188 | ) 189 | or not self.credentials.expiry 190 | ) 191 | ) 192 | 193 | def maybe_refresh(self, refresh_buffer=300): 194 | """ 195 | Check and refresh credentials if needed 196 | """ 197 | if self.credentials is None: 198 | return # anon 199 | 200 | if self._credentials_valid(refresh_buffer): 201 | return # still good, with buffer 202 | 203 | with requests.Session() as session: 204 | req = Request(session) 205 | with self.lock: 206 | if self._credentials_valid(refresh_buffer): 207 | return # repeat check to avoid race conditions 208 | 209 | logger.debug("GCS refresh") 210 | try: 211 | self.credentials.refresh(req) 212 | except gauth.exceptions.RefreshError as error: 213 | # Re-raise as HttpError with a 401 code and the expected message 214 | raise HttpError( 215 | {"code": 401, "message": "Invalid Credentials"} 216 | ) from error 217 | 218 | # https://github.com/fsspec/filesystem_spec/issues/565 219 | self.credentials.apply(self.heads) 220 | 221 | def apply(self, out): 222 | """Insert credential headers in-place to a dictionary""" 223 | self.maybe_refresh() 224 | if self.credentials is not None: 225 | self.credentials.apply(out) 226 | 227 | def _connect_service(self, fn): 228 | # raises exception if the file does not match expectation 229 | credentials = service_account.Credentials.from_service_account_file( 230 | fn, scopes=[self.scope] 231 | ) 232 | self.credentials = credentials 233 | 234 | def _connect_anon(self): 235 | self.credentials = None 236 | 237 | def _connect_browser(self): 238 | flow = InstalledAppFlow.from_client_config(client_config, [self.scope]) 239 | credentials = flow.run_local_server() 240 | self.tokens[(self.project, self.access)] = credentials 241 | self._save_tokens() 242 | self.credentials = credentials 243 | 244 | def connect(self, method=None): 245 | """ 246 | Establish session token. A new token will be requested if the current 247 | one is within 100s of expiry. 248 | 249 | Parameters 250 | ---------- 251 | method: str (google_default|cache|cloud|token|anon|browser) or None 252 | Type of authorisation to implement - calls `_connect_*` methods. 253 | If None, will try sequence of methods. 
254 | """ 255 | if method not in [ 256 | "google_default", 257 | "cache", 258 | "cloud", 259 | "token", 260 | "anon", 261 | None, 262 | ]: 263 | self._connect_token(method) 264 | elif method is None: 265 | for meth in ["google_default", "cache", "cloud", "anon"]: 266 | try: 267 | self.connect(method=meth) 268 | logger.debug("Connected with method %s", meth) 269 | break 270 | except (google.auth.exceptions.GoogleAuthError, ValueError) as e: 271 | # GoogleAuthError is the base class for all authentication 272 | # errors 273 | logger.debug( 274 | 'Connection with method "%s" failed' % meth, exc_info=e 275 | ) 276 | # Reset credentials if they were set but the authentication failed 277 | # (reverts to 'anon' behavior) 278 | self.credentials = None 279 | else: 280 | # Since the 'anon' connection method should always succeed, 281 | # getting here means something has gone terribly wrong. 282 | raise RuntimeError("All connection methods have failed!") 283 | else: 284 | self.__getattribute__("_connect_" + method)() 285 | self.method = method 286 | -------------------------------------------------------------------------------- /gcsfs/dask_link.py: -------------------------------------------------------------------------------- 1 | def register(): 2 | """ 3 | Backward compatibility 4 | """ 5 | pass 6 | -------------------------------------------------------------------------------- /gcsfs/inventory_report.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | 3 | 4 | class InventoryReport: 5 | """ 6 | A utility class for fetching and processing inventory reports from GCS. 7 | 8 | The 'InventoryReport' class provides logic to support logic to fetch 9 | inventory reports, and process their content to obtain a final snapshot 10 | of objects in the latest inventory reports. 11 | 12 | High-Level Functionality: 13 | ------------------------ 14 | 1. Fetching Inventory Reports: 15 | - The class offers methods to fetch inventory report configurations and 16 | metadata from GCS. 17 | - It validates the inventory report information provided by the user. 18 | - Inventory report configurations include options for parsing CSV format 19 | and specifying the bucket and destination path. 20 | 21 | 2. Parsing and Processing Inventory Report Content: 22 | - The class processes the raw content of inventory reports to extract 23 | object details such as name, size, etc. 24 | - It supports listing objects using a snapshot option or filtering 25 | based on a user-defined prefix. 26 | - The class handles CSV parsing, removes header (if specified), and 27 | fetches required object metadata. 28 | 29 | 3. Constructing the Final Snapshot: 30 | - If the user wishes to use the snapshot to do listing directly, the 31 | snapshot will contain the relevant object details and subdirectory 32 | prefixes, filtered by the prefix. 33 | 34 | - If the user wishes to use the snapshot as a starting point for async 35 | listing, the snapshot will only contain a list of object names, 36 | filtered by the prefix. 37 | 38 | Note: 39 | ----- 40 | - The class should only be internally used in the 'GCSFileSystem' as an 41 | optional configuration during listing. 42 | 43 | Example Usage: 44 | -------------- 45 | # Should already be instanted in 'core.py' 46 | gcs_file_system = GCSFileSystem(...) 
47 | 48 | # User defines inventory report information 49 | inventory_report_info = { 50 | "use_snapshot_listing": True, 51 | "location": "us-east1", 52 | "id": "inventory_report_id" 53 | } 54 | 55 | # User defines a prefix for filtering objects 56 | prefix = "prefix/" 57 | 58 | # Fetch the snapshot based on inventory reports 59 | items, prefixes = await InventoryReport.fetch_snapshot( 60 | gcs_file_system, inventory_report_info, prefix) 61 | """ 62 | 63 | # HTTP endpoint of the Storage Insights Service. 64 | BASE_URL = "https://storageinsights.googleapis.com/v1" 65 | 66 | @classmethod 67 | async def fetch_snapshot(cls, gcs_file_system, inventory_report_info, prefix): 68 | """ 69 | Main entry point of the 'InventoryReport' class. 70 | Fetches the latest snapshot of objects based on inventory report configuration. 71 | 72 | Parameters: 73 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 74 | class (see 'core.py'). 75 | inventory_report_info (dict): A client-configured dictionary 76 | containing inventory report information. 77 | prefix (str): Listing prefix specified by the client. 78 | 79 | Returns: 80 | tuple: A tuple containing two lists: the 'items' list representing 81 | object details for the snapshot, and the 'prefixes' list containing 82 | subdirectory prefixes. 83 | 84 | Note: when 'use_snapshot_listing' in 'inventory_report_info' is set 85 | to False, the 'prefixes' list will be empty, and the 'items' list 86 | will contain only the object names. 87 | """ 88 | # Validate the inventory report info that the user passes in. 89 | cls._validate_inventory_report_info(inventory_report_info) 90 | 91 | # Parse the inventory report info. 92 | use_snapshot_listing = inventory_report_info.get("use_snapshot_listing") 93 | inventory_report_location = inventory_report_info.get("location") 94 | inventory_report_id = inventory_report_info.get("id") 95 | 96 | # Fetch the inventory report configuration. 97 | raw_inventory_report_config = await cls._fetch_raw_inventory_report_config( 98 | gcs_file_system=gcs_file_system, 99 | location=inventory_report_location, 100 | id=inventory_report_id, 101 | ) 102 | 103 | # Parse the inventory report configuration. 104 | inventory_report_config = cls._parse_raw_inventory_report_config( 105 | raw_inventory_report_config=raw_inventory_report_config, 106 | use_snapshot_listing=use_snapshot_listing, 107 | ) 108 | 109 | # Use the config to fetch all inventory report metadata. 110 | unsorted_inventory_report_metadata = await cls._fetch_inventory_report_metadata( 111 | gcs_file_system=gcs_file_system, 112 | inventory_report_config=inventory_report_config, 113 | ) 114 | 115 | # Sort the metadata based on reverse created time order. 116 | inventory_report_metadata = cls._sort_inventory_report_metadata( 117 | unsorted_inventory_report_metadata=unsorted_inventory_report_metadata 118 | ) 119 | 120 | # Download the most recent inventory reports in raw form. 121 | bucket = inventory_report_config.bucket 122 | inventory_report_content = await cls._download_inventory_report_content( 123 | gcs_file_system=gcs_file_system, 124 | inventory_report_metadata=inventory_report_metadata, 125 | bucket=bucket, 126 | ) 127 | 128 | # Parse the raw inventory reports into snapshot objects. 
129 | objects = cls._parse_inventory_report_content( 130 | gcs_file_system=gcs_file_system, 131 | inventory_report_content=inventory_report_content, 132 | inventory_report_config=inventory_report_config, 133 | use_snapshot_listing=use_snapshot_listing, 134 | bucket=bucket, 135 | ) 136 | 137 | # Construct the final snapshot based on the fetched objects. 138 | snapshot = cls._construct_final_snapshot( 139 | objects=objects, prefix=prefix, use_snapshot_listing=use_snapshot_listing 140 | ) 141 | 142 | # Return the final snapshot. 143 | return snapshot 144 | 145 | def _validate_inventory_report_info(inventory_report_info): 146 | """ 147 | Validates the inventory report information dictionary that user 148 | passes in. 149 | 150 | Parameters: 151 | inventory_report_info (dict): A dictionary containing the inventory 152 | report information with the following keys: 153 | - "use_snapshot_listing" (bool): A flag indicating whether 154 | to use snapshot listing in the inventory report. 155 | - "location" (str): The location of the inventory report in GCS. 156 | - "id" (str): The ID of the inventory report in GCS. 157 | 158 | Raises: 159 | ValueError: If any required key (use_snapshot_listing, location, id) 160 | is missing from the inventory_report_info dictionary. 161 | """ 162 | if "use_snapshot_listing" not in inventory_report_info: 163 | raise ValueError("Use snapshot listing is not configured.") 164 | if "location" not in inventory_report_info: 165 | raise ValueError("Inventory report location is not configured.") 166 | if "id" not in inventory_report_info: 167 | raise ValueError("Inventory report id is not configured.") 168 | 169 | async def _fetch_raw_inventory_report_config(gcs_file_system, location, id): 170 | """ 171 | Fetches the raw inventory report configuration from GCS based on the 172 | specified location and ID. 173 | 174 | Parameters: 175 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 176 | class (see 'core.py'). 177 | location (str): The location of the inventory report in GCS. 178 | id (str): The ID of the inventory report in GCS. 179 | 180 | Returns: 181 | dict: A dictionary containing the raw inventory report 182 | configuration retrieved from GCS. 183 | 184 | Raises: 185 | Exception: If there is an error while fetching the inventory 186 | report configuration. 187 | """ 188 | project = gcs_file_system.project 189 | url = "{}/projects/{}/locations/{}/reportConfigs/{}" 190 | url = url.format(InventoryReport.BASE_URL, project, location, id) 191 | try: 192 | raw_inventory_report_config = await gcs_file_system._call( 193 | "GET", url, json_out=True 194 | ) 195 | return raw_inventory_report_config 196 | except Exception as e: 197 | raise ValueError( 198 | f"Error encountered when fetching inventory report config: {e}." 199 | ) 200 | 201 | def _parse_raw_inventory_report_config( 202 | raw_inventory_report_config, use_snapshot_listing 203 | ): 204 | """ 205 | Parses the raw inventory report configuration and validates its properties. 206 | 207 | Parameters: 208 | raw_inventory_report_config (dict): A dictionary containing the raw 209 | inventory report configuration retrieved from GCS. 210 | use_snapshot_listing (bool): A flag indicating whether to use snapshot 211 | listing in the inventory report. 212 | 213 | Returns: 214 | InventoryReportConfig: An instance of the InventoryReportConfig 215 | class representing the parsed inventory report configuration. 
216 | 217 | Raises: 218 | ValueError: If the current date is outside the start and 219 | end range specified in the inventory report config. 220 | ValueError: If the "name" field is not present in the metadata 221 | fields of the report config. 222 | ValueError: If "size" field is not present in the metadata 223 | fields and use_snapshot_listing is True. 224 | """ 225 | # Parse the report config. 226 | frequency_options = raw_inventory_report_config.get("frequencyOptions") 227 | start_date = InventoryReport._convert_obj_to_date( 228 | frequency_options.get("startDate") 229 | ) 230 | end_date = InventoryReport._convert_obj_to_date( 231 | frequency_options.get("endDate") 232 | ) 233 | object_metadata_report_options = raw_inventory_report_config.get( 234 | "objectMetadataReportOptions" 235 | ) 236 | storage_destination_options = object_metadata_report_options.get( 237 | "storageDestinationOptions" 238 | ) 239 | 240 | # Save relevant report config properties. 241 | csv_options = raw_inventory_report_config.get("csvOptions") 242 | bucket = storage_destination_options.get("bucket") 243 | destination_path = storage_destination_options.get("destinationPath") 244 | metadata_fields = object_metadata_report_options.get("metadataFields") 245 | 246 | # Validate date, making sure the current date is within the start and end range. 247 | today = datetime.now() 248 | if today < start_date or today > end_date: 249 | raise ValueError( 250 | f"Current date {today} is outside the range \ 251 | {start_date} and {end_date} specified by the inventory report config." 252 | ) 253 | 254 | # Validate object name exists in the metadata fields. 255 | # Note that the size field is mandated to be included in the 256 | # config when the client sets up the inventory report. 257 | obj_name_idx = metadata_fields.index("name") 258 | 259 | # If the user wants to do listing based on the snapshot, also 260 | # validate the report contains size metadata for each object. 261 | if use_snapshot_listing: 262 | try: 263 | metadata_fields.index("size") 264 | except ValueError: 265 | raise ValueError( 266 | "If you want to use the snapshot for listing, the object size \ 267 | metadata has to be included in the inventory report." 268 | ) 269 | 270 | # Finally, construct and return the inventory report config. 271 | inventory_report_config = InventoryReportConfig( 272 | csv_options=csv_options, 273 | bucket=bucket, 274 | destination_path=destination_path, 275 | metadata_fields=metadata_fields, 276 | obj_name_idx=obj_name_idx, 277 | ) 278 | 279 | return inventory_report_config 280 | 281 | async def _fetch_inventory_report_metadata( 282 | gcs_file_system, inventory_report_config 283 | ): 284 | """ 285 | Fetches all inventory report metadata from GCS based on the specified 286 | inventory report config. 287 | 288 | Parameters: 289 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 290 | class (see 'core.py'). 291 | inventory_report_config (InventoryReportConfig): An instance of 292 | the InventoryReportConfig class representing the inventory report 293 | configuration. 294 | 295 | Returns: 296 | list: A list containing dictionaries representing the metadata of 297 | objects from the inventory reports. 298 | 299 | Raises: 300 | ValueError: If the fetched inventory reports are empty. 301 | """ 302 | # There might be multiple inventory reports in the bucket. 303 | inventory_report_metadata = [] 304 | 305 | # Extract out bucket and destination path of the inventory reports. 
306 | bucket = inventory_report_config.bucket 307 | destination_path = inventory_report_config.destination_path 308 | 309 | # Fetch the first page. 310 | page = await gcs_file_system._call( 311 | "GET", "b/{}/o", bucket, prefix=destination_path, json_out=True 312 | ) 313 | 314 | inventory_report_metadata.extend(page.get("items", [])) 315 | next_page_token = page.get("nextPageToken", None) 316 | 317 | # Keep fetching new pages as long as next page token exists. 318 | # Note that the iteration in the while loop should most likely 319 | # be minimal. For reference, a million objects is split up into 320 | # two reports, and if the report is generated daily, then in a year, 321 | # there will be roughly ~700 reports generated, which will still be 322 | # fetched in a single page. 323 | while next_page_token is not None: 324 | page = await gcs_file_system._call( 325 | "GET", 326 | "b/{}/o", 327 | bucket, 328 | prefix=destination_path, 329 | json_out=True, 330 | pageToken=next_page_token, 331 | ) 332 | 333 | inventory_report_metadata.extend(page.get("items", [])) 334 | next_page_token = page.get("nextPageToken", None) 335 | 336 | # If no reports are fetched, indicates there is an error. 337 | if len(inventory_report_metadata) == 0: 338 | raise ValueError( 339 | "No inventory reports to fetch. Check if \ 340 | your inventory report is set up correctly." 341 | ) 342 | 343 | return inventory_report_metadata 344 | 345 | def _sort_inventory_report_metadata(unsorted_inventory_report_metadata): 346 | """ 347 | Sorts the inventory report metadata based on the 'timeCreated' field 348 | in reverse chronological order. 349 | 350 | Parameters: 351 | unsorted_inventory_report_metadata (list): A list of dictionaries 352 | representing the metadata of objects from the inventory reports. 353 | 354 | Returns: 355 | list: A sorted list of dictionaries representing the inventory 356 | report metadata, sorted in reverse chronological order based 357 | on 'timeCreated'. 358 | """ 359 | return sorted( 360 | unsorted_inventory_report_metadata, 361 | key=lambda ir: InventoryReport._convert_str_to_datetime( 362 | ir.get("timeCreated") 363 | ), 364 | reverse=True, 365 | ) 366 | 367 | async def _download_inventory_report_content( 368 | gcs_file_system, inventory_report_metadata, bucket 369 | ): 370 | """ 371 | Downloads the most recent inventory report content from GCS based on 372 | the inventory report metadata. 373 | 374 | Parameters: 375 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 376 | class (see 'core.py'). 377 | inventory_report_metadata (list): A list of dictionaries 378 | representing the metadata of objects from the inventory reports. 379 | bucket (str): The name of the GCS bucket containing 380 | the inventory reports. 381 | 382 | Returns: 383 | list: A list containing the content of the most recent inventory 384 | report as strings. 385 | """ 386 | # Get the most recent inventory report date. 387 | most_recent_inventory_report = inventory_report_metadata[0] 388 | most_recent_date = InventoryReport._convert_str_to_datetime( 389 | most_recent_inventory_report.get("timeCreated") 390 | ).date() 391 | 392 | inventory_report_content = [] 393 | 394 | # Run a for loop here, since there might be multiple inventory reports 395 | # generated on the same day. For reference, 1 million objects will be 396 | # split into only 2 inventory reports, so it is very rare that there 397 | # will be many inventory reports on the same day. But including this 398 | # logic for robustness. 
399 | for metadata in inventory_report_metadata: 400 | inventory_report_date = InventoryReport._convert_str_to_datetime( 401 | metadata["timeCreated"] 402 | ).date() 403 | 404 | if inventory_report_date == most_recent_date: 405 | # Download the raw inventory report if the date matches. 406 | # The header is not needed; we only need to process and store 407 | # the content. 408 | _header, encoded_content = await gcs_file_system._call( 409 | "GET", "b/{}/o/{}", bucket, metadata.get("name"), alt="media" 410 | ) 411 | 412 | # Decode the binary content into a string. 413 | decoded_content = encoded_content.decode() 414 | 415 | inventory_report_content.append(decoded_content) 416 | 417 | return inventory_report_content 418 | 419 | def _parse_inventory_report_content( 420 | gcs_file_system, 421 | inventory_report_content, 422 | inventory_report_config, 423 | use_snapshot_listing, 424 | bucket, 425 | ): 426 | """ 427 | Parses the raw inventory report content and extracts object details. 428 | 429 | Parameters: 430 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 431 | class (see 'core.py'). 432 | inventory_report_content (list): A list of strings containing the 433 | raw content of the inventory report. 434 | inventory_report_config (InventoryReportConfig): An instance of the 435 | InventoryReportConfig class representing the inventory report 436 | configuration. 437 | use_snapshot_listing (bool): A flag indicating whether to use snapshot 438 | listing in the inventory report. 439 | bucket (str): The name of the GCS bucket containing the inventory 440 | reports. 441 | 442 | Returns: 443 | list: A list of dictionaries representing object details parsed 444 | from the inventory report content. 445 | """ 446 | # Get the csv configuration for each inventory report. 447 | csv_options = inventory_report_config.csv_options 448 | record_separator = csv_options.get("recordSeparator", "\n") 449 | delimiter = csv_options.get("delimiter", ",") 450 | header_required = csv_options.get("headerRequired", False) 451 | 452 | objects = [] 453 | 454 | for content in inventory_report_content: 455 | # Split the content into lines based on the specified separator. 456 | lines = content.split(record_separator) 457 | 458 | # Remove the header, if present. 459 | if header_required: 460 | lines = lines[1:] 461 | 462 | # Parse each line of the inventory report. 463 | for line in lines: 464 | obj = InventoryReport._parse_inventory_report_line( 465 | inventory_report_line=line, 466 | use_snapshot_listing=use_snapshot_listing, 467 | gcs_file_system=gcs_file_system, 468 | inventory_report_config=inventory_report_config, 469 | delimiter=delimiter, 470 | bucket=bucket, 471 | ) 472 | 473 | objects.append(obj) 474 | 475 | return objects 476 | 477 | def _parse_inventory_report_line( 478 | inventory_report_line, 479 | use_snapshot_listing, 480 | gcs_file_system, 481 | inventory_report_config, 482 | delimiter, 483 | bucket, 484 | ): 485 | """ 486 | Parses a single line of the inventory report and extracts object details. 487 | 488 | Parameters: 489 | inventory_report_line (str): A string representing a single line of 490 | the raw content from the inventory report. 491 | use_snapshot_listing (bool): A flag indicating whether to use snapshot 492 | listing in the inventory report. 493 | gcs_file_system (GCSFileSystem): An instance of the 'GCSFileSystem' 494 | class (see 'core.py').
495 | inventory_report_config (InventoryReportConfig): An instance of the 496 | InventoryReportConfig class representing the inventory report 497 | configuration. 498 | delimiter (str): The delimiter used in the inventory report content 499 | to separate fields. 500 | bucket (str): The name of the GCS bucket containing the inventory 501 | reports. 502 | 503 | Returns: 504 | dict: A dictionary representing object details parsed from the 505 | inventory report line. 506 | """ 507 | obj_name_idx = inventory_report_config.obj_name_idx 508 | metadata_fields = inventory_report_config.metadata_fields 509 | 510 | # If the client wants to do listing from the snapshot, we need 511 | # to fetch all the metadata for each object. Otherwise, we only 512 | # need to fetch the name. 513 | if use_snapshot_listing is True: 514 | obj = gcs_file_system._process_object( 515 | { 516 | key: value 517 | for key, value in zip( 518 | metadata_fields, inventory_report_line.strip().split(delimiter) 519 | ) 520 | }, 521 | bucket, 522 | ) 523 | else: 524 | obj = {"name": inventory_report_line.strip().split(delimiter)[obj_name_idx]} 525 | 526 | return obj 527 | 528 | def _construct_final_snapshot(objects, prefix, use_snapshot_listing): 529 | """ 530 | Constructs the final snapshot based on the retrieved objects and prefix. 531 | 532 | Parameters: 533 | objects (list): A list of dictionaries representing object details 534 | from the inventory report. 535 | prefix (str): A prefix used to filter objects in the snapshot based 536 | on their names. 537 | use_snapshot_listing (bool): A flag indicating whether to use snapshot 538 | listing in the inventory report. 539 | 540 | Returns: 541 | tuple: A tuple containing two lists: the 'items' list representing 542 | object details for the snapshot, and the 'prefixes' list containing 543 | subdirectory prefixes. If 'use_snapshot_listing' is False, 544 | 'prefixes' will be empty, and 'items' will contain the matching 545 | object names in the snapshot. 546 | """ 547 | if prefix is None: 548 | prefix = "" 549 | 550 | # Filter by the prefix and return the matches if the user does not 551 | # want to use the snapshot for listing. 552 | if use_snapshot_listing is False: 553 | return [obj for obj in objects if obj.get("name").startswith(prefix)], [] 554 | 555 | else: 556 | # If the user wants to use the snapshot, generate both the items and 557 | # prefixes manually. 558 | items = [] 559 | prefixes = set() 560 | 561 | for obj in objects: 562 | # Fetch the name of the object. 563 | obj_name = obj.get("name") 564 | 565 | # If the object name doesn't start with the prefix, continue. 566 | # In the case where the prefix is empty, startswith will always 567 | # return True (which is the expected behavior). 568 | if not obj_name.startswith(prefix): 569 | continue 570 | 571 | # Remove the prefix. 572 | object_name_no_prefix = obj_name[len(prefix) :] 573 | 574 | # Determine whether the object name is a directory. 575 | first_delimiter_idx = object_name_no_prefix.find("/") 576 | 577 | # If not, then append it to items. 578 | if first_delimiter_idx == -1: 579 | items.append(obj) 580 | continue 581 | 582 | # If it is, recompose the directory and add to the prefix set.
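# For example, with prefix "a/" and object name "a/b/c", the remainder
# is "b/c", so dir is "b" and the recomposed prefix becomes "a/b/".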
583 | dir = object_name_no_prefix[:first_delimiter_idx] 584 | obj_prefix = ( 585 | prefix.rstrip("/") 586 | + ("" if prefix == "" else "/") 587 | + dir 588 | + ("" if dir == "" else "/") 589 | ) 590 | prefixes.add(obj_prefix) 591 | 592 | return items, list(prefixes) 593 | 594 | @staticmethod 595 | def _convert_obj_to_date(obj): 596 | """ 597 | Converts a dictionary representing a date object to a datetime object. 598 | 599 | Parameters: 600 | obj (dict): A dictionary representing a date object with keys "day", 601 | "month", and "year". 602 | 603 | Returns: 604 | datetime: A datetime object representing the converted date. 605 | """ 606 | day = obj["day"] 607 | month = obj["month"] 608 | year = obj["year"] 609 | return datetime(year, month, day) 610 | 611 | @staticmethod 612 | def _convert_str_to_datetime(date_string): 613 | """ 614 | Converts an ISO-formatted date string to a datetime object. 615 | 616 | Parameters: 617 | date_string (str): An ISO-formatted date string with or without 618 | timezone information (Z). 619 | 620 | Returns: 621 | datetime: A datetime object representing the converted date and time. 622 | """ 623 | return datetime.fromisoformat(date_string.replace("Z", "+00:00")) 624 | 625 | 626 | class InventoryReportConfig(object): 627 | """ 628 | Represents the configuration for fetching inventory reports. 629 | 630 | Attributes: 631 | csv_options (dict): A dictionary containing options for parsing CSV 632 | format in the inventory reports. 633 | bucket (str): The name of the GCS bucket from which to fetch the 634 | inventory reports. 635 | destination_path (str): The path within the GCS bucket where the 636 | inventory reports are stored. 637 | metadata_fields (list): A list of strings representing metadata 638 | fields to be extracted from the inventory reports. 639 | obj_name_idx (int): The index of the "name" field in the 'metadata_fields' 640 | list, used to identify object names. 641 | """ 642 | 643 | def __init__( 644 | self, csv_options, bucket, destination_path, metadata_fields, obj_name_idx 645 | ): 646 | self.csv_options = csv_options 647 | self.bucket = bucket 648 | self.destination_path = destination_path 649 | self.metadata_fields = metadata_fields 650 | self.obj_name_idx = obj_name_idx 651 | -------------------------------------------------------------------------------- /gcsfs/mapping.py: -------------------------------------------------------------------------------- 1 | from .core import GCSFileSystem 2 | 3 | 4 | def GCSMap(root, gcs=None, check=False, create=False): 5 | """For backward compatibility""" 6 | gcs = gcs or GCSFileSystem.current() 7 | return gcs.get_mapper(root, check=check, create=create) 8 | -------------------------------------------------------------------------------- /gcsfs/retry.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import json 3 | import logging 4 | import random 5 | 6 | import aiohttp.client_exceptions 7 | import google.auth.exceptions 8 | import requests.exceptions 9 | from decorator import decorator 10 | 11 | logger = logging.getLogger("gcsfs") 12 | 13 | 14 | class HttpError(Exception): 15 | """Holds the message and code from cloud errors.""" 16 | 17 | def __init__(self, error_response=None): 18 | # Save error_response for potential pickle.
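# (__reduce__ below rebuilds the exception from this saved dict when
# the pickled instance is loaded again.)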
19 | self._error_response = error_response 20 | if error_response: 21 | self.code = error_response.get("code", None) 22 | self.message = error_response.get("message", "") 23 | if self.code: 24 | if isinstance(self.message, bytes): 25 | self.message += (", %s" % self.code).encode() 26 | else: 27 | self.message += ", %s" % self.code 28 | else: 29 | self.message = "" 30 | self.code = None 31 | # Call the base class constructor with the parameters it needs 32 | super().__init__(self.message) 33 | 34 | def __reduce__(self): 35 | """This makes the Exception pickleable.""" 36 | 37 | # This is basically deconstructing the HttpError when pickled. 38 | return HttpError, (self._error_response,) 39 | 40 | 41 | class ChecksumError(Exception): 42 | """Raised when the md5 hash of the content does not match the header.""" 43 | 44 | pass 45 | 46 | 47 | RETRIABLE_EXCEPTIONS = ( 48 | requests.exceptions.ChunkedEncodingError, 49 | requests.exceptions.ConnectionError, 50 | requests.exceptions.ReadTimeout, 51 | requests.exceptions.Timeout, 52 | requests.exceptions.ProxyError, 53 | requests.exceptions.SSLError, 54 | requests.exceptions.ContentDecodingError, 55 | google.auth.exceptions.RefreshError, 56 | aiohttp.client_exceptions.ClientError, 57 | ChecksumError, 58 | ) 59 | 60 | 61 | errs = list(range(500, 505)) + [ 62 | # Request Timeout 63 | 408, 64 | # Too Many Requests 65 | 429, 66 | ] 67 | errs = set(errs + [str(e) for e in errs]) 68 | 69 | 70 | def is_retriable(exception): 71 | """Returns True if this exception is retriable.""" 72 | 73 | if isinstance(exception, HttpError): 74 | # Add 401 to retriable errors when it's an auth expiration issue 75 | if exception.code == 401 and "Invalid Credentials" in str(exception.message): 76 | return True 77 | return exception.code in errs 78 | 79 | return isinstance(exception, RETRIABLE_EXCEPTIONS) 80 | 81 | 82 | def validate_response(status, content, path, args=None): 83 | """ 84 | Check the HTTP response status and content, raising an error if not ok. 85 | 86 | Parameters 87 | ---------- 88 | status, content: HTTP status code and body of the response 89 | path, args: associated URL path template and its format values, for error messages 90 | """ 91 | if status >= 400 and status != 499: 92 | # 499 is special "upload was cancelled" status 93 | if args: 94 | from .core import quote 95 | 96 | path = path.format(*[quote(p) for p in args]) 97 | if status == 404: 98 | raise FileNotFoundError(path) 99 | 100 | error = None 101 | if hasattr(content, "decode"): 102 | content = content.decode() 103 | try: 104 | error = json.loads(content)["error"] 105 | # Sometimes the error message is a string.
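# Two shapes are possible here, e.g. (illustrative payloads):
#   {"error": {"code": 403, "message": "Forbidden"}} or {"error": "Forbidden"}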
106 | if isinstance(error, str): 107 | msg = error 108 | else: 109 | msg = error["message"] 110 | except json.decoder.JSONDecodeError: 111 | msg = content 112 | 113 | if status == 403: 114 | raise OSError(f"Forbidden: {path}\n{msg}") 115 | elif status == 412: 116 | raise FileExistsError(path) 117 | elif status == 502: 118 | raise requests.exceptions.ProxyError() 119 | elif "invalid" in str(msg): 120 | raise ValueError(f"Bad Request: {path}\n{msg}") 121 | elif error and not isinstance(error, str): 122 | raise HttpError(error) 123 | elif status: 124 | raise HttpError({"code": status, "message": msg}) # text-like 125 | else: 126 | raise RuntimeError(msg) 127 | 128 | 129 | @decorator 130 | async def retry_request(func, retries=6, *args, **kwargs): 131 | for retry in range(retries): 132 | try: 133 | if retry > 0: 134 | await asyncio.sleep(min(random.random() + 2 ** (retry - 1), 32)) 135 | return await func(*args, **kwargs) 136 | except ( 137 | HttpError, 138 | requests.exceptions.RequestException, 139 | google.auth.exceptions.GoogleAuthError, 140 | ChecksumError, 141 | aiohttp.client_exceptions.ClientError, 142 | ) as e: 143 | if ( 144 | isinstance(e, HttpError) 145 | and e.code == 400 146 | and "requester pays" in e.message 147 | ): 148 | msg = ( 149 | "Bucket is requester pays. " 150 | "Set `requester_pays=True` when creating the GCSFileSystem." 151 | ) 152 | raise ValueError(msg) from e 153 | # Special test for 404 to avoid retrying the request 154 | if ( 155 | isinstance(e, aiohttp.client_exceptions.ClientResponseError) 156 | and e.status == 404 157 | ): 158 | logger.debug("Request returned 404, no retries.") 159 | raise e 160 | if isinstance(e, HttpError) and e.code == 404: 161 | logger.debug("Request returned 404, no retries.") 162 | raise e 163 | if retry == retries - 1: 164 | logger.exception(f"{func.__name__} out of retries on exception: {e}") 165 | raise e 166 | if is_retriable(e): 167 | logger.debug(f"{func.__name__} retrying after exception: {e}") 168 | continue 169 | logger.exception(f"{func.__name__} non-retriable exception: {e}") 170 | raise e 171 | -------------------------------------------------------------------------------- /gcsfs/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsspec/gcsfs/7872bd7a931fb4285d5762ff5d861b8653fc7b70/gcsfs/tests/__init__.py -------------------------------------------------------------------------------- /gcsfs/tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shlex 3 | import subprocess 4 | import time 5 | 6 | import fsspec 7 | import pytest 8 | import requests 9 | 10 | from gcsfs import GCSFileSystem 11 | from gcsfs.tests.settings import TEST_BUCKET 12 | 13 | files = { 14 | "test/accounts.1.json": ( 15 | b'{"amount": 100, "name": "Alice"}\n' 16 | b'{"amount": 200, "name": "Bob"}\n' 17 | b'{"amount": 300, "name": "Charlie"}\n' 18 | b'{"amount": 400, "name": "Dennis"}\n' 19 | ), 20 | "test/accounts.2.json": ( 21 | b'{"amount": 500, "name": "Alice"}\n' 22 | b'{"amount": 600, "name": "Bob"}\n' 23 | b'{"amount": 700, "name": "Charlie"}\n' 24 | b'{"amount": 800, "name": "Dennis"}\n' 25 | ), 26 | } 27 | 28 | csv_files = { 29 | "2014-01-01.csv": ( 30 | b"name,amount,id\n" b"Alice,100,1\n" b"Bob,200,2\n" b"Charlie,300,3\n" 31 | ), 32 | "2014-01-02.csv": b"name,amount,id\n", 33 | "2014-01-03.csv": ( 34 | b"name,amount,id\n" b"Dennis,400,4\n" b"Edith,500,5\n" b"Frank,600,6\n" 35 | ), 36 | } 37 | 
text_files = { 38 | "nested/file1": b"hello\n", 39 | "nested/file2": b"world", 40 | "nested/nested2/file1": b"hello\n", 41 | "nested/nested2/file2": b"world", 42 | } 43 | allfiles = dict(**files, **csv_files, **text_files) 44 | a = TEST_BUCKET + "/tmp/test/a" 45 | b = TEST_BUCKET + "/tmp/test/b" 46 | c = TEST_BUCKET + "/tmp/test/c" 47 | d = TEST_BUCKET + "/tmp/test/d" 48 | 49 | params = dict() 50 | 51 | 52 | def stop_docker(container): 53 | cmd = shlex.split('docker ps -a -q --filter "name=%s"' % container) 54 | cid = subprocess.check_output(cmd).strip().decode() 55 | if cid: 56 | subprocess.call(["docker", "rm", "-f", "-v", cid]) 57 | 58 | 59 | @pytest.fixture(scope="module") 60 | def docker_gcs(): 61 | if "STORAGE_EMULATOR_HOST" in os.environ: 62 | # assume using real API or otherwise have a server already set up 63 | yield os.getenv("STORAGE_EMULATOR_HOST") 64 | return 65 | params["token"] = "anon" 66 | container = "gcsfs_test" 67 | cmd = ( 68 | "docker run -d -p 4443:4443 --name gcsfs_test fsouza/fake-gcs-server:latest -scheme " 69 | "http -public-host 0.0.0.0:4443 -external-url http://localhost:4443 " 70 | "-backend memory" 71 | ) 72 | stop_docker(container) 73 | subprocess.check_output(shlex.split(cmd)) 74 | url = "http://0.0.0.0:4443" 75 | timeout = 10 76 | while True: 77 | try: 78 | r = requests.get(url + "/storage/v1/b") 79 | if r.ok: 80 | yield url 81 | break 82 | except Exception as e: # noqa: E722 83 | timeout -= 1 84 | if timeout < 0: 85 | raise SystemError from e 86 | time.sleep(1) 87 | stop_docker(container) 88 | 89 | 90 | @pytest.fixture 91 | def gcs_factory(docker_gcs): 92 | params["endpoint_url"] = docker_gcs 93 | 94 | def factory(default_location=None): 95 | GCSFileSystem.clear_instance_cache() 96 | params["default_location"] = default_location 97 | return fsspec.filesystem("gcs", **params) 98 | 99 | return factory 100 | 101 | 102 | @pytest.fixture 103 | def gcs(gcs_factory, populate=True): 104 | gcs = gcs_factory() 105 | try: 106 | # ensure we're empty. 
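# Both cleanup steps below tolerate pre-existing state: a missing bucket
# on rm and an already-existing bucket on mkdir are silently ignored.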
107 | try: 108 | gcs.rm(TEST_BUCKET, recursive=True) 109 | except FileNotFoundError: 110 | pass 111 | try: 112 | gcs.mkdir(TEST_BUCKET) 113 | except Exception: 114 | pass 115 | 116 | if populate: 117 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}) 118 | gcs.invalidate_cache() 119 | yield gcs 120 | finally: 121 | try: 122 | gcs.rm(gcs.find(TEST_BUCKET)) 123 | gcs.rm(TEST_BUCKET) 124 | except: # noqa: E722 125 | pass 126 | 127 | 128 | @pytest.fixture 129 | def gcs_versioned(gcs_factory): 130 | gcs = gcs_factory() 131 | gcs.version_aware = True 132 | try: 133 | try: 134 | gcs.rm(gcs.find(TEST_BUCKET, versions=True)) 135 | except FileNotFoundError: 136 | pass 137 | 138 | try: 139 | gcs.mkdir(TEST_BUCKET, enable_versioning=True) 140 | except Exception: 141 | pass 142 | gcs.invalidate_cache() 143 | yield gcs 144 | finally: 145 | try: 146 | gcs.rm(gcs.find(TEST_BUCKET, versions=True)) 147 | gcs.rm(TEST_BUCKET) 148 | except: # noqa: E722 149 | pass 150 | -------------------------------------------------------------------------------- /gcsfs/tests/derived/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fsspec/gcsfs/7872bd7a931fb4285d5762ff5d861b8653fc7b70/gcsfs/tests/derived/__init__.py -------------------------------------------------------------------------------- /gcsfs/tests/derived/gcsfs_fixtures.py: -------------------------------------------------------------------------------- 1 | import fsspec 2 | import pytest 3 | from fsspec.tests.abstract import AbstractFixtures 4 | 5 | from gcsfs.core import GCSFileSystem 6 | from gcsfs.tests.conftest import allfiles 7 | from gcsfs.tests.settings import TEST_BUCKET 8 | 9 | 10 | class GcsfsFixtures(AbstractFixtures): 11 | @pytest.fixture(scope="class") 12 | def fs(self, docker_gcs): 13 | GCSFileSystem.clear_instance_cache() 14 | gcs = fsspec.filesystem("gcs", endpoint_url=docker_gcs) 15 | try: 16 | # ensure we're empty. 
17 | try: 18 | gcs.rm(TEST_BUCKET, recursive=True) 19 | except FileNotFoundError: 20 | pass 21 | try: 22 | gcs.mkdir(TEST_BUCKET) 23 | except Exception: 24 | pass 25 | 26 | gcs.pipe({TEST_BUCKET + "/" + k: v for k, v in allfiles.items()}) 27 | gcs.invalidate_cache() 28 | yield gcs 29 | finally: 30 | try: 31 | gcs.rm(gcs.find(TEST_BUCKET)) 32 | gcs.rm(TEST_BUCKET) 33 | except: # noqa: E722 34 | pass 35 | 36 | @pytest.fixture 37 | def fs_path(self): 38 | return TEST_BUCKET 39 | 40 | @pytest.fixture 41 | def supports_empty_directories(self): 42 | return False 43 | -------------------------------------------------------------------------------- /gcsfs/tests/derived/gcsfs_test.py: -------------------------------------------------------------------------------- 1 | import fsspec.tests.abstract as abstract 2 | 3 | from gcsfs.tests.derived.gcsfs_fixtures import GcsfsFixtures 4 | 5 | 6 | class TestGcsfsCopy(abstract.AbstractCopyTests, GcsfsFixtures): 7 | pass 8 | 9 | 10 | class TestGcsfsGet(abstract.AbstractGetTests, GcsfsFixtures): 11 | pass 12 | 13 | 14 | class TestGcsfsPut(abstract.AbstractPutTests, GcsfsFixtures): 15 | pass 16 | -------------------------------------------------------------------------------- /gcsfs/tests/fake-secret.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "service_account", 3 | "private_key_id": "NOT A SECRET", 4 | "private_key": "ALSO NOT A SECRET", 5 | "client_email": "fake-name@fake-project.iam.gserviceaccount.com", 6 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 7 | "token_uri": "https://oauth2.googleapis.com/token" 8 | } 9 | -------------------------------------------------------------------------------- /gcsfs/tests/fake-service-account-credentials.json: -------------------------------------------------------------------------------- 1 | { 2 | "type": "service_account", 3 | "project_id": "gcsfs", 4 | "private_key_id": "84e3fd6d7101ec632e7348e8940b2aca71133e71", 5 | "private_key": "-----BEGIN PRIVATE 
KEY-----\nMIIEvQIBADANBgkqhkiG9w0BAQEFAASCBKcwggSjAgEAAoIBAQDAJWz1KlBu2jRE\nlUahHKuJes34hj4pr8ADhgejpAguBBrubXVvSro7aSSbvyDC/GIcyDQ8Q33YK/kT\nufQvCez7iIACbtP53o6WjcrIAP+l8z9RUL9so+sBCaVRZzh74+cEMfWIbc3ACBB5\nU2BPBWQFtr3Qtbe8TUJ+liNcLb8I2JznfydHvl9cn0/50HeOB99Xho5JAY75aE0Y\nT+/aMTFlr/kUbekLRRi4pyE+uOA/ei5RmfwzqO366YLMtEC2DaHwTqSuxBWnbtTW\nu/OvYpmPHazd6own2zJLQ0Elnm5WC/d9YmxhHi/8pJFkkbVf/2CYWEBbmBI3ZOx3\n/nHQwcIPAgMBAAECggEAUztC/dYE/me10WmKLTrykTxpYTihT8RqG/ygbYGd63Tq\nx5IRlxJbJmYOrgp2IhBaXZZZjis8JXoyzBk2TXPyvChuLt+cIfYGdO/ZwZYxJ0z9\nhfdA3EoK/6mSe3cHcB8SEG6lqaHKyN6VaEC2DLTMlW8JvREiFEaxQY0+puzH/ge4\n2EypCP4pvlveH78EIIipPgWcJYGpv0bv8KErECuVHRjJv6vZqUjQdcIi73mCz/5u\nnQqLY8j9lOuCr9vBis7DZIyY2tn4vfqcqxfH9wuIFXnzIQW6Wyg0+bBQydHg1kJ2\nFOszfkBVxZ6LpcHGB4CV4c5z7Me2cMReXQz6VsyoLQKBgQD9v92rHZYDBy4/vGxx\nbpfUkAlcCGW8GXu+qsdmyhZdjSdjDLY6lav+6UoHIJgmnA7LsKPFgnEDrdn78KBb\n3wno3VHfozL5kF887q9hC/+UurwScCKIw5QkmWtsStVgjr6wPmAu6rspMz5xNjaa\nSU4YzlNcbBUUXUawhXytWPR+OwKBgQDB2bDCD00R2yfYFdjAKapqenOtMvrnihUi\nW9Se7Yizme7s25fDxF5CBPpOdKPU2EZUlqBC/5182oMUP/xYUOHJkuUhbYcvU0qr\n+BQewLwr6rs+O1QPTh/6e70SUFR+YJLaAHkDc6fvcdjtl+Zx/p02Zj+UiW3/D4Jj\nc0EqVr4qPQKBgQCbJx3a6xQ2dcWJoySLlxuvFQMkCt5pzQsk4jdaWmaifRSAM92Y\npLut+ecRxJRDx1gko7T/p2qC3WJT8iWbBx2ADRNqstcQUX5qO2dw5202+5bTj00O\nYsfKOSS96mPdzmo6SWl2RoB6CKM9hfCNFhVyhXXjJRMeiIoYlQZO1/1m0QKBgCzz\nat6FJ8z1MdcUsc9VmhPY00wdXzsjtOTjwHkeAa4MCvBXt2iI94Z9mwFoYLkxcZWZ\n3A3NMlrKXMzsTXq5PrI8Yu+Oc2OQ/+bCvv+ml7vjUYoLveFSr22pFd3STNWFVWhB\n5c3cGtwWXUQzDhfu/8umiCXMfHpBwW2IQ1srBCvNAoGATcC3oCFBC/HdGxdeJC5C\n59EoFvKdZsAdc2I5GS/DtZ1Wo9sXqubCaiUDz+4yty+ssHIZ1ikFr8rWfL6KFEs2\niTe+kgM/9FLFtftf1WDpbfIOumbz/6CiGLqsGNlO3ZaU0kYJ041SZ8RleTOYa0zO\noSTLwBo3vje+aflytEwS8SI=\n-----END PRIVATE KEY-----", 6 | "client_email": "fake@gscfs.iam.gserviceaccount.com", 7 | "auth_uri": "https://accounts.google.com/o/oauth2/auth", 8 | "token_uri": "https://oauth2.googleapis.com/token" 9 | } 10 | -------------------------------------------------------------------------------- /gcsfs/tests/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | TEST_BUCKET = os.getenv("GCSFS_TEST_BUCKET", "gcsfs_test") 4 | TEST_PROJECT = os.getenv("GCSFS_TEST_PROJECT", "project") 5 | TEST_REQUESTER_PAYS_BUCKET = "gcsfs_test_req_pay" 6 | TEST_KMS_KEY = os.getenv( 7 | "GCSFS_TEST_KMS_KEY", 8 | f"projects/{TEST_PROJECT}/locations/us/keyRings/gcsfs_test/cryptoKeys/gcsfs_test_key", 9 | ) 10 | -------------------------------------------------------------------------------- /gcsfs/tests/test_checkers.py: -------------------------------------------------------------------------------- 1 | import base64 2 | from hashlib import md5 3 | 4 | import pytest 5 | 6 | from gcsfs.checkers import Crc32cChecker, MD5Checker, SizeChecker, crcmod 7 | from gcsfs.retry import ChecksumError 8 | 9 | 10 | def google_response_from_data(expected_data: bytes, actual_data=None): 11 | actual_data = actual_data or expected_data 12 | checksum = md5(actual_data) 13 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 14 | if crcmod is not None: 15 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 16 | checksum.update(actual_data) 17 | crc = base64.b64encode(checksum.digest()).decode() 18 | 19 | class response: 20 | content_length = len(actual_data) 21 | headers = {"X-Goog-Hash": f"md5={checksum_b64}"} 22 | if crcmod is not None: 23 | headers["X-Goog-Hash"] += f", crc32c={crc}" 24 | 25 | return response 26 | 27 | 28 | def google_response_from_data_with_reverse_header_order( 29 |
expected_data: bytes, actual_data=None 30 | ): 31 | actual_data = actual_data or expected_data 32 | checksum = md5(actual_data) 33 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 34 | if crcmod is not None: 35 | checksum = crcmod.Crc(0x11EDC6F41, initCrc=0, xorOut=0xFFFFFFFF) 36 | checksum.update(actual_data) 37 | crc = base64.b64encode(checksum.digest()).decode() 38 | 39 | class response: 40 | content_length = len(actual_data) 41 | headers = {} 42 | if crcmod is not None: 43 | headers["X-Goog-Hash"] = f"crc32c={crc}, md5={checksum_b64}" 44 | else: 45 | headers["X-Goog-Hash"] = f"md5={checksum_b64}" 46 | 47 | return response 48 | 49 | 50 | def google_json_response_from_data(expected_data: bytes, actual_data=None): 51 | actual_data = actual_data or expected_data 52 | checksum = md5(actual_data) 53 | checksum_b64 = base64.b64encode(checksum.digest()).decode("UTF-8") 54 | 55 | response = {"md5Hash": checksum_b64, "size": len(actual_data)} 56 | 57 | # some manual checksums verified using gsutil ls -L 58 | # also can add using https://crccalc.com/ 59 | # be careful about newlines 60 | crc32c_points = { 61 | b"hello world\n": "8P9ykg==", 62 | b"different checksum": "DoesntMatter==", 63 | } 64 | 65 | try: 66 | response["crc32c"] = crc32c_points[actual_data] 67 | except KeyError: 68 | pass 69 | 70 | return response 71 | 72 | 73 | params = [ 74 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 75 | (MD5Checker(), b"hello world", b"hello world", ()), 76 | ] 77 | 78 | if crcmod is not None: 79 | params.append( 80 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)) 81 | ) 82 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 83 | 84 | 85 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 86 | def test_validate_headers(checker, data, actual_data, raises): 87 | response = google_response_from_data(actual_data) 88 | checker.update(data) 89 | 90 | if raises: 91 | with pytest.raises(raises): 92 | checker.validate_headers(response.headers) 93 | else: 94 | checker.validate_headers(response.headers) 95 | 96 | 97 | params = [ 98 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 99 | (MD5Checker(), b"hello world", b"hello world", ()), 100 | ] 101 | 102 | if crcmod is not None: 103 | params.append( 104 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)) 105 | ) 106 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 107 | 108 | 109 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 110 | def test_validate_headers_with_reverse_order(checker, data, actual_data, raises): 111 | response = google_response_from_data_with_reverse_header_order(actual_data) 112 | checker.update(data) 113 | 114 | if raises: 115 | with pytest.raises(raises): 116 | checker.validate_headers(response.headers) 117 | else: 118 | checker.validate_headers(response.headers) 119 | 120 | 121 | params = [ 122 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 123 | (MD5Checker(), b"hello world", b"hello world", ()), 124 | (SizeChecker(), b"hello world", b"hello world", ()), 125 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)), 126 | ] 127 | 128 | if crcmod is not None: 129 | params.append((Crc32cChecker(), b"hello world", b"hello world", ())) 130 | params.append( 131 | (Crc32cChecker(), b"hello world", b"different size", (ChecksumError,)) 132 | ) 133 | 134 | 135 | @pytest.mark.parametrize("checker, data, 
actual_data, raises", params) 136 | def test_checker_validate_http_response(checker, data, actual_data, raises): 137 | response = google_response_from_data(data, actual_data=actual_data) 138 | checker.update(data) 139 | if raises: 140 | with pytest.raises(raises): 141 | checker.validate_http_response(response) 142 | else: 143 | checker.validate_http_response(response) 144 | 145 | 146 | params = [ 147 | (MD5Checker(), b"hello world", b"different checksum", (ChecksumError,)), 148 | (MD5Checker(), b"hello world", b"hello world", ()), 149 | (SizeChecker(), b"hello world", b"hello world", ()), 150 | (SizeChecker(), b"hello world", b"different size", (AssertionError,)), 151 | ] 152 | if crcmod is not None: 153 | params.extend( 154 | [ 155 | (Crc32cChecker(), b"hello world", b"different checksum", (ChecksumError,)), 156 | (Crc32cChecker(), b"hello world\n", b"hello world\n", ()), 157 | ] 158 | ) 159 | 160 | 161 | @pytest.mark.parametrize("checker, data, actual_data, raises", params) 162 | def test_checker_validate_json_response(checker, data, actual_data, raises): 163 | response = google_json_response_from_data(data, actual_data=actual_data) 164 | checker.update(data) 165 | if raises: 166 | with pytest.raises(raises): 167 | checker.validate_json_response(response) 168 | else: 169 | checker.validate_json_response(response) 170 | -------------------------------------------------------------------------------- /gcsfs/tests/test_credentials.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from gcsfs import GCSFileSystem 4 | from gcsfs.credentials import GoogleCredentials 5 | from gcsfs.retry import HttpError 6 | 7 | 8 | def test_googlecredentials_none(): 9 | credentials = GoogleCredentials(project="myproject", token=None, access="read_only") 10 | headers = {} 11 | credentials.apply(headers) 12 | 13 | 14 | @pytest.mark.parametrize("token", ["", "incorrect.token", "x" * 100]) 15 | def test_credentials_from_raw_token(token): 16 | with pytest.raises(HttpError, match="Invalid Credentials"): 17 | fs = GCSFileSystem(project="myproject", token=token) 18 | fs.ls("/") 19 | -------------------------------------------------------------------------------- /gcsfs/tests/test_fuse.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import os 3 | import sys 4 | import tempfile 5 | import threading 6 | import time 7 | from functools import partial 8 | 9 | import pytest 10 | 11 | from gcsfs.tests.settings import TEST_BUCKET 12 | 13 | 14 | @pytest.mark.timeout(180) 15 | @pytest.fixture 16 | def fsspec_fuse_run(): 17 | """Fixture catches other errors on fuse import.""" 18 | try: 19 | _fuse = pytest.importorskip("fuse") # noqa 20 | 21 | from fsspec.fuse import run as _fsspec_fuse_run 22 | 23 | return _fsspec_fuse_run 24 | except Exception as error: 25 | logging.debug("Error importing fuse: %s", error) 26 | pytest.skip("Error importing fuse.") 27 | 28 | 29 | @pytest.mark.skipif(sys.version_info < (3, 9), reason="Test fuse causes hang.") 30 | @pytest.mark.xfail(reason="Failing test not previously tested.") 31 | @pytest.mark.timeout(180) 32 | def test_fuse(gcs, fsspec_fuse_run): 33 | mountpath = tempfile.mkdtemp() 34 | _run = partial(fsspec_fuse_run, gcs, TEST_BUCKET + "/", mountpath) 35 | th = threading.Thread(target=_run) 36 | th.daemon = True 37 | th.start() 38 | 39 | time.sleep(5) 40 | timeout = 20 41 | n = 40 42 | for i in range(n): 43 | logging.debug(f"Attempt # {i+1}/{n} to create lock file.") 44 | 
try: 45 | open(os.path.join(mountpath, "lock"), "w").close() 46 | os.remove(os.path.join(mountpath, "lock")) 47 | break 48 | except Exception as error: # noqa: E722 49 | logging.debug("Error: %s", error) 50 | time.sleep(0.5) 51 | timeout -= 0.5 52 | assert timeout > 0 53 | else: 54 | raise AssertionError(f"Failed to create lock file after {n} attempts.") 55 | 56 | with open(os.path.join(mountpath, "hello"), "w") as f: 57 | # NB this is in TEXT mode 58 | f.write("hello") 59 | files = os.listdir(mountpath) 60 | assert "hello" in files 61 | with open(os.path.join(mountpath, "hello")) as f: 62 | # NB this is in TEXT mode 63 | assert f.read() == "hello" 64 | -------------------------------------------------------------------------------- /gcsfs/tests/test_inventory_report.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from datetime import datetime, timedelta 3 | from unittest import mock 4 | 5 | import pytest 6 | 7 | from gcsfs.core import GCSFileSystem 8 | from gcsfs.inventory_report import InventoryReport, InventoryReportConfig 9 | 10 | 11 | class TestInventoryReport(object): 12 | """ 13 | Unit tests for the inventory report logic, see 'inventory_report.py'. 14 | 15 | The test cases follow the same ordering as the methods in `inventory_report.py`. 16 | Each method is covered by one or more parametrized test cases. Some 17 | methods include a setup method just above them. 18 | """ 19 | 20 | @pytest.mark.parametrize( 21 | "inventory_report_info, expected_error", 22 | [ 23 | # Check whether missing inventory report info will raise exception. 24 | ( 25 | {"location": "us-west", "id": "123"}, 26 | "Use snapshot listing is not configured.", 27 | ), 28 | ( 29 | {"use_snapshot_listing": True, "id": "123"}, 30 | "Inventory report location is not configured.", 31 | ), 32 | ( 33 | {"use_snapshot_listing": True, "location": "us-west"}, 34 | "Inventory report id is not configured.", 35 | ), 36 | # Check complete inventory report info will not raise exception. 37 | ({"use_snapshot_listing": True, "location": "us-west", "id": "123"}, None), 38 | ], 39 | ) 40 | def test_validate_inventory_report_info( 41 | self, inventory_report_info, expected_error 42 | ): 43 | if expected_error is not None: 44 | with pytest.raises(ValueError) as e_info: 45 | InventoryReport._validate_inventory_report_info( 46 | inventory_report_info=inventory_report_info 47 | ) 48 | assert str(e_info.value) == expected_error 49 | else: 50 | # If no error is expected, we simply call the function 51 | # to ensure no exception is raised. 52 | InventoryReport._validate_inventory_report_info( 53 | inventory_report_info=inventory_report_info 54 | ) 55 | 56 | @pytest.mark.asyncio 57 | @pytest.mark.parametrize( 58 | "location, id, exception, expected_result", 59 | [ 60 | # Test that fetching with no error proceeds normally. 61 | ("us-west", "id1", None, {"config": "config1"}), 62 | # Test if the exception is caught successfully. 63 | ("us-west", "id2", Exception("fetch error"), None), 64 | ], 65 | ) 66 | async def test_fetch_raw_inventory_report_config( 67 | self, location, id, exception, expected_result 68 | ): 69 | # Mocking the gcs_file_system. 70 | gcs_file_system = mock.MagicMock() 71 | gcs_file_system.project = "project" 72 | 73 | # Mocking gcs_file_system._call.
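# For the success path, the expected result is wrapped in an
# asyncio.Future so the MagicMock's return value can be awaited
# like a real coroutine.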
74 | if exception is not None: 75 | gcs_file_system._call = mock.MagicMock(side_effect=exception) 76 | else: 77 | return_value = asyncio.Future() 78 | return_value.set_result(expected_result) 79 | gcs_file_system._call = mock.MagicMock(return_value=return_value) 80 | 81 | if exception is not None: 82 | with pytest.raises(Exception) as e_info: 83 | await InventoryReport._fetch_raw_inventory_report_config( 84 | gcs_file_system=gcs_file_system, location=location, id=id 85 | ) 86 | assert str(e_info.value) == str(exception) 87 | else: 88 | result = await InventoryReport._fetch_raw_inventory_report_config( 89 | gcs_file_system=gcs_file_system, location=location, id=id 90 | ) 91 | gcs_file_system._call.assert_called_once_with( 92 | "GET", mock.ANY, json_out=True 93 | ) 94 | assert result == expected_result 95 | 96 | def test_parse_raw_inventory_report_config_invalid_date(self): 97 | today = datetime.today().date() 98 | 99 | # Get tomorrow's date. 100 | tomorrow = today + timedelta(days=1) 101 | 102 | # Get the date a week later. 103 | next_week = today + timedelta(days=7) 104 | 105 | raw_inventory_report_config = { 106 | "frequencyOptions": { 107 | "startDate": { 108 | "day": tomorrow.day, 109 | "month": tomorrow.month, 110 | "year": tomorrow.year, 111 | }, 112 | "endDate": { 113 | "day": next_week.day, 114 | "month": next_week.month, 115 | "year": next_week.year, 116 | }, 117 | }, 118 | "objectMetadataReportOptions": mock.MagicMock(), 119 | "csvOptions": mock.MagicMock(), 120 | } 121 | 122 | # If the current date is outside the range in the inventory report, 123 | # an exception should be raised. 124 | with pytest.raises(ValueError): 125 | InventoryReport._parse_raw_inventory_report_config( 126 | raw_inventory_report_config=raw_inventory_report_config, 127 | use_snapshot_listing=mock.MagicMock(), 128 | ) 129 | 130 | def test_parse_raw_inventory_report_config_missing_metadata_fields(self): 131 | raw_inventory_report_config = { 132 | "frequencyOptions": mock.MagicMock(), 133 | "objectMetadataReportOptions": { 134 | "metadataFields": ["project", "bucket", "name"], 135 | "storageDestinationOptions": mock.MagicMock(), 136 | }, 137 | "csvOptions": mock.MagicMock(), 138 | } 139 | 140 | # When the user wants to use snapshot listing, but object size is not 141 | # included in the inventory reports, an exception should be raised.
142 | with pytest.raises(ValueError): 143 | InventoryReport._parse_raw_inventory_report_config( 144 | raw_inventory_report_config=raw_inventory_report_config, 145 | use_snapshot_listing=True, 146 | ) 147 | 148 | def test_parse_raw_inventory_report_config_returns_correct_config(self): 149 | bucket = "bucket" 150 | destination_path = "path/to/inventory-report" 151 | metadata_fields = ["project", "bucket", "name", "size"] 152 | obj_name_idx = metadata_fields.index("name") 153 | today = datetime.today().date() 154 | yesterday = today - timedelta(days=1) 155 | tomorrow = today + timedelta(days=1) 156 | use_snapshot_listing = False 157 | 158 | csv_options = { 159 | "recordSeparator": "\n", 160 | "delimiter": ",", 161 | "headerRequired": False, 162 | } 163 | 164 | raw_inventory_report_config = { 165 | "frequencyOptions": { 166 | "startDate": { 167 | "day": yesterday.day, 168 | "month": yesterday.month, 169 | "year": yesterday.year, 170 | }, 171 | "endDate": { 172 | "day": tomorrow.day, 173 | "month": tomorrow.month, 174 | "year": tomorrow.year, 175 | }, 176 | }, 177 | "objectMetadataReportOptions": { 178 | "metadataFields": metadata_fields, 179 | "storageDestinationOptions": { 180 | "bucket": bucket, 181 | "destinationPath": destination_path, 182 | }, 183 | }, 184 | "csvOptions": csv_options, 185 | } 186 | 187 | try: 188 | inventory_report_config = ( 189 | InventoryReport._parse_raw_inventory_report_config( 190 | raw_inventory_report_config=raw_inventory_report_config, 191 | use_snapshot_listing=use_snapshot_listing, 192 | ) 193 | ) 194 | 195 | assert isinstance(inventory_report_config, InventoryReportConfig) 196 | 197 | assert inventory_report_config.csv_options == csv_options 198 | assert inventory_report_config.bucket == bucket 199 | assert inventory_report_config.destination_path == destination_path 200 | assert inventory_report_config.metadata_fields == metadata_fields 201 | assert inventory_report_config.obj_name_idx == obj_name_idx 202 | 203 | except Exception as e: 204 | pytest.fail(f"Unexpected exception: {e}.") 205 | 206 | @pytest.mark.asyncio 207 | async def test_fetch_inventory_report_metadata_no_reports(self): 208 | # Create a mock for GCSFileSystem. 209 | gcs_file_system = mock.MagicMock(spec=GCSFileSystem) 210 | 211 | # Mock the _call method to return a single empty page 212 | # with no next page token. 213 | gcs_file_system._call.side_effect = [{"items": [], "nextPageToken": None}] 214 | 215 | # Create a mock for InventoryReportConfig. 216 | inventory_report_config = mock.MagicMock(spec=InventoryReportConfig) 217 | inventory_report_config.bucket = "bucket_name" 218 | inventory_report_config.destination_path = "destination_path" 219 | 220 | # If no inventory report metadata is fetched, an exception should be raised. 221 | match = "No inventory reports to fetch. Check if \ 222 | your inventory report is set up correctly." 223 | with pytest.raises(ValueError, match=match): 224 | await InventoryReport._fetch_inventory_report_metadata( 225 | gcs_file_system=gcs_file_system, 226 | inventory_report_config=inventory_report_config, 227 | ) 228 | 229 | @pytest.mark.asyncio 230 | async def test_fetch_inventory_report_metadata_multiple_calls(self): 231 | # Create a mock for GCSFileSystem. 232 | gcs_file_system = mock.MagicMock(spec=GCSFileSystem) 233 | 234 | # Mock the _call method to return a page with two items 235 | # and then a page with one item and no next page token.
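# Each element of the side_effect list below is returned by one awaited
# call, so the two dicts simulate two consecutive pages of results.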
236 | gcs_file_system._call.side_effect = [ 237 | {"items": ["item1", "item2"], "nextPageToken": "token1"}, 238 | {"items": ["item3"], "nextPageToken": None}, 239 | ] 240 | 241 | # Create a mock for InventoryReportConfig. 242 | inventory_report_config = mock.MagicMock(spec=InventoryReportConfig) 243 | inventory_report_config.bucket = "bucket_name" 244 | inventory_report_config.destination_path = "destination_path" 245 | 246 | result = await InventoryReport._fetch_inventory_report_metadata( 247 | gcs_file_system=gcs_file_system, 248 | inventory_report_config=inventory_report_config, 249 | ) 250 | 251 | # Check that _call was called with the right arguments. 252 | calls = [ 253 | mock.call( 254 | "GET", "b/{}/o", "bucket_name", prefix="destination_path", json_out=True 255 | ), 256 | mock.call( 257 | "GET", 258 | "b/{}/o", 259 | "bucket_name", 260 | prefix="destination_path", 261 | pageToken="token1", 262 | json_out=True, 263 | ), 264 | ] 265 | gcs_file_system._call.assert_has_calls(calls) 266 | 267 | # Check that the function correctly processed the response 268 | # and returned the right result. 269 | assert result == ["item1", "item2", "item3"] 270 | 271 | @pytest.mark.parametrize( 272 | "unsorted_inventory_report_metadata, expected", 273 | [ 274 | ( 275 | # Input. 276 | [ 277 | {"timeCreated": "2023-08-01T12:00:00Z"}, 278 | {"timeCreated": "2023-08-02T12:00:00Z"}, 279 | {"timeCreated": "2023-08-03T12:00:00Z"}, 280 | ], 281 | # Expected output. 282 | [ 283 | {"timeCreated": "2023-08-03T12:00:00Z"}, 284 | {"timeCreated": "2023-08-02T12:00:00Z"}, 285 | {"timeCreated": "2023-08-01T12:00:00Z"}, 286 | ], 287 | ), 288 | ( 289 | # Input. 290 | [ 291 | {"timeCreated": "2023-08-01T12:00:00Z"}, 292 | {"timeCreated": "2023-07-31T12:00:00Z"}, 293 | {"timeCreated": "2023-08-02T12:00:00Z"}, 294 | ], 295 | # Expected output. 296 | [ 297 | {"timeCreated": "2023-08-02T12:00:00Z"}, 298 | {"timeCreated": "2023-08-01T12:00:00Z"}, 299 | {"timeCreated": "2023-07-31T12:00:00Z"}, 300 | ], 301 | ), 302 | ], 303 | ) 304 | def test_sort_inventory_report_metadata( 305 | self, unsorted_inventory_report_metadata, expected 306 | ): 307 | result = InventoryReport._sort_inventory_report_metadata( 308 | unsorted_inventory_report_metadata=unsorted_inventory_report_metadata 309 | ) 310 | assert result == expected 311 | 312 | @pytest.fixture( 313 | params=[ 314 | # Unique most recent day, same datetime. 315 | ( 316 | [ 317 | {"name": "report1", "timeCreated": "2023-08-02T12:00:00.000Z"}, 318 | {"name": "report2", "timeCreated": "2023-08-01T12:00:00.000Z"}, 319 | ], 320 | # Expected results. 321 | ["report1"], 322 | ), 323 | # Multiple most recent day, same datetime. 324 | ( 325 | [ 326 | {"name": "report1", "timeCreated": "2023-08-02T12:00:00.000Z"}, 327 | {"name": "report2", "timeCreated": "2023-08-02T12:00:00.000Z"}, 328 | {"name": "report3", "timeCreated": "2023-08-01T12:00:00.000Z"}, 329 | ], 330 | # Expected results. 331 | ["report1", "report2"], 332 | ), 333 | # Multiple most recent day, different datetimes (same day, different hour). 334 | ( 335 | [ 336 | {"name": "report1", "timeCreated": "2023-08-02T12:00:00.000Z"}, 337 | {"name": "report2", "timeCreated": "2023-08-02T11:00:00.000Z"}, 338 | {"name": "report3", "timeCreated": "2023-08-01T12:00:00.000Z"}, 339 | ], 340 | # Expected results. 
341 | ["report1", "report2"], 342 | ), 343 | ] 344 | ) 345 | def download_inventory_report_content_setup(self, request): 346 | bucket = "bucket" 347 | gcs_file_system = mock.MagicMock() 348 | inventory_report_metadata, expected_reports = request.param 349 | 350 | # We are accessing the third argument as the return value, 351 | # since it is the object name in the function. 352 | # We are also encoding the content, since the actual method call needs 353 | # to decode the content. 354 | async_side_effect = mock.AsyncMock( 355 | side_effect=lambda *args, **kwargs: ("_header", args[3].encode()) 356 | ) 357 | gcs_file_system._call = async_side_effect 358 | return gcs_file_system, inventory_report_metadata, bucket, expected_reports 359 | 360 | @pytest.mark.asyncio 361 | async def test_download_inventory_report_content( 362 | self, download_inventory_report_content_setup 363 | ): 364 | ( 365 | gcs_file_system, 366 | inventory_report_metadata, 367 | bucket, 368 | expected_reports, 369 | ) = download_inventory_report_content_setup 370 | 371 | result = await InventoryReport._download_inventory_report_content( 372 | gcs_file_system=gcs_file_system, 373 | inventory_report_metadata=inventory_report_metadata, 374 | bucket=bucket, 375 | ) 376 | 377 | # Verify the mocked downloaded reports match (ordering does not matter). 378 | assert sorted(result) == sorted(expected_reports) 379 | 380 | @pytest.mark.parametrize( 381 | "inventory_report_line, use_snapshot_listing, \ 382 | inventory_report_config_attrs, delimiter, bucket, expected", 383 | [ 384 | # Test case 1: use snapshot listing with specific metadata 385 | # fields and delimiter. 386 | ( 387 | "object1,value1,value2", 388 | True, 389 | {"obj_name_idx": 0, "metadata_fields": ["name", "field1", "field2"]}, 390 | ",", 391 | "bucket", 392 | {"name": "object1", "field1": "value1", "field2": "value2"}, 393 | ), 394 | # Test case 2: do not use snapshot listing and only fetch the name. 395 | ( 396 | "object1,value1,value2", 397 | False, 398 | {"obj_name_idx": 0, "metadata_fields": ["name", "field1", "field2"]}, 399 | ",", 400 | "bucket", 401 | {"name": "object1"}, 402 | ), 403 | ], 404 | ) 405 | def test_parse_inventory_report_line( 406 | self, 407 | inventory_report_line, 408 | use_snapshot_listing, 409 | inventory_report_config_attrs, 410 | delimiter, 411 | bucket, 412 | expected, 413 | ): 414 | # Mock InventoryReportConfig. 415 | inventory_report_config = mock.MagicMock(spec=InventoryReportConfig) 416 | inventory_report_config.obj_name_idx = inventory_report_config_attrs.get( 417 | "obj_name_idx" 418 | ) 419 | inventory_report_config.metadata_fields = inventory_report_config_attrs.get( 420 | "metadata_fields" 421 | ) 422 | 423 | # Mock GCSFileSystem. 424 | gcs_file_system = mock.MagicMock(spec=GCSFileSystem) 425 | gcs_file_system._process_object = mock.Mock(side_effect=lambda obj, bucket: obj) 426 | 427 | result = InventoryReport._parse_inventory_report_line( 428 | inventory_report_line=inventory_report_line, 429 | use_snapshot_listing=use_snapshot_listing, 430 | gcs_file_system=gcs_file_system, 431 | inventory_report_config=inventory_report_config, 432 | delimiter=delimiter, 433 | bucket=bucket, 434 | ) 435 | 436 | assert result == expected 437 | 438 | @pytest.fixture( 439 | params=[ 440 | # One file, one line.
441 | (["header \n line1"], {"recordSeparator": "\n", "headerRequired": True}), 442 | (["line1"], {"recordSeparator": "\n", "headerRequired": False}), 443 | ( 444 | ["header \r\n line1"], 445 | {"recordSeparator": "\r\n", "headerRequired": True}, 446 | ), 447 | (["line1"], {"recordSeparator": "\r\n", "headerRequired": False}), 448 | # One file, multiple lines. 449 | ( 450 | ["header \n line1 \n line2 \n line3"], 451 | {"recordSeparator": "\n", "headerRequired": True}, 452 | ), 453 | ( 454 | ["line1 \n line2 \n line3"], 455 | {"recordSeparator": "\n", "headerRequired": False}, 456 | ), 457 | ( 458 | ["header \r\n line1 \r\n line2 \r\n line3"], 459 | {"recordSeparator": "\r\n", "headerRequired": True}, 460 | ), 461 | ( 462 | ["line1 \r\n line2 \r\n line3"], 463 | {"recordSeparator": "\r\n", "headerRequired": False}, 464 | ), 465 | # Multiple files. 466 | ( 467 | ["line1", "line2 \n line3"], 468 | {"recordSeparator": "\n", "headerRequired": False}, 469 | ), 470 | ( 471 | ["header \n line1", "header \n line2 \n line3"], 472 | {"recordSeparator": "\n", "headerRequired": True}, 473 | ), 474 | ] 475 | ) 476 | def parse_inventory_report_content_setup(self, request): 477 | # Mock the necessary parameters. 478 | gcs_file_system = mock.MagicMock() 479 | bucket = mock.MagicMock() 480 | use_snapshot_listing = mock.MagicMock() 481 | 482 | # Parse the content and config data. 483 | inventory_report_content = request.param[0] 484 | inventory_report_config = request.param[1] 485 | record_separator = inventory_report_config["recordSeparator"] 486 | header_required = inventory_report_config["headerRequired"] 487 | 488 | # Construct custom inventory report config. 489 | inventory_report_config = mock.MagicMock(spec=InventoryReportConfig) 490 | inventory_report_config.csv_options = { 491 | "recordSeparator": record_separator, 492 | "headerRequired": header_required, 493 | } 494 | 495 | # Stub parse_inventory_report_line method. 496 | InventoryReport._parse_inventory_report_line = mock.MagicMock( 497 | side_effect="parsed_inventory_report_line" 498 | ) 499 | 500 | return ( 501 | gcs_file_system, 502 | inventory_report_content, 503 | inventory_report_config, 504 | bucket, 505 | use_snapshot_listing, 506 | ) 507 | 508 | def test_parse_inventory_reports(self, parse_inventory_report_content_setup): 509 | ( 510 | gcs_file_system, 511 | inventory_report_content, 512 | inventory_report_config, 513 | bucket, 514 | use_snapshot_listing, 515 | ) = parse_inventory_report_content_setup 516 | 517 | record_separator = inventory_report_config.csv_options["recordSeparator"] 518 | header_required = inventory_report_config.csv_options["headerRequired"] 519 | 520 | # Number of inventory reports. 521 | num_inventory_reports = len(inventory_report_content) 522 | 523 | # Total number of object metadata lines. 524 | total_lines_in_reports = sum( 525 | content.count(record_separator) + 1 for content in inventory_report_content 526 | ) 527 | 528 | # Remove one header line per report if a header is present. 529 | total_lines_in_reports -= num_inventory_reports * 1 if header_required else 0 530 | 531 | result = InventoryReport._parse_inventory_report_content( 532 | gcs_file_system=gcs_file_system, 533 | inventory_report_content=inventory_report_content, 534 | inventory_report_config=inventory_report_config, 535 | use_snapshot_listing=use_snapshot_listing, 536 | bucket=bucket, 537 | ) 538 | 539 | # Assert that the number of objects returned is correct.
540 | assert len(result) == total_lines_in_reports 541 | 542 | # Assert parse_inventory_report_line was called the correct 543 | # number of times. 544 | assert ( 545 | InventoryReport._parse_inventory_report_line.call_count 546 | == total_lines_in_reports 547 | ) 548 | 549 | @pytest.mark.parametrize( 550 | "use_snapshot_listing, prefix, mock_objects, expected_result", 551 | [ 552 | # Not using snapshot, no prefix, directory, all matched. 553 | ( 554 | False, 555 | None, 556 | [{"name": "prefix/object1"}, {"name": "prefix/object2"}], 557 | ([{"name": "prefix/object1"}, {"name": "prefix/object2"}], []), 558 | ), 559 | # Not using snapshot, no prefix, no directory, all matched. 560 | ( 561 | False, 562 | None, 563 | [{"name": "object1"}, {"name": "object2"}], 564 | ([{"name": "object1"}, {"name": "object2"}], []), 565 | ), 566 | # Not using snapshot, prefix, directory, all matched. 567 | ( 568 | False, 569 | "prefix", 570 | [{"name": "prefix/object1"}, {"name": "prefix/object2"}], 571 | ([{"name": "prefix/object1"}, {"name": "prefix/object2"}], []), 572 | ), 573 | # Not using snapshot, prefix, directory, some matched. 574 | ( 575 | False, 576 | "prefix", 577 | [{"name": "prefix/object1"}, {"name": "object2"}], 578 | ([{"name": "prefix/object1"}], []), 579 | ), 580 | # Not using snapshot, prefix, directory, none matched. 581 | (False, "prefix", [{"name": "a/object1"}, {"name": "b/object2"}], ([], [])), 582 | # Not using snapshot, prefix, no directory, all matched. 583 | ( 584 | False, 585 | "object", 586 | [{"name": "object1"}, {"name": "object2"}], 587 | ([{"name": "object1"}, {"name": "object2"}], []), 588 | ), 589 | # Not using snapshot, prefix, no directory, some matched. 590 | ( 591 | False, 592 | "object", 593 | [{"name": "object1"}, {"name": "obj2"}], 594 | ([{"name": "object1"}], []), 595 | ), 596 | # Not using snapshot, prefix, no directory, none matched. 597 | (False, "object", [{"name": "obj1"}, {"name": "obj2"}], ([], [])), 598 | # Using snapshot, no prefix, no directory. 599 | ( 600 | True, 601 | None, 602 | [{"name": "object1"}, {"name": "object2"}], 603 | ([{"name": "object1"}, {"name": "object2"}], []), 604 | ), 605 | # Using snapshot, no prefix, a single directory. 606 | ( 607 | True, 608 | None, 609 | [{"name": "object1"}, {"name": "dir/object2"}], 610 | ([{"name": "object1"}], ["dir/"]), 611 | ), 612 | # Using snapshot, no prefix, multiple directories. 613 | ( 614 | True, 615 | None, 616 | [ 617 | {"name": "object1"}, 618 | {"name": "dir1/object2"}, 619 | {"name": "dir2/object3"}, 620 | ], 621 | ([{"name": "object1"}], ["dir1/", "dir2/"]), 622 | ), 623 | # Using snapshot, no prefix, same directory multiple times. 624 | ( 625 | True, 626 | None, 627 | [ 628 | {"name": "object1"}, 629 | {"name": "dir1/object2"}, 630 | {"name": "dir1/object3"}, 631 | ], 632 | ([{"name": "object1"}], ["dir1/"]), 633 | ), 634 | # Using snapshot, prefix, no directory. 635 | ( 636 | True, 637 | "object", 638 | [{"name": "object1"}, {"name": "object2"}], 639 | ([{"name": "object1"}, {"name": "object2"}], []), 640 | ), 641 | # Using snapshot, prefix, a single directory. 642 | ( 643 | True, 644 | "dir1/", 645 | [{"name": "dir1/dir2/object1"}, {"name": "dir1/object2"}], 646 | ([{"name": "dir1/object2"}], ["dir1/dir2/"]), 647 | ), 648 | # Using snapshot, prefix, multiple directories. 
649 | ( 650 | True, 651 | "dir1/", 652 | [ 653 | {"name": "dir1/dir2/object1"}, 654 | {"name": "dir1/dir3/object2"}, 655 | {"name": "dir1/object3"}, 656 | ], 657 | ([{"name": "dir1/object3"}], ["dir1/dir2/", "dir1/dir3/"]), 658 | ), 659 | # Using snapshot, prefix, same directory multiple times. 660 | ( 661 | True, 662 | "dir1/", 663 | [ 664 | {"name": "dir1/dir2/object1"}, 665 | {"name": "dir1/dir2/object2"}, 666 | {"name": "dir1/object3"}, 667 | ], 668 | ([{"name": "dir1/object3"}], ["dir1/dir2/"]), 669 | ), 670 | # Sanity check from the examples given by the JSON API. 671 | # https://cloud.google.com/storage/docs/json_api/v1/objects/list 672 | ( 673 | True, 674 | None, 675 | [ 676 | {"name": "a/b"}, 677 | {"name": "a/c"}, 678 | {"name": "d"}, 679 | {"name": "e"}, 680 | {"name": "e/f"}, 681 | {"name": "e/g/h"}, 682 | ], 683 | ([{"name": "d"}, {"name": "e"}], ["a/", "e/"]), 684 | ), 685 | ( 686 | True, 687 | "e/", 688 | [ 689 | {"name": "a/b"}, 690 | {"name": "a/c"}, 691 | {"name": "d"}, 692 | {"name": "e"}, 693 | {"name": "e/f"}, 694 | {"name": "e/g/h"}, 695 | ], 696 | ([{"name": "e/f"}], ["e/g/"]), 697 | ), 698 | ( 699 | True, 700 | "e", 701 | [ 702 | {"name": "a/b"}, 703 | {"name": "a/c"}, 704 | {"name": "d"}, 705 | {"name": "e"}, 706 | {"name": "e/f"}, 707 | {"name": "e/g/h"}, 708 | ], 709 | ([{"name": "e"}], ["e/"]), 710 | ), 711 | ], 712 | ) 713 | def test_construct_final_snapshot( 714 | self, use_snapshot_listing, prefix, mock_objects, expected_result 715 | ): 716 | # Construct the final snapshot. 717 | result = InventoryReport._construct_final_snapshot( 718 | objects=mock_objects, 719 | prefix=prefix, 720 | use_snapshot_listing=use_snapshot_listing, 721 | ) 722 | 723 | # Assert the expected outcomes. 724 | items, prefixes = result 725 | expected_items, expected_prefixes = expected_result 726 | assert items == expected_items 727 | assert sorted(prefixes) == sorted(expected_prefixes) 728 | 729 | 730 | # Test that the fields of the inventory report config are correctly stored. 731 | class TestInventoryReportConfig: 732 | def test_inventory_report_config_creation(self): 733 | csv_options = {} 734 | bucket = "bucket" 735 | destination_path = "" 736 | metadata_fields = [] 737 | obj_name_idx = 0 738 | 739 | inventory_report_config = InventoryReportConfig( 740 | csv_options=csv_options, 741 | bucket=bucket, 742 | destination_path=destination_path, 743 | metadata_fields=metadata_fields, 744 | obj_name_idx=obj_name_idx, 745 | ) 746 | 747 | assert inventory_report_config.csv_options == csv_options 748 | assert inventory_report_config.bucket == bucket 749 | assert inventory_report_config.destination_path == destination_path 750 | assert inventory_report_config.metadata_fields == metadata_fields 751 | assert inventory_report_config.obj_name_idx == obj_name_idx 752 | -------------------------------------------------------------------------------- /gcsfs/tests/test_inventory_report_listing.py: -------------------------------------------------------------------------------- 1 | import gcsfs.checkers 2 | import gcsfs.tests.settings 3 | from gcsfs.inventory_report import InventoryReport 4 | 5 | TEST_BUCKET = gcsfs.tests.settings.TEST_BUCKET 6 | 7 | 8 | # Basic integration test to ensure listing returns the correct result. 9 | def test_ls_base(monkeypatch, gcs): 10 | # First get results from original listing.
/gcsfs/tests/test_inventory_report_listing.py:
--------------------------------------------------------------------------------
1 | import gcsfs.checkers
2 | import gcsfs.tests.settings
3 | from gcsfs.inventory_report import InventoryReport
4 | 
5 | TEST_BUCKET = gcsfs.tests.settings.TEST_BUCKET
6 | 
7 | 
8 | # Basic integration test to ensure listing returns the correct result.
9 | def test_ls_base(monkeypatch, gcs):
10 |     # First get results from original listing.
11 |     items = gcs.ls(TEST_BUCKET)
12 | 
13 |     async def mock_fetch_snapshot(*args, **kwargs):
14 |         return [{"name": item} for item in items], []
15 | 
16 |     # Patch the fetch_snapshot method with the replacement.
17 |     monkeypatch.setattr(InventoryReport, "fetch_snapshot", mock_fetch_snapshot)
18 | 
19 |     inventory_report_info = {
20 |         "location": "location",
21 |         "id": "id",
22 |         "use_snapshot_listing": False,
23 |     }
24 | 
25 |     # Then get results from listing with the inventory report.
26 |     actual_items = gcs.ls(TEST_BUCKET, inventory_report_info=inventory_report_info)
27 | 
28 |     # Check equality.
29 |     assert actual_items == items
30 | 
--------------------------------------------------------------------------------
/gcsfs/tests/test_manyopens.py:
--------------------------------------------------------------------------------
1 | """
2 | Test helper to open the same file many times.
3 | 
4 | This is not a python unit test, but rather a standalone program that will open
5 | a file repeatedly, to check whether a cloud storage transient error can
6 | defeat gcsfs. This is to be run against real GCS, since we cannot capture
7 | HTTP exceptions with VCR.
8 | 
9 | Ideally you should see no errors, just the attempt count going up until done.
10 | """
11 | 
12 | import sys
13 | 
14 | import gcsfs
15 | 
16 | 
17 | def run():
18 |     if len(sys.argv) != 4:
19 |         print(
20 |             "usage: python -m gcsfs.tests.test_manyopens "
21 |             "<project> <credentials-file> <gcs-file-path>"
22 |         )
23 |         return
24 |     project = sys.argv[1]
25 |     credentials = sys.argv[2]
26 |     file = sys.argv[3]
27 |     print("project: " + project)
28 |     for i in range(2000):
29 |         # Issue #12 only reproduces if I re-create the fs object every time.
30 |         fs = gcsfs.GCSFileSystem(project=project, token=credentials)
31 |         print("attempt %s" % i)
32 |         with fs.open(file, "rb") as o:
33 |             o.readline()
34 | 
35 | 
36 | if __name__ == "__main__":
37 |     run()
38 | 
--------------------------------------------------------------------------------
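The mapping tests that follow exercise ``get_mapper``, fsspec's MutableMapping
view of a bucket path: keys become object names under the root, and values are
the object contents as bytes. A rough usage sketch, assuming credentials are
configured and using placeholder project and bucket names::

    import gcsfs

    fs = gcsfs.GCSFileSystem(project="my-project")  # placeholder project id
    d = fs.get_mapper("my-bucket/mapping")  # dict-like view over GCS
    d["x"] = b"123"  # writes my-bucket/mapping/x
    assert d["x"] == b"123"
    assert sorted(d) == ["x"]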
d["1"] = b"1" 65 | assert list(d) == ["1"] or list(d) == ["1"] 66 | d.clear() 67 | assert list(d) == [] 68 | 69 | 70 | def test_map_pickle(gcs): 71 | d = gcs.get_mapper(MAPPING_ROOT) 72 | d["x"] = b"1" 73 | assert d["x"] == b"1" 74 | 75 | import pickle 76 | 77 | d2 = pickle.loads(pickle.dumps(d)) 78 | 79 | assert d2["x"] == b"1" 80 | 81 | 82 | def test_map_array(gcs): 83 | from array import array 84 | 85 | d = gcs.get_mapper(MAPPING_ROOT) 86 | d["x"] = array("B", [65] * 1000) 87 | 88 | assert d["x"] == b"A" * 1000 89 | 90 | 91 | def test_map_bytearray(gcs): 92 | d = gcs.get_mapper(MAPPING_ROOT) 93 | d["x"] = bytearray(b"123") 94 | 95 | assert d["x"] == b"123" 96 | 97 | 98 | def test_new_bucket(gcs): 99 | new_bucket = TEST_BUCKET + "new-bucket" 100 | try: 101 | gcs.rmdir(new_bucket) 102 | except: # noqa: E722 103 | pass 104 | with pytest.raises(Exception) as e: 105 | d = gcs.get_mapper(new_bucket, check=True) 106 | assert "create=True" in str(e.value) 107 | 108 | try: 109 | d = gcs.get_mapper(new_bucket, create=True) 110 | assert not d 111 | 112 | d = gcs.get_mapper(new_bucket + "/new-directory") 113 | assert not d 114 | finally: 115 | gcs.rmdir(new_bucket) 116 | 117 | 118 | def test_map_pickle(gcs): 119 | import pickle 120 | 121 | d = gcs.get_mapper(MAPPING_ROOT) 122 | d["x"] = b"1234567890" 123 | 124 | b = pickle.dumps(d) 125 | assert b"1234567890" not in b 126 | 127 | e = pickle.loads(b) 128 | 129 | assert dict(e) == {"x": b"1234567890"} 130 | -------------------------------------------------------------------------------- /gcsfs/tests/test_retry.py: -------------------------------------------------------------------------------- 1 | import multiprocessing 2 | import os 3 | import pickle 4 | from concurrent.futures import ProcessPoolExecutor 5 | 6 | import pytest 7 | import requests 8 | from requests.exceptions import ProxyError 9 | 10 | from gcsfs.retry import HttpError, is_retriable, validate_response 11 | from gcsfs.tests.settings import TEST_BUCKET 12 | from gcsfs.tests.utils import tmpfile 13 | 14 | 15 | def test_tempfile(): 16 | with tmpfile() as fn: 17 | with open(fn, "w"): 18 | pass 19 | assert os.path.exists(fn) 20 | assert not os.path.exists(fn) 21 | 22 | 23 | def test_retriable_exception(): 24 | e = requests.exceptions.Timeout() 25 | assert is_retriable(e) 26 | e = ValueError 27 | assert not is_retriable(e) 28 | 29 | e = HttpError({"message": "", "code": 500}) 30 | assert is_retriable(e) 31 | 32 | e = HttpError({"message": "", "code": "500"}) 33 | assert is_retriable(e) 34 | 35 | e = HttpError({"message": "", "code": 400}) 36 | assert not is_retriable(e) 37 | 38 | e = HttpError({"code": "429"}) 39 | assert is_retriable(e) 40 | 41 | e = ProxyError() 42 | assert is_retriable(e) 43 | 44 | 45 | def test_pickle_serialization(): 46 | expected = HttpError({"message": "", "code": 400}) 47 | 48 | # Serialize/Deserialize 49 | serialized = pickle.dumps(expected) 50 | actual = pickle.loads(serialized) 51 | 52 | is_same_type = type(expected) is type(actual) 53 | is_same_args = expected.args == actual.args 54 | 55 | assert is_same_type and is_same_args 56 | 57 | 58 | def conditional_exception(process_id): 59 | # Raise only on second process (id=1) 60 | if process_id == 1: 61 | raise HttpError({"message": "", "code": 400}) 62 | 63 | 64 | def test_multiprocessing_error_handling(): 65 | # Ensure spawn context to avoid forking issues 66 | ctx = multiprocessing.get_context("spawn") 67 | 68 | # Run on two processes 69 | with ProcessPoolExecutor(2, mp_context=ctx) as p: 70 | results = 
p.map(conditional_exception, range(2)) 71 | 72 | with pytest.raises(HttpError): 73 | _ = [result for result in results] 74 | 75 | 76 | def test_validate_response(): 77 | validate_response(200, None, "/path") 78 | 79 | # HttpError with no JSON body 80 | with pytest.raises(HttpError) as e: 81 | validate_response(503, b"", "/path") 82 | assert e.value.code == 503 83 | assert e.value.message == ", 503" 84 | 85 | # HttpError with JSON body 86 | j = '{"error": {"code": 503, "message": "Service Unavailable"}}' 87 | with pytest.raises(HttpError) as e: 88 | validate_response(503, j, "/path") 89 | assert e.value.code == 503 90 | assert e.value.message == "Service Unavailable, 503" 91 | 92 | # 403 93 | j = '{"error": {"message": "Not ok"}}' 94 | with pytest.raises(IOError, match="Forbidden: /path\nNot ok"): 95 | validate_response(403, j, "/path") 96 | 97 | # 404 98 | with pytest.raises(FileNotFoundError): 99 | validate_response(404, b"", "/path") 100 | 101 | # 502 102 | with pytest.raises(ProxyError): 103 | validate_response(502, b"", "/path") 104 | 105 | 106 | def test_validate_response_error_is_string(): 107 | # HttpError with JSON body 108 | j = '{"error": "Too Many Requests"}' 109 | with pytest.raises(HttpError) as e: 110 | validate_response(429, j, "/path") 111 | assert e.value.code == 429 112 | assert e.value.message == "Too Many Requests, 429" 113 | 114 | 115 | @pytest.mark.parametrize( 116 | ["file_path", "validate_get_error", "validate_list_error", "expected_error"], 117 | [ 118 | ( 119 | "/missing", 120 | FileNotFoundError, 121 | None, 122 | FileNotFoundError, 123 | ), # Not called 124 | ( 125 | "/missing", 126 | OSError("Forbidden"), 127 | FileNotFoundError, 128 | FileNotFoundError, 129 | ), 130 | ( 131 | "/2014-01-01.csv", 132 | None, 133 | None, 134 | None, 135 | ), 136 | ( 137 | "/2014-01-01.csv", 138 | OSError("Forbidden"), 139 | None, 140 | None, 141 | ), 142 | ], 143 | ids=[ 144 | "missing_with_get_perms", 145 | "missing_with_list_perms", 146 | "existing_with_get_perms", 147 | "existing_with_list_perms", 148 | ], 149 | ) 150 | def test_metadata_read_permissions( 151 | file_path, validate_get_error, validate_list_error, expected_error, gcs 152 | ): 153 | def _validate_response(self, status, content, path): 154 | if path.endswith(f"/o{file_path}") and validate_get_error is not None: 155 | raise validate_get_error 156 | if path.endswith("/o/") and validate_list_error is not None: 157 | raise validate_list_error 158 | validate_response(status, content, path) 159 | 160 | if expected_error is None: 161 | gcs.ls(TEST_BUCKET + file_path) 162 | gcs.info(TEST_BUCKET + file_path) 163 | assert gcs.exists(TEST_BUCKET + file_path) 164 | else: 165 | with pytest.raises(expected_error): 166 | gcs.ls(TEST_BUCKET + file_path) 167 | with pytest.raises(expected_error): 168 | gcs.info(TEST_BUCKET + file_path) 169 | assert gcs.exists(TEST_BUCKET + file_path) is False 170 | -------------------------------------------------------------------------------- /gcsfs/tests/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | import tempfile 4 | from contextlib import contextmanager 5 | 6 | 7 | @contextmanager 8 | def ignoring(*exceptions): 9 | try: 10 | yield 11 | except exceptions: 12 | pass 13 | 14 | 15 | @contextmanager 16 | def tempdir(dir=None): 17 | dirname = tempfile.mkdtemp(dir=dir) 18 | shutil.rmtree(dirname, ignore_errors=True) 19 | 20 | try: 21 | yield dirname 22 | finally: 23 | if os.path.exists(dirname): 24 | 
shutil.rmtree(dirname, ignore_errors=True)
25 | 
26 | 
27 | @contextmanager
28 | def tmpfile(extension="", dir=None):
29 |     extension = "." + extension.lstrip(".")
30 |     handle, filename = tempfile.mkstemp(extension, dir=dir)
31 |     os.close(handle)
32 |     os.remove(filename)
33 | 
34 |     try:
35 |         yield filename
36 |     finally:
37 |         if os.path.exists(filename):
38 |             if os.path.isdir(filename):
39 |                 shutil.rmtree(filename)
40 |             else:
41 |                 with ignoring(OSError):
42 |                     os.remove(filename)
43 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohttp!=4.0.0a0, !=4.0.0a1
2 | decorator>4.1.2
3 | fsspec==2025.5.1
4 | google-auth>=1.2
5 | google-auth-oauthlib
6 | google-cloud-storage
7 | requests
8 | 
--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
1 | [versioneer]
2 | VCS = git
3 | style = pep440
4 | versionfile_source = gcsfs/_version.py
5 | versionfile_build = gcsfs/_version.py
6 | tag_prefix =
7 | 
8 | [bdist_wheel]
9 | universal=1
10 | 
11 | [flake8]
12 | exclude = versioneer.py,docs/source/conf.py
13 | ignore =
14 |     # Extra space in brackets
15 |     E20,
16 |     # Multiple spaces around ","
17 |     E231,E241,
18 |     # Comments
19 |     E26,
20 |     # Import formatting
21 |     E4,
22 |     # Comparing types instead of isinstance
23 |     E721,
24 |     # Assigning lambda expression
25 |     E731,
26 |     # Ambiguous variable names
27 |     E741,
28 |     # Line break before binary operator
29 |     W503,
30 |     # Line break after binary operator
31 |     W504,
32 |     # Redefinition of unused 'loop' from line 10
33 |     F811,
34 | max-line-length = 120
35 | 
36 | [tool:pytest]
37 | addopts =
38 |     --color=yes --timeout=600
39 | log_cli = false
40 | log_cli_level = DEBUG
41 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import os
4 | 
5 | from setuptools import setup
6 | 
7 | import versioneer
8 | 
9 | setup(
10 |     name="gcsfs",
11 |     version=versioneer.get_version(),
12 |     cmdclass=versioneer.get_cmdclass(),
13 |     description="Convenient Filesystem interface over GCS",
14 |     url="https://github.com/fsspec/gcsfs",
15 |     maintainer="Martin Durant",
16 |     maintainer_email="mdurant@anaconda.com",
17 |     license="BSD",
18 |     classifiers=[
19 |         "Development Status :: 4 - Beta",
20 |         "Intended Audience :: Developers",
21 |         "License :: OSI Approved :: BSD License",
22 |         "Operating System :: OS Independent",
23 |         "Programming Language :: Python :: 3.9",
24 |         "Programming Language :: Python :: 3.10",
25 |         "Programming Language :: Python :: 3.11",
26 |         "Programming Language :: Python :: 3.12",
27 |         "Programming Language :: Python :: 3.13",
28 |     ],
29 |     keywords=["google-cloud-storage", "gcloud", "file-system"],
30 |     packages=["gcsfs", "gcsfs.cli"],
31 |     install_requires=open("requirements.txt").read().strip().split("\n"),
32 |     long_description=(
33 |         open("README.rst").read() if os.path.exists("README.rst") else ""
34 |     ),
35 |     extras_require={"gcsfuse": ["fusepy"], "crc": ["crcmod"]},
36 |     python_requires=">=3.9",
37 |     zip_safe=False,
38 | )
39 | 
--------------------------------------------------------------------------------
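Taken together, the retry tests above document the contract of
``gcsfs.retry``: server-side errors (5xx) and rate limiting (429), along with
connection-level failures such as timeouts and proxy errors, are treated as
transient and retriable, while client errors (4xx) are surfaced immediately;
``validate_response`` maps an HTTP status and body onto the matching Python
exception. A short sketch using only behavior demonstrated by those tests
(the path string is a placeholder)::

    from gcsfs.retry import HttpError, is_retriable, validate_response

    # Transient statuses are retriable; client errors are not.
    assert is_retriable(HttpError({"message": "", "code": 500}))
    assert is_retriable(HttpError({"code": "429"}))
    assert not is_retriable(HttpError({"message": "", "code": 400}))

    # validate_response raises the exception matching the status code.
    try:
        validate_response(503, b"", "/example/path")
    except HttpError as err:
        assert err.code == 503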