├── src └── morefs │ ├── __init__.py │ ├── py.typed │ ├── asyn_local.py │ ├── memory.py │ ├── overlay.py │ └── dict.py ├── .gitattributes ├── tests ├── __init__.py ├── test_morefs.py ├── test_asyn_local.py ├── test_memfs.py └── test_dictfs.py ├── .github ├── dependabot.yml └── workflows │ ├── update-template.yaml │ ├── release.yml │ └── tests.yml ├── .cruft.json ├── .pre-commit-config.yaml ├── noxfile.py ├── .gitignore ├── CONTRIBUTING.rst ├── pyproject.toml ├── README.rst ├── CODE_OF_CONDUCT.rst └── LICENSE /src/morefs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/morefs/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * text=auto eol=lf 2 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test suite for the morefs package.""" 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | updates: 4 | - directory: "/" 5 | package-ecosystem: "pip" 6 | schedule: 7 | interval: "weekly" 8 | labels: 9 | - "maintenance" 10 | 11 | - directory: "/" 12 | package-ecosystem: "github-actions" 13 | schedule: 14 | interval: "weekly" 15 | labels: 16 | - "maintenance" 17 | -------------------------------------------------------------------------------- /tests/test_morefs.py: -------------------------------------------------------------------------------- 1 | """Tests for `morefs` package.""" 2 | 3 | import fsspec 4 | import pytest 5 | 
@pytest.mark.parametrize("proto, fs_cls", [("dictfs", DictFS), ("memfs", MemFS)])
def test_fsspec(proto, fs_cls):
    """Check that each protocol name resolves to the matching morefs class."""
    resolved = fsspec.filesystem(proto)
    assert isinstance(resolved, fs_cls)
workflow_dispatch: 7 | 8 | env: 9 | FORCE_COLOR: "1" 10 | 11 | jobs: 12 | release: 13 | environment: pypi 14 | permissions: 15 | contents: read 16 | id-token: write 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Check out the repository 20 | uses: actions/checkout@v5 21 | with: 22 | fetch-depth: 0 23 | 24 | - name: Set up Python 3.12 25 | uses: actions/setup-python@v6 26 | with: 27 | python-version: '3.12' 28 | cache: 'pip' 29 | 30 | - name: Upgrade nox 31 | run: | 32 | python -m pip install --upgrade 'nox[uv]' 33 | nox --version 34 | 35 | - name: Build package 36 | run: nox -s build 37 | 38 | - name: Upload package 39 | if: github.event_name == 'release' 40 | uses: pypa/gh-action-pypi-publish@release/v1 41 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3 3 | repos: 4 | - repo: https://github.com/pre-commit/pre-commit-hooks 5 | rev: v6.0.0 6 | hooks: 7 | - id: check-added-large-files 8 | - id: check-case-conflict 9 | - id: check-docstring-first 10 | - id: check-executables-have-shebangs 11 | - id: check-json 12 | - id: check-merge-conflict 13 | args: ['--assume-in-merge'] 14 | - id: check-toml 15 | - id: check-yaml 16 | - id: end-of-file-fixer 17 | - id: mixed-line-ending 18 | args: ['--fix=lf'] 19 | - id: sort-simple-yaml 20 | - id: trailing-whitespace 21 | - repo: https://github.com/astral-sh/ruff-pre-commit 22 | rev: 'v0.14.3' 23 | hooks: 24 | - id: ruff 25 | args: [--fix, --exit-non-zero-on-fix] 26 | - id: ruff-format 27 | - repo: https://github.com/codespell-project/codespell 28 | rev: v2.4.1 29 | hooks: 30 | - id: codespell 31 | additional_dependencies: ["tomli"] 32 | - repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks 33 | rev: v2.15.0 34 | hooks: 35 | - id: pretty-format-toml 36 | args: [--autofix, --no-sort] 37 | - id: pretty-format-yaml 
38 | args: [--autofix, --indent, '2', '--offset', '2', --preserve-quotes] 39 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | workflow_dispatch: 8 | 9 | env: 10 | FORCE_COLOR: "1" 11 | 12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | tests: 18 | timeout-minutes: 10 19 | runs-on: ${{ matrix.os }} 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | os: [ubuntu-latest, windows-latest, macos-latest] 24 | pyv: ['3.9', '3.10', '3.11', '3.12', '3.13'] 25 | include: 26 | - {os: ubuntu-latest, pyv: 'pypy3.9'} 27 | 28 | steps: 29 | - name: Check out the repository 30 | uses: actions/checkout@v5 31 | with: 32 | fetch-depth: 0 33 | 34 | - name: Set up Python ${{ matrix.pyv }} 35 | uses: actions/setup-python@v6 36 | with: 37 | python-version: ${{ matrix.pyv }} 38 | allow-prereleases: true 39 | cache: 'pip' 40 | 41 | - name: Upgrade nox 42 | run: | 43 | python -m pip install --upgrade 'nox[uv]' 44 | nox --version 45 | 46 | - name: Lint code 47 | run: nox -s lint 48 | 49 | - name: Run tests 50 | run: nox -s tests-${{ matrix.pyv }} -- --cov-report=xml 51 | 52 | - name: Upload coverage report 53 | uses: codecov/codecov-action@v5 54 | 55 | - name: Build package 56 | run: nox -s build 57 | -------------------------------------------------------------------------------- /noxfile.py: -------------------------------------------------------------------------------- 1 | """Automation using nox.""" 2 | 3 | import glob 4 | import os 5 | 6 | import nox 7 | 8 | nox.options.default_venv_backend = "uv|virtualenv" 9 | nox.options.reuse_existing_virtualenvs = True 10 | nox.options.sessions = "lint", "tests" 11 | locations = "src", "tests" 12 | 13 | 14 | @nox.session( 15 | 
@nox.session
def dev(session: nox.Session) -> None:
    """Sets up a python development environment for the project.

    The first positional argument (``nox -s dev -- <dir>``) selects the
    directory of the virtual environment; it defaults to ``venv``. The
    project is then installed into it in editable mode with the ``dev``
    extras.
    """
    args = session.posargs or ("venv",)
    venv_dir = os.fsdecode(os.path.abspath(args[0]))

    session.log(f"Setting up virtual environment in {venv_dir}")
    session.install("virtualenv")
    session.run("virtualenv", venv_dir, silent=True)

    # The interpreter lives in "Scripts" on Windows and in "bin" elsewhere;
    # hard-coding "bin/python" made this session fail on Windows.
    if os.name == "nt":
        python = os.path.join(venv_dir, "Scripts", "python.exe")
    else:
        python = os.path.join(venv_dir, "bin", "python")
    # external=True: this interpreter belongs to the new environment, not to
    # the nox session's own virtualenv.
    session.run(python, "-m", "pip", "install", "-e", ".[dev]", external=True)
share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 98 | __pypackages__/ 99 | 100 | # Celery stuff 101 | celerybeat-schedule 102 | celerybeat.pid 103 | 104 | # SageMath parsed files 105 | *.sage.py 106 | 107 | # Environments 108 | .env 109 | .venv 110 | env/ 111 | venv/ 112 | ENV/ 113 | env.bak/ 114 | venv.bak/ 115 | 116 | # Spyder project settings 117 | .spyderproject 118 | .spyproject 119 | 120 | # Rope project settings 121 | .ropeproject 122 | 123 | # mkdocs documentation 124 | /site 125 | 126 | # mypy 127 | .mypy_cache/ 128 | .dmypy.json 129 | dmypy.json 130 | 131 | # Pyre type checker 132 | .pyre/ 133 | 134 | # pytype static type analyzer 135 | .pytype/ 136 | 137 | # Cython debug symbols 138 | cython_debug/ 139 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | Thank you for your interest in improving this project. 5 | This project is open-source under the `Apache 2.0 license`_ and 6 | welcomes contributions in the form of bug reports, feature requests, and pull requests. 7 | 8 | Here is a list of important resources for contributors: 9 | 10 | - `Source Code`_ 11 | - `Issue Tracker`_ 12 | - `Code of Conduct`_ 13 | 14 | .. _Apache 2.0 license: https://opensource.org/licenses/Apache-2.0 15 | .. _Source Code: https://github.com/iterative/morefs 16 | .. _Issue Tracker: https://github.com/iterative/morefs/issues 17 | 18 | How to report a bug 19 | ------------------- 20 | 21 | Report bugs on the `Issue Tracker`_. 22 | 23 | When filing an issue, make sure to answer these questions: 24 | 25 | - Which operating system and Python version are you using? 26 | - Which version of this project are you using? 27 | - What did you do? 28 | - What did you expect to see? 29 | - What did you see instead? 30 | 31 | The best way to get your bug fixed is to provide a test case, 32 | and/or steps to reproduce the issue. 
33 | 34 | 35 | How to request a feature 36 | ------------------------ 37 | 38 | Request features on the `Issue Tracker`_. 39 | 40 | 41 | How to set up your development environment 42 | ------------------------------------------ 43 | 44 | You need Python 3.9+ and the following tools: 45 | 46 | - Nox_ 47 | 48 | Install the package with development requirements: 49 | 50 | .. code:: console 51 | 52 | $ pip install nox 53 | 54 | .. _Nox: https://nox.thea.codes/ 55 | 56 | 57 | How to test the project 58 | ----------------------- 59 | 60 | Run the full test suite: 61 | 62 | .. code:: console 63 | 64 | $ nox 65 | 66 | List the available Nox sessions: 67 | 68 | .. code:: console 69 | 70 | $ nox --list-sessions 71 | 72 | You can also run a specific Nox session. 73 | For example, invoke the unit test suite like this: 74 | 75 | .. code:: console 76 | 77 | $ nox --session=tests 78 | 79 | Unit tests are located in the ``tests`` directory, 80 | and are written using the pytest_ testing framework. 81 | 82 | .. _pytest: https://pytest.readthedocs.io/ 83 | 84 | 85 | How to submit changes 86 | --------------------- 87 | 88 | Open a `pull request`_ to submit changes to this project. 89 | 90 | Your pull request needs to meet the following guidelines for acceptance: 91 | 92 | - The Nox test suite must pass without errors and warnings. 93 | - Include unit tests. This project maintains 100% code coverage. 94 | - If your changes add functionality, update the documentation accordingly. 95 | 96 | Feel free to submit early, though—we can always iterate on this. 97 | 98 | To run linting and code formatting checks, you can invoke a `lint` session in nox: 99 | 100 | .. code:: console 101 | 102 | $ nox -s lint 103 | 104 | It is recommended to open an issue before starting work on anything. 105 | This will allow a chance to talk it over with the owners and validate your approach. 106 | 107 | .. _pull request: https://github.com/iterative/morefs/pulls 108 | .. github-only 109 | .. 
class AsyncLocalFileSystem(AsyncFileSystem, LocalFileSystem):
    """Local filesystem that offers both the sync and the async fsspec API.

    Sync methods come straight from ``LocalFileSystem`` and are not
    overridden. The async methods defined here are the corresponding sync
    methods passed through ``wrap``, i.e. they run in the event loop's
    default executor. ``open_async()`` additionally gives real asynchronous
    file objects, backed by `aiofile <https://github.com/mosquito/aiofile>`_.

    Some inherited async methods (``_find`` for instance) may call these
    wrapped methods many times, which can be costly. For those cases the
    whole sync operation is also exposed as a single executor call under a
    ``_*_async`` name: ``_find_async()`` and ``_get_file_async()``.
    """

    # Keep LocalFileSystem's own sync methods (see the class docstring).
    mirror_sync_methods = False

    _cat_file = wrap(LocalFileSystem.cat_file)
    _chmod = wrap(LocalFileSystem.chmod)
    _cp_file = wrap(LocalFileSystem.cp_file)
    _created = wrap(LocalFileSystem.created)
    _find_async = wrap(LocalFileSystem.find)
    _get_file_async = wrap(LocalFileSystem.get_file)
    _info = wrap(LocalFileSystem.info)
    _islink = wrap(LocalFileSystem.islink)
    _lexists = wrap(LocalFileSystem.lexists)
    _link = wrap(LocalFileSystem.link)
    _ls = wrap(LocalFileSystem.ls)
    _makedirs = wrap(LocalFileSystem.makedirs)
    _mkdir = wrap(LocalFileSystem.mkdir)
    _modified = wrap(LocalFileSystem.modified)

    # `mv_file` was renamed to `mv` in fsspec==2024.5.0
    # https://github.com/fsspec/filesystem_spec/pull/1585
    _mv = wrap(getattr(LocalFileSystem, "mv", None) or LocalFileSystem.mv_file)  # type: ignore[call-overload]
    _mv_file = _mv
    _pipe_file = wrap(LocalFileSystem.pipe_file)
    _put_file = wrap(LocalFileSystem.put_file)
    _read_bytes = wrap(LocalFileSystem.read_bytes)
    _read_text = wrap(LocalFileSystem.read_text)
    _rm = wrap(LocalFileSystem.rm)
    _rm_file = wrap(LocalFileSystem.rm_file)
    _rmdir = wrap(LocalFileSystem.rmdir)
    _touch = wrap(LocalFileSystem.touch)
    _symlink = wrap(LocalFileSystem.symlink)
    _write_bytes = wrap(LocalFileSystem.write_bytes)
    _write_text = wrap(LocalFileSystem.write_text)
    sign = LocalFileSystem.sign

    async def _get_file(self, src, dst, **kwargs):  # pylint: disable=arguments-renamed
        """Copy ``src`` into ``dst``.

        ``dst`` is handled in one of two ways, depending on its ``write``
        attribute:

        * ``write`` is a coroutine function: ``src`` is opened with
          ``open_async()`` and streamed into ``dst`` in chunks of
          ``shutil.COPY_BUFSIZE`` bytes.
        * anything else (a path, a sync file object): the copy is delegated
          to ``_get_file_async()`` and its result is returned.
        """
        if iscoroutinefunction(getattr(dst, "write", None)):
            reader = await self.open_async(src, "rb")
            async with reader:
                # An empty read marks the end of the source file.
                while chunk := await reader.read(length=shutil.COPY_BUFSIZE):
                    await dst.write(chunk)
            return None

        local_src = self._strip_protocol(src)
        return await self._get_file_async(local_src, dst)

    async def open_async(self, path, mode="rb", **kwargs):
        """Open ``path`` with ``aiofile.async_open`` and return its result.

        When the filesystem has ``auto_mkdir`` set and ``mode`` contains
        ``"w"``, the parent directory is created first. Extra keyword
        arguments are passed on to ``aiofile.async_open``.
        """
        local_path = self._strip_protocol(path)
        if self.auto_mkdir and "w" in mode:
            parent = self._parent(local_path)
            await self._makedirs(parent, exist_ok=True)
        return await aiofile.async_open(local_path, mode, **kwargs)
59 | memfs = "morefs.memory:MemFS" 60 | dictfs = "morefs.dict:DictFS" 61 | overlayfs = "morefs.overlay:OverlayFileSystem" 62 | asynclocal = "morefs.asyn_local:AsyncLocalFileSystem" 63 | 64 | [tool.setuptools.packages.find] 65 | where = ["src"] 66 | namespaces = false 67 | 68 | [tool.setuptools_scm] 69 | 70 | [tool.pytest.ini_options] 71 | addopts = "-ra" 72 | 73 | [tool.coverage.run] 74 | branch = true 75 | source = ["morefs", "tests"] 76 | 77 | [tool.coverage.paths] 78 | source = ["src", "*/site-packages"] 79 | 80 | [tool.coverage.report] 81 | show_missing = true 82 | exclude_lines = [ 83 | "pragma: no cover", 84 | "if __name__ == .__main__.:", 85 | "if typing.TYPE_CHECKING:", 86 | "if TYPE_CHECKING:", 87 | "raise NotImplementedError", 88 | "raise AssertionError", 89 | "@overload" 90 | ] 91 | 92 | [tool.mypy] 93 | # Error output 94 | show_column_numbers = true 95 | show_error_codes = true 96 | show_error_context = true 97 | show_traceback = true 98 | pretty = true 99 | check_untyped_defs = false 100 | # Warnings 101 | warn_no_return = true 102 | warn_redundant_casts = true 103 | warn_unreachable = true 104 | ignore_missing_imports = true 105 | files = ["src", "tests"] 106 | 107 | [tool.codespell] 108 | ignore-words-list = "fo,cachable,afile" 109 | skip = "CODE_OF_CONDUCT.rst" 110 | 111 | [tool.ruff] 112 | output-format = "full" 113 | show-fixes = true 114 | 115 | [tool.ruff.lint] 116 | ignore = [ 117 | "N818", 118 | "S101", 119 | "ISC001", 120 | "PT007", 121 | "RET502", 122 | "RET503", 123 | "SIM105", 124 | "SIM108", 125 | "SIM117", 126 | "TRY003", 127 | "TRY300", 128 | "PLR2004", 129 | "PLW2901", 130 | "LOG007" 131 | ] 132 | select = [ 133 | "F", 134 | "E", 135 | "W", 136 | "C90", 137 | "I", 138 | "N", 139 | "UP", 140 | "YTT", 141 | "ASYNC", 142 | "S", 143 | "BLE", 144 | "B", 145 | "A", 146 | "C4", 147 | "T10", 148 | "EXE", 149 | "ISC", 150 | "ICN", 151 | "G", 152 | "INP", 153 | "PIE", 154 | "T20", 155 | "PYI", 156 | "PT", 157 | "Q", 158 | "RSE", 159 | "RET", 160 
class MemFS(AbstractFileSystem):  # pylint: disable=abstract-method
    """In-memory, object-storage-like filesystem backed by a string trie.

    Files are stored as file-like objects keyed by their path in a
    ``pygtrie.StringTrie``. Directories are not stored: a path counts as a
    directory when it is the root or a prefix of stored keys.
    """

    cachable = False
    protocol = ("memfs", "memory")

    def __init__(self, *args, **storage_options):
        super().__init__(*args, **storage_options)
        # ``trie`` and ``store`` are two names for the same trie object.
        storage = pygtrie.StringTrie()
        self.trie = storage
        self.store = storage

    @classmethod
    def _strip_protocol(cls, path):
        """Drop a leading ``memfs://`` and normalise like fsspec's memory fs."""
        prefix = "memfs://"
        stripped = path[len(prefix) :] if path.startswith(prefix) else path
        return _MemFS._strip_protocol(stripped)  # pylint: disable=protected-access

    @staticmethod
    def _info(path, filelike=None, **kwargs):
        """Build the info dict for ``path``; no ``filelike`` means directory."""
        if not filelike:
            return {"name": path, "size": 0, "type": "directory"}

        # Prefer the object's own ``size``; otherwise measure its buffer.
        if hasattr(filelike, "size"):
            size = filelike.size
        else:
            size = filelike.getbuffer().nbytes
        return {
            "name": path,
            "size": size,
            "type": "file",
            "created": getattr(filelike, "created", None),
        }

    def ls(self, path, detail=False, **kwargs):
        """List the entries found by traversing the trie from ``path``.

        Returns names, or info dicts when ``detail`` is true. A missing root
        yields an empty list; any other missing path raises
        ``FileNotFoundError``.
        """
        path = self._strip_protocol(path)
        entries = []

        def collect(path_conv, parts, children, filelike=None):
            node_path = path_conv(parts)
            if node_path == path and children:
                # The listed node itself is not reported; consuming
                # ``children`` makes the traversal visit them.
                list(children)
                return

            entry = self._info(node_path, filelike) if detail else node_path
            entries.append(entry)

        try:
            self.trie.traverse(collect, prefix=path)
        except KeyError as exc:
            if path not in ("", "/"):
                raise FileNotFoundError(errno.ENOENT, "No such file", path) from exc
            return []

        return entries

    def info(self, path, **kwargs):
        """Return the info dict for ``path``.

        Raises ``NotADirectoryError`` when a stored key is a proper prefix of
        ``path``, and ``FileNotFoundError`` when nothing matches.
        """
        path = self._strip_protocol(path)
        is_root = path in ("", "/")
        if is_root or self.trie.has_subtrie(path):
            return self._info(path, **kwargs)

        filelike = self.trie.get(path)
        if filelike:
            return self._info(path, filelike, **kwargs)

        shortest = self.trie.shortest_prefix(path)
        if shortest and shortest.key != path:
            raise NotADirectoryError(errno.ENOTDIR, "Not a directory", path)
        raise FileNotFoundError(errno.ENOENT, "No such file", path)

    def _rm(self, path):
        """Delete the single file at ``path``; directories are rejected."""
        path = self._strip_protocol(path)
        if self.isdir(path):
            raise IsADirectoryError(errno.EISDIR, "Is a directory", path)

        try:
            del self.trie[path]
        except KeyError as missing:
            raise FileNotFoundError(errno.ENOENT, "No such file", path) from missing

    def rm(self, path, recursive=False, maxdepth=None):
        """Remove every key ``expand_path`` returns; missing keys are ignored."""
        for target in self.expand_path(path, recursive=recursive, maxdepth=maxdepth):
            self.store.pop(target, None)

    def _open(self, path, mode="rb", **kwargs):  # pylint: disable=arguments-differ
        """Return the file object for ``path``.

        ``"wb"`` creates a new ``MemoryFile`` (committed at once unless a
        transaction is active). Other modes return the stored object,
        positioned at the end for ``"ab"`` and at the start otherwise.
        """
        path = self._strip_protocol(path)
        try:
            if self.info(path)["type"] == "directory":
                raise IsADirectoryError(errno.EISDIR, "Is a directory", path)
        except FileNotFoundError:
            # A missing file is only an error for these modes.
            if mode in ("rb", "ab", "rb+"):
                raise

        if mode != "wb":
            existing = self.trie[path]
            whence = os.SEEK_END if mode == "ab" else os.SEEK_SET
            existing.seek(0, whence)
            return existing

        fresh = MemoryFile(self, path)
        if not self._intrans:
            fresh.commit()
        return fresh

    def cp_file(self, path1, path2, **kwargs):
        """Copy the bytes of ``path1`` to ``path2``; a directory source is a no-op."""
        src_path = self._strip_protocol(path1)
        dst_path = self._strip_protocol(path2)

        try:
            src = self.open(src_path, "rb")
        except IsADirectoryError:
            return

        with src, self.open(dst_path, "wb") as dst:
            dst.write(src.getbuffer())

    def created(self, path):
        """Return the ``created`` entry of ``info(path)``, or ``None`` if absent."""
        details = self.info(path)
        return details.get("created")
def test_sync_methods(tmp_path, localfs, fs):
    """Compare the sync API of ``fs`` against a plain ``LocalFileSystem``."""
    localfs.mkdir(tmp_path / "dir")
    localfs.pipe(
        {
            fspath(tmp_path / "foo"): b"foo",
            fspath(tmp_path / "bar"): b"bar",
            fspath(tmp_path / "dir" / "file"): b"file",
        }
    )

    top_level = {localfs._strip_protocol(tmp_path / name) for name in ("foo", "bar", "dir")}
    assert set(fs.ls(tmp_path, detail=False)) == top_level
    assert fs.ls(tmp_path, detail=False) == localfs.ls(tmp_path, detail=False)

    for name in ("foo", "dir"):
        assert fs.info(tmp_path / name) == localfs.info(tmp_path / name)

    assert fs.ls(tmp_path, detail=True) == localfs.ls(tmp_path, detail=True)
    for detail in (False, True):
        assert fs.find(tmp_path, detail=detail) == localfs.find(tmp_path, detail=detail)

    assert fs.isfile(tmp_path / "foo")
    assert fs.isdir(tmp_path / "dir")
    assert fs.exists(tmp_path / "bar")
    assert not fs.exists(tmp_path / "not-existing-file")
    assert fs.lexists(tmp_path / "foo")
@pytest.mark.asyncio
async def test_get_file(tmp_path, fs):
    """Exercise ``_get_file`` with a path and three kinds of file objects."""
    source = tmp_path / "foo"
    await fs._pipe_file(source, b"foo")

    # Destination given as a path.
    await fs._get_file(source, tmp_path / "bar")
    assert await fs._isfile(tmp_path / "bar")

    # Destination is a file object returned by open_async().
    async_target = tmp_path / "file1"
    writer = await fs.open_async(async_target, mode="wb")
    async with writer:
        await fs._get_file(source, writer)
    assert await fs._cat_file(async_target) == b"foo"

    # Destination is a file object returned by the sync fs.open().
    fsspec_target = tmp_path / "file2"
    with fs.open(fsspec_target, mode="wb") as sink:
        await fs._get_file(source, sink)
    assert await fs._cat_file(fsspec_target) == b"foo"

    # Destination is a file object returned by pathlib's open().
    builtin_target = tmp_path / "file3"
    with builtin_target.open(mode="wb") as sink:
        await fs._get_file(source, sink)
    assert await fs._cat_file(builtin_target) == b"foo"
|License| image:: https://img.shields.io/pypi/l/morefs 18 | :target: https://opensource.org/licenses/Apache-2.0 19 | :alt: License 20 | .. |Tests| image:: https://github.com/iterative/morefs/workflows/Tests/badge.svg 21 | :target: https://github.com/iterative/morefs/actions?workflow=Tests 22 | :alt: Tests 23 | .. |Codecov| image:: https://codecov.io/gh/iterative/morefs/branch/main/graph/badge.svg 24 | :target: https://app.codecov.io/gh/iterative/morefs 25 | :alt: Codecov 26 | .. |pre-commit| image:: https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit&logoColor=white 27 | :target: https://github.com/pre-commit/pre-commit 28 | :alt: pre-commit 29 | .. |Black| image:: https://img.shields.io/badge/code%20style-black-000000.svg 30 | :target: https://github.com/psf/black 31 | :alt: Black 32 | 33 | 34 | Features 35 | -------- 36 | 37 | *morefs* provides standalone fsspec-based filesystems like: 38 | 39 | * ``AsyncLocalFileSystem`` that provides an async implementation of ``LocalFileSystem``. 40 | * In-memory filesystems ``DictFS`` built on nested dictionaries and ``MemFS`` built on tries, which are much faster than fsspec's ``MemoryFileSystem``. 41 | * ``OverlayFileSystem`` that allows overlaying multiple fsspec-based filesystems. 42 | 43 | Installation 44 | ------------ 45 | 46 | You can install *morefs* via pip_ from PyPI_: 47 | 48 | .. code:: console 49 | 50 | $ pip install morefs 51 | 52 | You might need to install with extras for some filesystems: 53 | 54 | .. code:: console 55 | 56 | $ pip install morefs[asynclocal] # for installing aiofile dependency for AsyncLocalFileSystem 57 | $ pip install morefs[memfs] # for installing pygtrie dependency for MemFS 58 | 59 | 60 | Usage 61 | ----- 62 | 63 | AsyncLocalFileSystem 64 | ~~~~~~~~~~~~~~~~~~~~ 65 | 66 | Extended version of ``LocalFileSystem`` that also provides async methods. 67 | 68 | .. 
code:: python 69 | 70 | import asyncio 71 | from morefs.asyn_local import AsyncLocalFileSystem 72 | 73 | async def main(): 74 | fs = AsyncLocalFileSystem(auto_mkdir=False) 75 | 76 | f = await fs.open_async("foo", mode="w") 77 | async with f: 78 | await f.write("foobar") 79 | 80 | content = await fs._cat("foo") 81 | print(content) 82 | print(fs.cat("foo")) # you can still use sync methods 83 | 84 | asyncio.run(main()) 85 | 86 | 87 | DictFS 88 | ~~~~~~ 89 | 90 | DictFS is a nested dictionary-based, in-memory filesystem 91 | and acts more like a real LocalFileSystem. 92 | 93 | .. code:: python 94 | 95 | from morefs.dict import DictFS 96 | 97 | fs = DictFS() 98 | 99 | 100 | MemFS 101 | ~~~~~ 102 | 103 | MemFS is a trie-based in-memory filesystem, and acts like bucket storage. 104 | 105 | .. code:: python 106 | 107 | from morefs.memory import MemFS 108 | 109 | fs = MemFS() 110 | 111 | 112 | OverlayFileSystem 113 | ~~~~~~~~~~~~~~~~~ 114 | 115 | .. code:: python 116 | 117 | from morefs.overlay import OverlayFileSystem 118 | 119 | # use localfilesystem for write, overlay all filesystems for read 120 | fs = OverlayFileSystem(file={"auto_mkdir": True}, s3={"anon": True}) 121 | # or you can pass filesystem instances directly 122 | # as variable positional arguments or with keyword argument `filesystems=[]` 123 | fs = OverlayFileSystem(LocalFileSystem(), s3={"anon": True}) 124 | 125 | 126 | Contributing 127 | ------------ 128 | 129 | Contributions are very welcome. 130 | To learn more, see the `Contributor Guide`_. 131 | 132 | 133 | License 134 | ------- 135 | 136 | Distributed under the terms of the `Apache 2.0 license`_, 137 | *morefs* is free and open source software. 138 | 139 | 140 | Issues 141 | ------ 142 | 143 | If you encounter any problems, 144 | please `file an issue`_ along with a detailed description. 145 | 146 | 147 | .. _Apache 2.0 license: https://opensource.org/licenses/Apache-2.0 148 | .. _PyPI: https://pypi.org/ 149 | .. 
_file an issue: https://github.com/iterative/morefs/issues 150 | .. _pip: https://pip.pypa.io/ 151 | .. github-only 152 | .. _Contributor Guide: CONTRIBUTING.rst 153 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.rst: -------------------------------------------------------------------------------- 1 | Contributor Covenant Code of Conduct 2 | ==================================== 3 | 4 | Our Pledge 5 | ---------- 6 | 7 | We as members, contributors, and leaders pledge to make participation in our community a harassment-free experience for everyone, regardless of age, body size, visible or invisible disability, ethnicity, sex characteristics, gender identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. 8 | 9 | We pledge to act and interact in ways that contribute to an open, welcoming, diverse, inclusive, and healthy community. 
10 | 11 | 12 | Our Standards 13 | ------------- 14 | 15 | Examples of behavior that contributes to a positive environment for our community include: 16 | 17 | - Demonstrating empathy and kindness toward other people 18 | - Being respectful of differing opinions, viewpoints, and experiences 19 | - Giving and gracefully accepting constructive feedback 20 | - Accepting responsibility and apologizing to those affected by our mistakes, and learning from the experience 21 | - Focusing on what is best not just for us as individuals, but for the overall community 22 | 23 | Examples of unacceptable behavior include: 24 | 25 | - The use of sexualized language or imagery, and sexual attention or 26 | advances of any kind 27 | - Trolling, insulting or derogatory comments, and personal or political attacks 28 | - Public or private harassment 29 | - Publishing others' private information, such as a physical or email 30 | address, without their explicit permission 31 | - Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | Enforcement Responsibilities 35 | ---------------------------- 36 | 37 | Community leaders are responsible for clarifying and enforcing our standards of acceptable behavior and will take appropriate and fair corrective action in response to any behavior that they deem inappropriate, threatening, offensive, or harmful. 38 | 39 | Community leaders have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, and will communicate reasons for moderation decisions when appropriate. 40 | 41 | 42 | Scope 43 | ----- 44 | 45 | This Code of Conduct applies within all community spaces, and also applies when an individual is officially representing the community in public spaces. 
Examples of representing our community include using an official e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. 46 | 47 | 48 | Enforcement 49 | ----------- 50 | 51 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported to the community leaders responsible for enforcement at saugat@iterative.ai. All complaints will be reviewed and investigated promptly and fairly. 52 | 53 | All community leaders are obligated to respect the privacy and security of the reporter of any incident. 54 | 55 | 56 | Enforcement Guidelines 57 | ---------------------- 58 | 59 | Community leaders will follow these Community Impact Guidelines in determining the consequences for any action they deem in violation of this Code of Conduct: 60 | 61 | 62 | 1. Correction 63 | ~~~~~~~~~~~~~ 64 | 65 | **Community Impact**: Use of inappropriate language or other behavior deemed unprofessional or unwelcome in the community. 66 | 67 | **Consequence**: A private, written warning from community leaders, providing clarity around the nature of the violation and an explanation of why the behavior was inappropriate. A public apology may be requested. 68 | 69 | 70 | 2. Warning 71 | ~~~~~~~~~~ 72 | 73 | **Community Impact**: A violation through a single incident or series of actions. 74 | 75 | **Consequence**: A warning with consequences for continued behavior. No interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, for a specified period of time. This includes avoiding interactions in community spaces as well as external channels like social media. Violating these terms may lead to a temporary or permanent ban. 76 | 77 | 78 | 3. Temporary Ban 79 | ~~~~~~~~~~~~~~~~ 80 | 81 | **Community Impact**: A serious violation of community standards, including sustained inappropriate behavior. 
82 | 83 | **Consequence**: A temporary ban from any sort of interaction or public communication with the community for a specified period of time. No public or private interaction with the people involved, including unsolicited interaction with those enforcing the Code of Conduct, is allowed during this period. Violating these terms may lead to a permanent ban. 84 | 85 | 86 | 4. Permanent Ban 87 | ~~~~~~~~~~~~~~~~ 88 | 89 | **Community Impact**: Demonstrating a pattern of violation of community standards, including sustained inappropriate behavior, harassment of an individual, or aggression toward or disparagement of classes of individuals. 90 | 91 | **Consequence**: A permanent ban from any sort of public interaction within the community. 92 | 93 | 94 | Attribution 95 | ----------- 96 | 97 | This Code of Conduct is adapted from the `Contributor Covenant <homepage_>`__, version 2.0, 98 | available at https://www.contributor-covenant.org/version/2/0/code_of_conduct/. 99 | 100 | Community Impact Guidelines were inspired by `Mozilla’s code of conduct enforcement ladder <https://github.com/mozilla/diversity>`__. 101 | 102 | .. _homepage: https://www.contributor-covenant.org 103 | 104 | For answers to common questions about this code of conduct, see the FAQ at 105 | https://www.contributor-covenant.org/faq. Translations are available at https://www.contributor-covenant.org/translations. 
106 | -------------------------------------------------------------------------------- /tests/test_memfs.py: -------------------------------------------------------------------------------- 1 | from os import fspath 2 | from unittest.mock import ANY 3 | 4 | import pytest 5 | 6 | from morefs.memory import MemFS 7 | 8 | 9 | @pytest.fixture 10 | def m(): 11 | return MemFS() 12 | 13 | 14 | def test_memfs_should_not_be_cached(): 15 | assert MemFS() is not MemFS() 16 | 17 | 18 | def test_1(m): 19 | m.touch("/somefile") # NB: is found with or without initial / 20 | m.touch("afiles/and/another") 21 | files = m.find("") 22 | assert files == ["/afiles/and/another", "/somefile"] 23 | 24 | files = sorted(m.get_mapper()) 25 | assert files == ["afiles/and/another", "somefile"] 26 | 27 | 28 | def test_strip(m): 29 | assert m._strip_protocol("") == "" 30 | assert m._strip_protocol("memory://") == "" 31 | assert m._strip_protocol("afile") == "/afile" 32 | assert m._strip_protocol("/b/c") == "/b/c" 33 | assert m._strip_protocol("/b/c/") == "/b/c" 34 | 35 | 36 | def test_put_single(m, tmp_path): 37 | fn = tmp_path / "dir" 38 | fn.mkdir() 39 | 40 | (fn / "abc").write_bytes(b"text") 41 | m.put(fspath(fn), "/test") # no-op, no files 42 | assert not m.exists("/test/abc") 43 | assert not m.exists("/test/dir") 44 | m.put(fspath(fn), "/test", recursive=True) 45 | assert m.cat("/test/abc") == b"text" 46 | 47 | 48 | def test_ls(m): 49 | m.touch("/dir/afile") 50 | m.touch("/dir/dir1/bfile") 51 | m.touch("/dir/dir1/cfile") 52 | 53 | assert m.ls("/", False) == ["/dir"] 54 | assert m.ls("/dir", False) == ["/dir/afile", "/dir/dir1"] 55 | assert m.ls("/dir", True)[0]["type"] == "file" 56 | assert m.ls("/dir", True)[1]["type"] == "directory" 57 | 58 | assert len(m.ls("/dir/dir1")) == 2 59 | assert m.ls("/dir/afile") == ["/dir/afile"] 60 | assert m.ls("/dir/dir1/bfile") == ["/dir/dir1/bfile"] 61 | assert m.ls("/dir/dir1/cfile") == ["/dir/dir1/cfile"] 62 | 63 | with pytest.raises(FileNotFoundError): 
64 | m.ls("/dir/not-existing-file") 65 | 66 | 67 | def test_mv_recursive(m): 68 | m.mkdir("src") 69 | m.touch("src/file.txt") 70 | m.mv("src", "dest", recursive=True) 71 | assert m.exists("dest/file.txt") 72 | assert not m.exists("src") 73 | 74 | 75 | def test_rm(m): 76 | m.touch("/dir1/dir2/file") 77 | m.rm("/dir1", recursive=True) 78 | assert not m.exists("/dir1/dir2/file") 79 | assert not m.exists("/dir1/dir2") 80 | assert not m.exists("/dir1") 81 | 82 | with pytest.raises(FileNotFoundError): 83 | m.rm("/dir1", recursive=True) 84 | 85 | 86 | def test_rm_multiple_files(m): 87 | m.touch("/dir/file1") 88 | m.touch("/dir/file2") 89 | 90 | m.rm(["/dir/file1", "/dir/file2"]) 91 | assert not m.ls("/") 92 | 93 | 94 | def test_rm_file(m): 95 | m.touch("/dir/file") 96 | with pytest.raises(IsADirectoryError): 97 | m.rm_file("/dir") 98 | 99 | with pytest.raises(FileNotFoundError): 100 | m.rm_file("/dir/foo") 101 | 102 | m.rm_file("/dir/file") 103 | assert not m.exists("/dir/file") 104 | 105 | 106 | def test_rewind(m): 107 | # https://github.com/fsspec/filesystem_spec/issues/349 108 | with m.open("src/file.txt", "w") as f: 109 | f.write("content") 110 | with m.open("src/file.txt") as f: 111 | assert f.tell() == 0 112 | 113 | 114 | def test_no_rewind_append_mode(m): 115 | # https://github.com/fsspec/filesystem_spec/issues/349 116 | with m.open("src/file.txt", "w") as f: 117 | f.write("content") 118 | with m.open("src/file.txt", "a") as f: 119 | assert f.tell() == 7 120 | 121 | 122 | def test_seekable(m): 123 | fn0 = "foo.txt" 124 | with m.open(fn0, "wb") as f: 125 | f.write(b"data") 126 | 127 | f = m.open(fn0, "rt") 128 | assert f.seekable(), "file is not seekable" 129 | f.seek(1) 130 | assert f.read(1) == "a" 131 | assert f.tell() == 2 132 | 133 | 134 | def test_try_open_directory(m): 135 | m.touch("/dir/file") 136 | with pytest.raises(IsADirectoryError): 137 | m.open("dir") 138 | 139 | 140 | def test_try_open_not_existing_file(m): 141 | with 
pytest.raises(FileNotFoundError): 142 | m.open("not-existing-file") 143 | 144 | 145 | def test_try_open_file_on_super_prefix(m): 146 | m.touch("/afile") 147 | with pytest.raises(NotADirectoryError): 148 | m.open("/afile/file") 149 | 150 | 151 | def test_empty_raises(m): 152 | with pytest.raises(FileNotFoundError): 153 | m.ls("nonexistent") 154 | 155 | with pytest.raises(FileNotFoundError): 156 | m.info("nonexistent") 157 | 158 | 159 | def test_moves(m): 160 | m.touch("source.txt") 161 | m.mv("source.txt", "target.txt") 162 | 163 | m.touch("source2.txt") 164 | m.mv("source2.txt", "target2.txt", recursive=True) 165 | assert m.find("") == ["/target.txt", "/target2.txt"] 166 | 167 | 168 | def test_remove_all(m): 169 | m.touch("afile") 170 | m.rm("/", recursive=True) 171 | assert not m.ls("/") 172 | 173 | 174 | def test_created(m): 175 | m.touch("/dir/afile") 176 | assert m.created("/dir/afile") == m.trie["/dir/afile"].created 177 | assert m.created("/dir") is None 178 | 179 | 180 | def test_info(m): 181 | m.touch("/dir/file") 182 | 183 | assert m.info("/") == {"name": "", "size": 0, "type": "directory"} 184 | assert m.info("/dir") == {"name": "/dir", "size": 0, "type": "directory"} 185 | assert m.info("/dir/file") == { 186 | "name": "/dir/file", 187 | "size": 0, 188 | "type": "file", 189 | "created": ANY, 190 | } 191 | 192 | with pytest.raises(FileNotFoundError): 193 | m.info("/not-existing-file") 194 | 195 | 196 | def test_cp_file(m): 197 | m.pipe_file("/afile", b"content") 198 | m.cp_file("/afile", "/bfile") 199 | assert m.cat_file("/bfile") == m.cat_file("/afile") == b"content" 200 | 201 | 202 | def test_transaction(m): 203 | m.start_transaction() 204 | m.touch("/dir/afile") 205 | assert m.find("/") == [] 206 | m.end_transaction() 207 | assert m.find("/") == ["/dir/afile"] 208 | 209 | with m.transaction: 210 | m.touch("/dir/bfile") 211 | assert m.find("/") == ["/dir/afile"] 212 | assert m.find("/") == ["/dir/afile", "/dir/bfile"] 213 | 
-------------------------------------------------------------------------------- /src/morefs/overlay.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | import shutil 4 | 5 | import fsspec 6 | 7 | 8 | class OverlayFileSystem(fsspec.AbstractFileSystem): # pylint: disable=abstract-method 9 | cachable = False 10 | 11 | def __init__(self, *fses: fsspec.AbstractFileSystem, **kwargs): 12 | storage_options = { 13 | key: value for key, value in kwargs.items() if key.startswith("fs_") 14 | } 15 | self.fses: list[fsspec.AbstractFileSystem] = list(fses) 16 | self.fses.extend(kwargs.pop("filesystems", [])) 17 | for proto, options in kwargs.items(): 18 | if proto.startswith("fs_"): 19 | continue 20 | if options is None: 21 | options = {} 22 | self.fses.append(fsspec.filesystem(proto, **options)) 23 | super().__init__(*self.fses, **storage_options) 24 | 25 | @property 26 | def upper_fs(self): 27 | return self.fses[0] 28 | 29 | def __getattr__(self, proto): 30 | for fs in self.fses: 31 | if isinstance(fs.protocol, str): 32 | protocols = (fs.protocol,) 33 | else: 34 | protocols = fs.protocol 35 | 36 | for fs_proto in protocols: 37 | if proto == fs_proto: 38 | setattr(self, proto, fs) 39 | return fs 40 | raise AttributeError 41 | 42 | def ls(self, path, detail=False, **kwargs): 43 | listing = [] 44 | for fs in self.fses: 45 | try: 46 | listing.extend(fs.ls(path, detail=detail, **kwargs)) 47 | except (FileNotFoundError, NotImplementedError): 48 | continue 49 | 50 | if not detail: 51 | return sorted({item.strip("/") for item in listing}) 52 | 53 | out = {} 54 | for item in listing: 55 | name = item["name"].strip("/") 56 | out.setdefault(name, {**item, "name": name}) 57 | 58 | return [item for _, item in sorted(out.items())] 59 | 60 | @staticmethod 61 | def _iterate_fs_with(func): 62 | def inner(self, path, *args, **kwargs): 63 | for fs in self.fses: 64 | try: 65 | return getattr(fs, func)(path, *args, **kwargs) 66 | 
except ( 67 | FileNotFoundError, 68 | NotImplementedError, 69 | AttributeError, 70 | ): 71 | continue 72 | raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), path) 73 | 74 | return inner 75 | 76 | @staticmethod 77 | def _raise_readonly(path, *args, **kwargs): 78 | raise OSError(errno.EROFS, os.strerror(errno.EROFS), path) 79 | 80 | info = _iterate_fs_with.__get__(object)("info") 81 | created = _iterate_fs_with.__get__(object)("created") 82 | modified = _iterate_fs_with.__get__(object)("modified") 83 | 84 | def mkdir(self, path, create_parents=True, **kwargs): 85 | # if create_parents is False: 86 | if self.exists(path): 87 | raise FileExistsError(errno.EEXIST, os.strerror(errno.EEXIST), path) 88 | parent = self._parent(path) 89 | if not create_parents and not self.isdir(parent): 90 | raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path) 91 | self.upper_fs.mkdir(path, create_parents=True, **kwargs) 92 | 93 | def makedirs(self, path, exist_ok=False): 94 | self.upper_fs.makedirs(path, exist_ok=exist_ok) 95 | 96 | def rmdir(self, path): 97 | self.upper_fs.rmdir(path) 98 | 99 | def _rm(self, path): 100 | self.upper_fs._rm(path) # pylint: disable=protected-access 101 | 102 | def cp_file(self, path1, path2, **kwargs): 103 | src_fs = None 104 | for fs in self.fses: 105 | if fs.exists(path1): 106 | src_fs = fs 107 | break 108 | 109 | if not src_fs: 110 | raise FileNotFoundError(errno.ENOENT, os.strerror(errno.ENOENT), path1) 111 | if src_fs == self.upper_fs: 112 | return src_fs.cp_file(path1, path2) 113 | 114 | with src_fs.open(path1) as src, self.upper_fs.open(path2, "wb") as dst: 115 | shutil.copyfileobj(src, dst) 116 | 117 | def _open(self, path, mode="rb", **kwargs): # pylint: disable=arguments-differ 118 | if "rb" in mode: 119 | for fs in self.fses: 120 | try: 121 | # pylint: disable=protected-access 122 | return fs._open(path, mode=mode, **kwargs) 123 | except ( 124 | FileNotFoundError, 125 | NotImplementedError, 126 | AttributeError, 127 | ): 128 
| continue 129 | raise OSError(errno.ENOENT, os.strerror(errno.ENOENT), path) 130 | 131 | if "ab" in mode: 132 | try: 133 | info = self.upper_fs.info(path) 134 | if info["type"] == "directory": 135 | raise IsADirectoryError( 136 | errno.EISDIR, 137 | os.strerror(errno.EISDIR), 138 | path, 139 | ) 140 | except FileNotFoundError as exc: 141 | for fs in self.fses[1:]: 142 | try: 143 | info = fs.info(path) 144 | if info["type"] == "directory": 145 | raise IsADirectoryError( 146 | errno.EISDIR, 147 | os.strerror(errno.EISDIR), 148 | path, 149 | ) from exc 150 | return self._raise_readonly(path) 151 | except ( 152 | FileNotFoundError, 153 | NotImplementedError, 154 | AttributeError, 155 | ): 156 | continue 157 | # pylint: disable=protected-access 158 | return self.upper_fs._open(path, mode=mode, **kwargs) 159 | 160 | def sign(self, path, expiration=100, **kwargs): 161 | return self.upper_fs.sign(path, expiration, **kwargs) 162 | 163 | if hasattr(fsspec.AbstractFileSystem, "fsid"): 164 | 165 | @property 166 | def fsid(self): 167 | return "overlay_" + "+".join(fs.fsid for fs in self.fses) 168 | -------------------------------------------------------------------------------- /tests/test_dictfs.py: -------------------------------------------------------------------------------- 1 | import errno 2 | from unittest.mock import ANY 3 | 4 | import pytest 5 | 6 | from morefs.dict import DictFS 7 | 8 | 9 | @pytest.fixture 10 | def dfs(): 11 | return DictFS() 12 | 13 | 14 | def test_dictfs_should_not_be_cached(): 15 | assert DictFS() is not DictFS() 16 | 17 | 18 | def test_strip(dfs): 19 | assert dfs._strip_protocol("") == "" 20 | assert dfs._strip_protocol("/") == "" 21 | assert dfs._strip_protocol("dictfs://") == "" 22 | assert dfs._strip_protocol("afile") == "/afile" 23 | assert dfs._strip_protocol("dir/afile") == "/dir/afile" 24 | assert dfs._strip_protocol("/b/c") == "/b/c" 25 | assert dfs._strip_protocol("/b/c/") == "/b/c" 26 | 27 | 28 | def test_info(dfs): 29 | 
dfs.mkdir("/dir") 30 | dfs.pipe_file("/dir/file", b"contents") 31 | 32 | assert dfs.info("/") == {"name": "", "size": 0, "type": "directory"} 33 | assert dfs.info("") == {"name": "", "size": 0, "type": "directory"} 34 | assert dfs.info("/dir") == {"name": "/dir", "size": 0, "type": "directory"} 35 | assert dfs.info("/dir/file") == { 36 | "name": "/dir/file", 37 | "size": 8, 38 | "type": "file", 39 | "created": ANY, 40 | } 41 | 42 | 43 | def test_info_errors(dfs): 44 | dfs.touch("/afile") 45 | with pytest.raises(NotADirectoryError): 46 | dfs.info("/afile/foo") 47 | 48 | with pytest.raises(FileNotFoundError): 49 | dfs.info("/not-existing") 50 | 51 | 52 | def test_ls(dfs): 53 | dfs.makedirs("/dir/dir1") 54 | dfs.touch("/dir/afile") 55 | dfs.touch("/dir/dir1/bfile") 56 | dfs.touch("/dir/dir1/cfile") 57 | 58 | assert dfs.ls("/", False) == ["/dir"] 59 | assert dfs.ls("/dir", False) == ["/dir/dir1", "/dir/afile"] 60 | assert dfs.ls("/dir", True)[0]["type"] == "directory" 61 | assert dfs.ls("/dir", True)[1]["type"] == "file" 62 | 63 | assert len(dfs.ls("/dir/dir1")) == 2 64 | assert dfs.ls("/dir/afile") == ["/dir/afile"] 65 | assert dfs.ls("/dir/dir1/bfile") == ["/dir/dir1/bfile"] 66 | assert dfs.ls("/dir/dir1/cfile") == ["/dir/dir1/cfile"] 67 | 68 | assert dfs.ls("/dir/afile", True)[0] == { 69 | "name": "/dir/afile", 70 | "type": "file", 71 | "size": 0, 72 | "created": ANY, 73 | } 74 | with pytest.raises(FileNotFoundError): 75 | dfs.ls("/dir/not-existing-file") 76 | 77 | with pytest.raises(NotADirectoryError): 78 | dfs.ls("/dir/afile/foo") 79 | 80 | 81 | def test_rm_file(dfs): 82 | dfs.touch("/afile") 83 | dfs.rm_file("/afile") 84 | assert not dfs.exists("/afile") 85 | 86 | 87 | def test_try_rm_file_not_existing(dfs): 88 | with pytest.raises(FileNotFoundError): 89 | dfs.rm_file("/not-existing") 90 | 91 | 92 | def test_try_rm_file_directory(dfs): 93 | dfs.mkdir("dir") 94 | with pytest.raises(IsADirectoryError): 95 | dfs.rm_file("/dir") 96 | 97 | with 
pytest.raises(FileNotFoundError): 98 | dfs.rm_file("/dir/file") 99 | assert dfs.isdir("/dir") 100 | 101 | 102 | def test_try_rm_file_under_filepath(dfs): 103 | dfs.mkdir("/dir") 104 | dfs.touch("/dir/file") 105 | with pytest.raises(NotADirectoryError): 106 | dfs.rm_file("/dir/file/foo") 107 | 108 | 109 | def test_rmdir(dfs): 110 | dfs.mkdir("/dir") 111 | dfs.rmdir("/dir") 112 | assert not dfs.exists("/dir") 113 | 114 | 115 | def test_try_rmdir_not_existing(dfs): 116 | with pytest.raises(FileNotFoundError): 117 | dfs.rmdir("/dir") 118 | 119 | 120 | def test_try_rmdir_file(dfs): 121 | dfs.touch("/afile") 122 | with pytest.raises(NotADirectoryError): 123 | dfs.rmdir("/afile") 124 | assert dfs.exists("/afile") 125 | 126 | 127 | def test_try_rmdir_non_empty_directory(dfs): 128 | dfs.mkdir("/dir") 129 | dfs.touch("/dir/afile") 130 | with pytest.raises(OSError) as exc: # noqa: PT011 131 | dfs.rmdir("/dir") 132 | assert exc.value.errno == errno.ENOTEMPTY 133 | 134 | 135 | def test_try_rmdir_under_filepath(dfs): 136 | dfs.mkdir("/dir") 137 | dfs.touch("/dir/file") 138 | with pytest.raises(NotADirectoryError): 139 | dfs.rmdir("/dir/file/foo") 140 | 141 | 142 | def test_rm_multiple_files(dfs): 143 | dfs.mkdir("/dir") 144 | dfs.touch("/dir/file1") 145 | dfs.touch("/dir/file2") 146 | 147 | dfs.rm(["/dir/file1", "/dir/file2", "/dir", "/"]) 148 | assert not dfs.ls("/") 149 | 150 | 151 | def test_remove_all(dfs): 152 | dfs.touch("afile") 153 | dfs.rm("/", recursive=True) 154 | assert not dfs.ls("/") 155 | 156 | 157 | def test_rm_errors(dfs): 158 | with pytest.raises(FileNotFoundError): 159 | dfs.rm(["/dir", "/dir2"], recursive=True) 160 | 161 | 162 | def test_mkdir(dfs): 163 | dfs.mkdir("/dir") 164 | dfs.touch("/afile") 165 | with pytest.raises(FileExistsError): 166 | dfs.mkdir("/dir") 167 | 168 | with pytest.raises(FileExistsError): 169 | dfs.mkdir("/afile") 170 | 171 | with pytest.raises(NotADirectoryError): 172 | dfs.mkdir("/afile/foo") 173 | 174 | dfs.mkdir("/dir/dir1/dir2") 
175 | assert dfs.isdir("/dir/dir1/dir2") 176 | 177 | 178 | def test_mkdir_no_parents(dfs): 179 | dfs.mkdir("/dir", create_parents=False) 180 | dfs.touch("/afile") 181 | with pytest.raises(FileExistsError): 182 | dfs.mkdir("/dir", create_parents=False) 183 | 184 | with pytest.raises(FileExistsError): 185 | dfs.mkdir("/afile", create_parents=False) 186 | 187 | with pytest.raises(NotADirectoryError): 188 | dfs.mkdir("/afile/foo", create_parents=False) 189 | 190 | with pytest.raises(FileNotFoundError): 191 | dfs.mkdir("/dir/dir1/dir2", create_parents=False) 192 | 193 | 194 | def test_makedirs(dfs): 195 | dfs.touch("/afile") 196 | dfs.makedirs("/dir1/dir2") 197 | assert dfs.isdir("/dir1/dir2") 198 | 199 | with pytest.raises(FileExistsError): 200 | dfs.makedirs("/dir1/dir2") 201 | 202 | with pytest.raises(NotADirectoryError): 203 | dfs.makedirs("/afile/foo") 204 | 205 | with pytest.raises(FileExistsError): 206 | dfs.makedirs("/dir1/dir2/dir3") 207 | 208 | 209 | def test_makedirs_exist_ok(dfs): 210 | dfs.touch("/afile") 211 | dfs.makedirs("/dir1/dir2", exist_ok=True) 212 | assert dfs.isdir("/dir1/dir2") 213 | dfs.makedirs("/dir1/dir2", exist_ok=True) 214 | 215 | with pytest.raises(NotADirectoryError): 216 | dfs.makedirs("/afile/foo", exist_ok=True) 217 | 218 | dfs.makedirs("/dir1/dir2/dir3", exist_ok=True) 219 | assert dfs.isdir("/dir1/dir2/dir3") 220 | 221 | 222 | def test_rewind(dfs): 223 | # https://github.com/fsspec/filesystem_spec/issues/349 224 | dfs.mkdir("src") 225 | with dfs.open("src/file.txt", "w") as f: 226 | f.write("content") 227 | with dfs.open("src/file.txt") as f: 228 | assert f.tell() == 0 229 | 230 | 231 | def test_no_rewind_append_mode(dfs): 232 | # https://github.com/fsspec/filesystem_spec/issues/349 233 | dfs.mkdir("src") 234 | 235 | with dfs.open("src/file.txt", "w") as f: 236 | f.write("content") 237 | with dfs.open("src/file.txt", "a") as f: 238 | assert f.tell() == 7 239 | 240 | 241 | def test_seekable(dfs): 242 | fn0 = "foo.txt" 243 | with 
dfs.open(fn0, "wb") as f: 244 | f.write(b"data") 245 | 246 | f = dfs.open(fn0, "rt") 247 | assert f.seekable(), "file is not seekable" 248 | f.seek(1) 249 | assert f.read(1) == "a" 250 | assert f.tell() == 2 251 | 252 | 253 | def test_try_open_directory(dfs): 254 | dfs.mkdir("/dir") 255 | with pytest.raises(IsADirectoryError): 256 | dfs.open("dir") 257 | 258 | 259 | def test_try_open_not_existing_file(dfs): 260 | with pytest.raises(FileNotFoundError): 261 | dfs.open("not-existing-file") 262 | 263 | 264 | def test_try_open_file_on_super_prefix(dfs): 265 | dfs.touch("/afile") 266 | with pytest.raises(NotADirectoryError): 267 | dfs.open("/afile/file") 268 | 269 | 270 | def test_created(dfs): 271 | dfs.mkdir("/dir") 272 | dfs.touch("/dir/afile") 273 | assert dfs.created("/dir/afile") == dfs.store.get(["dir", "afile"]).created 274 | assert dfs.created("/dir") is None 275 | 276 | 277 | def test_cp_file(dfs): 278 | dfs.pipe_file("/afile", b"content") 279 | dfs.cp_file("/afile", "/bfile") 280 | assert dfs.cat_file("/bfile") == dfs.cat_file("/afile") == b"content" 281 | 282 | 283 | def test_cp_file_directory(dfs): 284 | dfs.mkdir("/dir") 285 | dfs.cp_file("/dir", "/dir2") 286 | assert dfs.isdir("/dir") 287 | 288 | 289 | def test_transaction(dfs): 290 | dfs.start_transaction() 291 | dfs.mkdir("/dir") 292 | dfs.touch("/dir/afile") 293 | assert dfs.find("/") == [] 294 | dfs.end_transaction() 295 | assert dfs.find("/") == ["/dir/afile"] 296 | 297 | with dfs.transaction: 298 | dfs.touch("/dir/bfile") 299 | assert dfs.find("/") == ["/dir/afile"] 300 | assert dfs.find("/") == ["/dir/afile", "/dir/bfile"] 301 | -------------------------------------------------------------------------------- /src/morefs/dict.py: -------------------------------------------------------------------------------- 1 | import errno 2 | import os 3 | from collections.abc import Iterable 4 | from datetime import datetime 5 | from functools import lru_cache 6 | from typing import Any, Optional, Union 7 | 8 | 
class Store(dict):
    """Nested ``dict`` tree addressed by a sequence of path components.

    Every lookup walks the tree one component at a time, so a missing
    component surfaces as ``KeyError`` and an attempt to descend into an
    object that does not support subscripting surfaces as ``TypeError``.
    """

    def __init__(self, paths: Iterable[str] = ()) -> None:
        super().__init__()
        # Components that were used to create this node, kept as a tuple.
        self.paths = tuple(paths)

    def new_child(self, paths: Iterable[str]) -> None:
        """Create an empty container of the same type at ``paths``.

        Raises:
            ValueError: if ``paths`` is empty or the entry already exists.
            KeyError: if a parent component is missing.
        """
        # Materialise once: ``paths`` is needed both for the child node and
        # for the insertion, and a one-shot iterator would be exhausted by
        # the first use.
        paths = tuple(paths or ())
        self.set(paths, type(self)(paths=paths))

    def set(self, paths: Iterable[str], value: Any, overwrite: bool = False) -> None:
        """Store ``value`` at ``paths``.

        Raises:
            ValueError: if ``paths`` is empty, or the entry exists and
                ``overwrite`` is false.
            KeyError: if a parent component is missing.
        """
        # Iterators are always truthy, so the emptiness test is only
        # meaningful after converting to a tuple.
        paths = tuple(paths or ())
        if not paths:
            raise ValueError("no path supplied")

        *rest, key = paths
        child = self.get(rest)

        if not overwrite and key in child:
            raise ValueError("cannot overwrite - item exists")
        child[key] = value

    def get(self, paths: Iterable[str]) -> "ContainerOrFile":  # type: ignore[override]
        """Return the object found by following ``paths`` from this node.

        An empty ``paths`` returns the node itself.
        """
        child = self
        for path in paths:
            child = child[path]
        return child

    def delete(self, paths: Iterable[str]) -> None:
        """Remove the entry at ``paths``; an empty ``paths`` clears this node.

        Raises:
            KeyError: if any component along ``paths`` is missing.
        """
        paths = tuple(paths or ())
        if not paths:
            self.clear()
            return

        *rest, key = paths
        child = self.get(rest)
        del child[key]
def ls(self, path: str, detail: bool = False, **kwargs: Any):
    """List ``path``.

    A directory yields one entry per child; anything else yields a single
    entry for the object itself. With ``detail`` the entries are the dicts
    built by ``_info``, otherwise they are normalised path strings. Passing
    ``sort=True`` orders directory children by key.

    Raises:
        FileNotFoundError: a component of ``path`` is missing.
        NotADirectoryError: the lookup tried to descend into a non-container.
    """
    parts = self.path_parts(path)
    target = self.join_paths(parts)

    try:
        node = self.store.get(parts)
    except KeyError as exc:
        raise oserror(errno.ENOENT, target) from exc
    except TypeError as exc:
        raise oserror(errno.ENOTDIR, target) from exc

    if isinstance(node, dict):
        children = sorted(node.items()) if kwargs.get("sort") else node.items()
        named = [(self.join_paths((*parts, key)), child) for key, child in children]
    else:
        # A single non-directory object is listed as itself.
        named = [(target, node)]

    if detail:
        return [self._info(name, child) for name, child in named]
    return [name for name, _ in named]
def mkdir(self, path: str, create_parents: bool = True, **kwargs) -> None:
    """Create a directory at ``path``.

    With ``create_parents`` the work is delegated to ``makedirs``; otherwise
    only the final component is created.

    Raises:
        FileExistsError: something already exists at ``path``.
        NotADirectoryError: the lookup tried to descend into a non-container.
    """
    parts = self.path_parts(path)
    target = self.join_paths(parts)

    try:
        self.store.get(parts)
    except KeyError:
        # Nothing there yet, so creation can proceed.
        pass
    except TypeError as exc:
        raise oserror(errno.ENOTDIR, target) from exc
    else:
        raise oserror(errno.EEXIST, target)

    if not create_parents:
        self._mkdir_paths(parts)
        return None
    return self.makedirs(path, exist_ok=True)
def _open(
    self,
    path: str,
    mode: str = "rb",
    block_size=None,  # noqa: ARG002
    autocommit=True,  # noqa: ARG002
    cache_options=None,  # noqa: ARG002
    **kwargs,
) -> "DictFile":
    """Return the file object for ``path``.

    ``"wb"`` always builds a fresh ``DictFile`` (optionally pre-filled from
    the ``data`` keyword). Every other mode returns the stored file object
    itself, positioned at the end for ``"ab"`` and at the start otherwise.

    Raises:
        IsADirectoryError: ``path`` is a directory.
        FileNotFoundError: ``path`` does not exist and ``mode`` is not
            ``"wb"``.
    """
    paths = self.path_parts(path)
    normpath = self.join_paths(paths)

    try:
        info = self.info(path, file=True)
    except FileNotFoundError:
        # Only "wb" can create a file. Every other mode needs the existing
        # entry below, so swallowing the error here would only defer the
        # failure to an UnboundLocalError on ``info``.
        if mode != "wb":
            raise
    else:
        if info["type"] == "directory":
            raise oserror(errno.EISDIR, normpath)

    if mode == "wb":
        file = DictFile(self, normpath, data=kwargs.get("data"))
        # NOTE(review): ``_intrans`` is presumably true while a transaction
        # is active, leaving the commit to the transaction - confirm
        # against fsspec.
        if not self._intrans:
            file.commit()
    else:
        file = info["file"]
        file.seek(0, os.SEEK_END if mode == "ab" else os.SEEK_SET)
    return file
class DictFile(MemoryFile):
    """``MemoryFile`` that stores itself in its filesystem's ``store``."""

    def commit(self) -> None:
        """Store this file at ``self.path``, overwriting any existing entry.

        Lookup failures from the store are translated into ``OSError``:
        ``TypeError`` -> ENOTDIR, ``ValueError`` -> EEXIST,
        ``KeyError`` -> ENOENT.
        """
        owner = self.fs
        parts = owner.path_parts(self.path)
        try:
            owner.store.set(parts, self, overwrite=True)
        except (TypeError, ValueError, KeyError) as exc:
            if isinstance(exc, TypeError):
                code = errno.ENOTDIR
            elif isinstance(exc, ValueError):
                code = errno.EEXIST
            else:
                code = errno.ENOENT
            raise oserror(code, self.path) from exc

    def to_json(self, file: bool = False) -> dict[str, Any]:
        """Describe this file as an info dict.

        When ``file`` is true the dict also carries the file object itself
        under the ``"file"`` key.
        """
        # ``size`` and ``created`` are not defined in this class; presumably
        # they come from ``MemoryFile``.
        extra = {"file": self} if file else {}
        return {
            "name": self.path,
            "size": self.size,
            "type": "file",
            "created": self.created,
            **extra,
        }
22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright 2022 Iterative. 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------