├── .coveragerc ├── .github └── workflows │ ├── pre-commit.yml │ └── test.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .readthedocs.yaml ├── CONTRIBUTING.md ├── LICENSE.txt ├── MANIFEST.in ├── README.rst ├── continuous_integration ├── condarc ├── environment-alldeps.yml └── environment-nodeps.yml ├── doc ├── Makefile ├── make.bat ├── requirements.txt └── source │ ├── _static │ └── placeholder │ ├── changelog.rst │ ├── conf.py │ └── index.rst ├── requirements.txt ├── setup.cfg ├── setup.py └── zict ├── __init__.py ├── async_buffer.py ├── buffer.py ├── cache.py ├── common.py ├── file.py ├── func.py ├── keymap.py ├── lmdb.py ├── lru.py ├── py.typed ├── sieve.py ├── tests ├── __init__.py ├── conftest.py ├── test_async_buffer.py ├── test_buffer.py ├── test_cache.py ├── test_common.py ├── test_file.py ├── test_func.py ├── test_keymap.py ├── test_lmdb.py ├── test_lru.py ├── test_sieve.py ├── test_utils.py ├── test_zip.py └── utils_test.py ├── utils.py └── zip.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = 3 | zict 4 | omit = 5 | 6 | [report] 7 | show_missing = True 8 | exclude_lines = 9 | # re-enable the standard pragma 10 | pragma: nocover 11 | pragma: no cover 12 | # always ignore type checking blocks 13 | TYPE_CHECKING 14 | @overload 15 | 16 | [html] 17 | directory = coverage_html_report 18 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit.yml: -------------------------------------------------------------------------------- 1 | name: Linting 2 | 3 | on: 4 | push: 5 | branches: main 6 | pull_request: 7 | branches: main 8 | 9 | jobs: 10 | checks: 11 | name: pre-commit hooks 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3.3.0 15 | - uses: actions/setup-python@v4 16 | with: 17 | python-version: '3.9' 18 | - uses: pre-commit/action@v3.0.0 19 | 
-------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | test: 7 | name: ${{ matrix.os }} ${{ matrix.python-version }} ${{ matrix.environment }} 8 | runs-on: ${{ matrix.os }} 9 | strategy: 10 | fail-fast: false 11 | matrix: 12 | os: [ubuntu-latest, windows-latest, macos-latest] 13 | python-version: ["3.9", "3.11"] 14 | environment: [alldeps] 15 | include: 16 | - os: ubuntu-latest 17 | python-version: "3.10" 18 | environment: alldeps 19 | - os: ubuntu-latest 20 | python-version: "3.9" 21 | environment: nodeps 22 | 23 | steps: 24 | - name: Checkout source 25 | uses: actions/checkout@v3.3.0 26 | with: 27 | fetch-depth: 0 28 | 29 | - name: Setup Conda Environment 30 | uses: conda-incubator/setup-miniconda@v2.2.0 31 | with: 32 | miniforge-variant: Mambaforge 33 | miniforge-version: latest 34 | use-mamba: true 35 | activate-environment: test 36 | condarc-file: continuous_integration/condarc 37 | python-version: ${{ matrix.python-version }} 38 | environment-file: continuous_integration/environment-${{ matrix.environment }}.yml 39 | 40 | - name: Install zict 41 | shell: bash -l {0} 42 | run: python setup.py install 43 | 44 | - name: mamba list 45 | shell: bash -l {0} 46 | run: mamba list 47 | 48 | - name: mamba env export 49 | shell: bash -l {0} 50 | run: | 51 | echo -e "--\n--Conda Environment (re-create this with \`mamba env create --name -f \`)\n--" 52 | mamba env export | grep -E -v '^prefix:.*$' 53 | 54 | - name: Run pytest 55 | shell: bash -l {0} 56 | run: pytest --doctest-modules zict 57 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | *.zip 3 | doc/build/* 4 | .idea 5 | build/ 6 | dist/ 7 | .mypy_cache/ 8 | *.egg-info/ 9 | 
-------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: https://github.com/MarcoGorelli/absolufy-imports 3 | rev: v0.3.1 4 | hooks: 5 | - id: absolufy-imports 6 | name: absolufy-imports 7 | - repo: https://github.com/pycqa/isort 8 | rev: 5.12.0 9 | hooks: 10 | - id: isort 11 | language_version: python3 12 | - repo: https://github.com/asottile/pyupgrade 13 | rev: v3.3.1 14 | hooks: 15 | - id: pyupgrade 16 | args: 17 | - --py39-plus 18 | - repo: https://github.com/psf/black 19 | rev: 23.1.0 20 | hooks: 21 | - id: black 22 | language_version: python3 23 | args: 24 | - --target-version=py39 25 | - repo: https://github.com/pycqa/flake8 26 | rev: 6.0.0 27 | hooks: 28 | - id: flake8 29 | language_version: python3 30 | additional_dependencies: 31 | # NOTE: autoupdate does not pick up flake8-bugbear since it is a transitive 32 | # dependency. Make sure to update flake8-bugbear manually on a regular basis. 
33 | - flake8-bugbear==23.2.13 34 | - repo: https://github.com/codespell-project/codespell 35 | rev: v2.2.2 36 | hooks: 37 | - id: codespell 38 | types_or: [rst, markdown] 39 | files: doc 40 | - repo: https://github.com/pre-commit/mirrors-mypy 41 | rev: v1.0.0 42 | hooks: 43 | - id: mypy 44 | # Override default --ignore-missing-imports 45 | # Use setup.cfg if possible instead of adding command line parameters here 46 | args: [ --warn-unused-configs ] 47 | additional_dependencies: 48 | # Type stubs 49 | - types-psutil 50 | - types-setuptools 51 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details 2 | version: 2 3 | build: 4 | os: ubuntu-22.04 5 | tools: 6 | python: "3.9" 7 | 8 | sphinx: 9 | configuration: doc/source/conf.py 10 | 11 | python: 12 | install: 13 | - requirements: doc/requirements.txt 14 | - method: pip 15 | path: . -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Dask is a community maintained project. We welcome contributions in the form of bug 2 | reports, documentation, code, design proposals, and more. 3 | 4 | For general information on how to contribute see 5 | https://docs.dask.org/en/latest/develop.html. 6 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016 Matthew Rocklin 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are met: 7 | 8 | a. 
Redistributions of source code must retain the above copyright notice, 9 | this list of conditions and the following disclaimer. 10 | b. Redistributions in binary form must reproduce the above copyright 11 | notice, this list of conditions and the following disclaimer in the 12 | documentation and/or other materials provided with the distribution. 13 | c. Neither the name of the copyright holder nor the names of its contributors 14 | may be used to endorse or promote products derived from this software 15 | without specific prior written permission. 16 | 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR 22 | ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 28 | DAMAGE. 
29 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include zict *.py 2 | recursive-include doc *.rst 3 | 4 | include setup.py 5 | include README.rst 6 | include LICENSE.txt 7 | include MANIFEST.in 8 | include requirements.txt 9 | include zict/py.typed 10 | 11 | prune doc/build 12 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Zict 2 | ==== 3 | 4 | |Build Status| |Linting| 5 | 6 | Mutable Mapping tools. See documentation_. 7 | 8 | .. _documentation: https://zict.readthedocs.io/en/latest/ 9 | .. |Build Status| image:: https://github.com/dask/zict/actions/workflows/test.yml/badge.svg 10 | :target: https://github.com/dask/zict/actions/workflows/test.yml 11 | .. |Linting| image:: https://github.com/dask/zict/actions/workflows/pre-commit.yml/badge.svg 12 | :target: https://github.com/dask/zict/actions/workflows/pre-commit.yml 13 | -------------------------------------------------------------------------------- /continuous_integration/condarc: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - defaults 4 | channel_priority: true 5 | auto_activate_base: false 6 | remote_backoff_factor: 20 7 | remote_connect_timeout_secs: 20.0 8 | remote_max_retries: 10 9 | remote_read_timeout_secs: 60.0 10 | -------------------------------------------------------------------------------- /continuous_integration/environment-alldeps.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - psutil # Enables extra tests 6 | - python-lmdb # Optional dependency 7 | - pytest 8 | - pytest-asyncio 9 | - pytest-repeat 10 | - pytest-timeout 11 | 
-------------------------------------------------------------------------------- /continuous_integration/environment-nodeps.yml: -------------------------------------------------------------------------------- 1 | name: test 2 | channels: 3 | - conda-forge 4 | dependencies: 5 | - pytest 6 | - pytest-asyncio 7 | - pytest-repeat 8 | - pytest-timeout 9 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | # Makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | PAPER = 8 | BUILDDIR = build 9 | 10 | # User-friendly check for sphinx-build 11 | ifeq ($(shell which $(SPHINXBUILD) >/dev/null 2>&1; echo $$?), 1) 12 | $(error The '$(SPHINXBUILD)' command was not found. Make sure you have Sphinx installed, then set the SPHINXBUILD environment variable to point to the full path of the '$(SPHINXBUILD)' executable. Alternatively you can add the directory with the executable to your PATH. If you don't have Sphinx installed, grab it from http://sphinx-doc.org/) 13 | endif 14 | 15 | # Internal variables. 
16 | PAPEROPT_a4 = -D latex_paper_size=a4 17 | PAPEROPT_letter = -D latex_paper_size=letter 18 | ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 19 | # the i18n builder cannot share the environment and doctrees with the others 20 | I18NSPHINXOPTS = $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) source 21 | 22 | .PHONY: help 23 | help: 24 | @echo "Please use \`make ' where is one of" 25 | @echo " html to make standalone HTML files" 26 | @echo " dirhtml to make HTML files named index.html in directories" 27 | @echo " singlehtml to make a single large HTML file" 28 | @echo " pickle to make pickle files" 29 | @echo " json to make JSON files" 30 | @echo " htmlhelp to make HTML files and a HTML help project" 31 | @echo " qthelp to make HTML files and a qthelp project" 32 | @echo " applehelp to make an Apple Help Book" 33 | @echo " devhelp to make HTML files and a Devhelp project" 34 | @echo " epub to make an epub" 35 | @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" 36 | @echo " latexpdf to make LaTeX files and run them through pdflatex" 37 | @echo " latexpdfja to make LaTeX files and run them through platex/dvipdfmx" 38 | @echo " text to make text files" 39 | @echo " man to make manual pages" 40 | @echo " texinfo to make Texinfo files" 41 | @echo " info to make Texinfo files and run them through makeinfo" 42 | @echo " gettext to make PO message catalogs" 43 | @echo " changes to make an overview of all changed/added/deprecated items" 44 | @echo " xml to make Docutils-native XML files" 45 | @echo " pseudoxml to make pseudoxml-XML files for display purposes" 46 | @echo " linkcheck to check all external links for integrity" 47 | @echo " doctest to run all doctests embedded in the documentation (if enabled)" 48 | @echo " coverage to run coverage check of the documentation (if enabled)" 49 | 50 | .PHONY: clean 51 | clean: 52 | rm -rf $(BUILDDIR)/* 53 | 54 | .PHONY: html 55 | html: 56 | $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) 
$(BUILDDIR)/html 57 | @echo 58 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." 59 | 60 | .PHONY: dirhtml 61 | dirhtml: 62 | $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml 63 | @echo 64 | @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." 65 | 66 | .PHONY: singlehtml 67 | singlehtml: 68 | $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml 69 | @echo 70 | @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." 71 | 72 | .PHONY: pickle 73 | pickle: 74 | $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle 75 | @echo 76 | @echo "Build finished; now you can process the pickle files." 77 | 78 | .PHONY: json 79 | json: 80 | $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json 81 | @echo 82 | @echo "Build finished; now you can process the JSON files." 83 | 84 | .PHONY: htmlhelp 85 | htmlhelp: 86 | $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp 87 | @echo 88 | @echo "Build finished; now you can run HTML Help Workshop with the" \ 89 | ".hhp project file in $(BUILDDIR)/htmlhelp." 90 | 91 | .PHONY: qthelp 92 | qthelp: 93 | $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp 94 | @echo 95 | @echo "Build finished; now you can run "qcollectiongenerator" with the" \ 96 | ".qhcp project file in $(BUILDDIR)/qthelp, like this:" 97 | @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/zict.qhcp" 98 | @echo "To view the help file:" 99 | @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/zict.qhc" 100 | 101 | .PHONY: applehelp 102 | applehelp: 103 | $(SPHINXBUILD) -b applehelp $(ALLSPHINXOPTS) $(BUILDDIR)/applehelp 104 | @echo 105 | @echo "Build finished. The help book is in $(BUILDDIR)/applehelp." 106 | @echo "N.B. You won't be able to view it unless you put it in" \ 107 | "~/Library/Documentation/Help or install it in your application" \ 108 | "bundle." 
109 | 110 | .PHONY: devhelp 111 | devhelp: 112 | $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp 113 | @echo 114 | @echo "Build finished." 115 | @echo "To view the help file:" 116 | @echo "# mkdir -p $$HOME/.local/share/devhelp/zict" 117 | @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/zict" 118 | @echo "# devhelp" 119 | 120 | .PHONY: epub 121 | epub: 122 | $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub 123 | @echo 124 | @echo "Build finished. The epub file is in $(BUILDDIR)/epub." 125 | 126 | .PHONY: latex 127 | latex: 128 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 129 | @echo 130 | @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." 131 | @echo "Run \`make' in that directory to run these through (pdf)latex" \ 132 | "(use \`make latexpdf' here to do that automatically)." 133 | 134 | .PHONY: latexpdf 135 | latexpdf: 136 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 137 | @echo "Running LaTeX files through pdflatex..." 138 | $(MAKE) -C $(BUILDDIR)/latex all-pdf 139 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 140 | 141 | .PHONY: latexpdfja 142 | latexpdfja: 143 | $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex 144 | @echo "Running LaTeX files through platex and dvipdfmx..." 145 | $(MAKE) -C $(BUILDDIR)/latex all-pdf-ja 146 | @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." 147 | 148 | .PHONY: text 149 | text: 150 | $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text 151 | @echo 152 | @echo "Build finished. The text files are in $(BUILDDIR)/text." 153 | 154 | .PHONY: man 155 | man: 156 | $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man 157 | @echo 158 | @echo "Build finished. The manual pages are in $(BUILDDIR)/man." 159 | 160 | .PHONY: texinfo 161 | texinfo: 162 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 163 | @echo 164 | @echo "Build finished. The Texinfo files are in $(BUILDDIR)/texinfo." 
165 | @echo "Run \`make' in that directory to run these through makeinfo" \ 166 | "(use \`make info' here to do that automatically)." 167 | 168 | .PHONY: info 169 | info: 170 | $(SPHINXBUILD) -b texinfo $(ALLSPHINXOPTS) $(BUILDDIR)/texinfo 171 | @echo "Running Texinfo files through makeinfo..." 172 | make -C $(BUILDDIR)/texinfo info 173 | @echo "makeinfo finished; the Info files are in $(BUILDDIR)/texinfo." 174 | 175 | .PHONY: gettext 176 | gettext: 177 | $(SPHINXBUILD) -b gettext $(I18NSPHINXOPTS) $(BUILDDIR)/locale 178 | @echo 179 | @echo "Build finished. The message catalogs are in $(BUILDDIR)/locale." 180 | 181 | .PHONY: changes 182 | changes: 183 | $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes 184 | @echo 185 | @echo "The overview file is in $(BUILDDIR)/changes." 186 | 187 | .PHONY: linkcheck 188 | linkcheck: 189 | $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck 190 | @echo 191 | @echo "Link check complete; look for any errors in the above output " \ 192 | "or in $(BUILDDIR)/linkcheck/output.txt." 193 | 194 | .PHONY: doctest 195 | doctest: 196 | $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest 197 | @echo "Testing of doctests in the sources finished, look at the " \ 198 | "results in $(BUILDDIR)/doctest/output.txt." 199 | 200 | .PHONY: coverage 201 | coverage: 202 | $(SPHINXBUILD) -b coverage $(ALLSPHINXOPTS) $(BUILDDIR)/coverage 203 | @echo "Testing of coverage in the sources finished, look at the " \ 204 | "results in $(BUILDDIR)/coverage/python.txt." 205 | 206 | .PHONY: xml 207 | xml: 208 | $(SPHINXBUILD) -b xml $(ALLSPHINXOPTS) $(BUILDDIR)/xml 209 | @echo 210 | @echo "Build finished. The XML files are in $(BUILDDIR)/xml." 211 | 212 | .PHONY: pseudoxml 213 | pseudoxml: 214 | $(SPHINXBUILD) -b pseudoxml $(ALLSPHINXOPTS) $(BUILDDIR)/pseudoxml 215 | @echo 216 | @echo "Build finished. The pseudo-XML files are in $(BUILDDIR)/pseudoxml." 
217 | -------------------------------------------------------------------------------- /doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | REM Command file for Sphinx documentation 4 | 5 | if "%SPHINXBUILD%" == "" ( 6 | set SPHINXBUILD=sphinx-build 7 | ) 8 | set BUILDDIR=build 9 | set ALLSPHINXOPTS=-d %BUILDDIR%/doctrees %SPHINXOPTS% source 10 | set I18NSPHINXOPTS=%SPHINXOPTS% source 11 | if NOT "%PAPER%" == "" ( 12 | set ALLSPHINXOPTS=-D latex_paper_size=%PAPER% %ALLSPHINXOPTS% 13 | set I18NSPHINXOPTS=-D latex_paper_size=%PAPER% %I18NSPHINXOPTS% 14 | ) 15 | 16 | if "%1" == "" goto help 17 | 18 | if "%1" == "help" ( 19 | :help 20 | echo.Please use `make ^` where ^ is one of 21 | echo. html to make standalone HTML files 22 | echo. dirhtml to make HTML files named index.html in directories 23 | echo. singlehtml to make a single large HTML file 24 | echo. pickle to make pickle files 25 | echo. json to make JSON files 26 | echo. htmlhelp to make HTML files and a HTML help project 27 | echo. qthelp to make HTML files and a qthelp project 28 | echo. devhelp to make HTML files and a Devhelp project 29 | echo. epub to make an epub 30 | echo. latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter 31 | echo. text to make text files 32 | echo. man to make manual pages 33 | echo. texinfo to make Texinfo files 34 | echo. gettext to make PO message catalogs 35 | echo. changes to make an overview over all changed/added/deprecated items 36 | echo. xml to make Docutils-native XML files 37 | echo. pseudoxml to make pseudoxml-XML files for display purposes 38 | echo. linkcheck to check all external links for integrity 39 | echo. doctest to run all doctests embedded in the documentation if enabled 40 | echo. 
coverage to run coverage check of the documentation if enabled 41 | goto end 42 | ) 43 | 44 | if "%1" == "clean" ( 45 | for /d %%i in (%BUILDDIR%\*) do rmdir /q /s %%i 46 | del /q /s %BUILDDIR%\* 47 | goto end 48 | ) 49 | 50 | 51 | REM Check if sphinx-build is available and fallback to Python version if any 52 | %SPHINXBUILD% 1>NUL 2>NUL 53 | if errorlevel 9009 goto sphinx_python 54 | goto sphinx_ok 55 | 56 | :sphinx_python 57 | 58 | set SPHINXBUILD=python -m sphinx.__init__ 59 | %SPHINXBUILD% 2> nul 60 | if errorlevel 9009 ( 61 | echo. 62 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 63 | echo.installed, then set the SPHINXBUILD environment variable to point 64 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 65 | echo.may add the Sphinx directory to PATH. 66 | echo. 67 | echo.If you don't have Sphinx installed, grab it from 68 | echo.http://sphinx-doc.org/ 69 | exit /b 1 70 | ) 71 | 72 | :sphinx_ok 73 | 74 | 75 | if "%1" == "html" ( 76 | %SPHINXBUILD% -b html %ALLSPHINXOPTS% %BUILDDIR%/html 77 | if errorlevel 1 exit /b 1 78 | echo. 79 | echo.Build finished. The HTML pages are in %BUILDDIR%/html. 80 | goto end 81 | ) 82 | 83 | if "%1" == "dirhtml" ( 84 | %SPHINXBUILD% -b dirhtml %ALLSPHINXOPTS% %BUILDDIR%/dirhtml 85 | if errorlevel 1 exit /b 1 86 | echo. 87 | echo.Build finished. The HTML pages are in %BUILDDIR%/dirhtml. 88 | goto end 89 | ) 90 | 91 | if "%1" == "singlehtml" ( 92 | %SPHINXBUILD% -b singlehtml %ALLSPHINXOPTS% %BUILDDIR%/singlehtml 93 | if errorlevel 1 exit /b 1 94 | echo. 95 | echo.Build finished. The HTML pages are in %BUILDDIR%/singlehtml. 96 | goto end 97 | ) 98 | 99 | if "%1" == "pickle" ( 100 | %SPHINXBUILD% -b pickle %ALLSPHINXOPTS% %BUILDDIR%/pickle 101 | if errorlevel 1 exit /b 1 102 | echo. 103 | echo.Build finished; now you can process the pickle files. 
104 | goto end 105 | ) 106 | 107 | if "%1" == "json" ( 108 | %SPHINXBUILD% -b json %ALLSPHINXOPTS% %BUILDDIR%/json 109 | if errorlevel 1 exit /b 1 110 | echo. 111 | echo.Build finished; now you can process the JSON files. 112 | goto end 113 | ) 114 | 115 | if "%1" == "htmlhelp" ( 116 | %SPHINXBUILD% -b htmlhelp %ALLSPHINXOPTS% %BUILDDIR%/htmlhelp 117 | if errorlevel 1 exit /b 1 118 | echo. 119 | echo.Build finished; now you can run HTML Help Workshop with the ^ 120 | .hhp project file in %BUILDDIR%/htmlhelp. 121 | goto end 122 | ) 123 | 124 | if "%1" == "qthelp" ( 125 | %SPHINXBUILD% -b qthelp %ALLSPHINXOPTS% %BUILDDIR%/qthelp 126 | if errorlevel 1 exit /b 1 127 | echo. 128 | echo.Build finished; now you can run "qcollectiongenerator" with the ^ 129 | .qhcp project file in %BUILDDIR%/qthelp, like this: 130 | echo.^> qcollectiongenerator %BUILDDIR%\qthelp\zict.qhcp 131 | echo.To view the help file: 132 | echo.^> assistant -collectionFile %BUILDDIR%\qthelp\zict.qhc 133 | goto end 134 | ) 135 | 136 | if "%1" == "devhelp" ( 137 | %SPHINXBUILD% -b devhelp %ALLSPHINXOPTS% %BUILDDIR%/devhelp 138 | if errorlevel 1 exit /b 1 139 | echo. 140 | echo.Build finished. 141 | goto end 142 | ) 143 | 144 | if "%1" == "epub" ( 145 | %SPHINXBUILD% -b epub %ALLSPHINXOPTS% %BUILDDIR%/epub 146 | if errorlevel 1 exit /b 1 147 | echo. 148 | echo.Build finished. The epub file is in %BUILDDIR%/epub. 149 | goto end 150 | ) 151 | 152 | if "%1" == "latex" ( 153 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 154 | if errorlevel 1 exit /b 1 155 | echo. 156 | echo.Build finished; the LaTeX files are in %BUILDDIR%/latex. 157 | goto end 158 | ) 159 | 160 | if "%1" == "latexpdf" ( 161 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 162 | cd %BUILDDIR%/latex 163 | make all-pdf 164 | cd %~dp0 165 | echo. 166 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 
167 | goto end 168 | ) 169 | 170 | if "%1" == "latexpdfja" ( 171 | %SPHINXBUILD% -b latex %ALLSPHINXOPTS% %BUILDDIR%/latex 172 | cd %BUILDDIR%/latex 173 | make all-pdf-ja 174 | cd %~dp0 175 | echo. 176 | echo.Build finished; the PDF files are in %BUILDDIR%/latex. 177 | goto end 178 | ) 179 | 180 | if "%1" == "text" ( 181 | %SPHINXBUILD% -b text %ALLSPHINXOPTS% %BUILDDIR%/text 182 | if errorlevel 1 exit /b 1 183 | echo. 184 | echo.Build finished. The text files are in %BUILDDIR%/text. 185 | goto end 186 | ) 187 | 188 | if "%1" == "man" ( 189 | %SPHINXBUILD% -b man %ALLSPHINXOPTS% %BUILDDIR%/man 190 | if errorlevel 1 exit /b 1 191 | echo. 192 | echo.Build finished. The manual pages are in %BUILDDIR%/man. 193 | goto end 194 | ) 195 | 196 | if "%1" == "texinfo" ( 197 | %SPHINXBUILD% -b texinfo %ALLSPHINXOPTS% %BUILDDIR%/texinfo 198 | if errorlevel 1 exit /b 1 199 | echo. 200 | echo.Build finished. The Texinfo files are in %BUILDDIR%/texinfo. 201 | goto end 202 | ) 203 | 204 | if "%1" == "gettext" ( 205 | %SPHINXBUILD% -b gettext %I18NSPHINXOPTS% %BUILDDIR%/locale 206 | if errorlevel 1 exit /b 1 207 | echo. 208 | echo.Build finished. The message catalogs are in %BUILDDIR%/locale. 209 | goto end 210 | ) 211 | 212 | if "%1" == "changes" ( 213 | %SPHINXBUILD% -b changes %ALLSPHINXOPTS% %BUILDDIR%/changes 214 | if errorlevel 1 exit /b 1 215 | echo. 216 | echo.The overview file is in %BUILDDIR%/changes. 217 | goto end 218 | ) 219 | 220 | if "%1" == "linkcheck" ( 221 | %SPHINXBUILD% -b linkcheck %ALLSPHINXOPTS% %BUILDDIR%/linkcheck 222 | if errorlevel 1 exit /b 1 223 | echo. 224 | echo.Link check complete; look for any errors in the above output ^ 225 | or in %BUILDDIR%/linkcheck/output.txt. 226 | goto end 227 | ) 228 | 229 | if "%1" == "doctest" ( 230 | %SPHINXBUILD% -b doctest %ALLSPHINXOPTS% %BUILDDIR%/doctest 231 | if errorlevel 1 exit /b 1 232 | echo. 233 | echo.Testing of doctests in the sources finished, look at the ^ 234 | results in %BUILDDIR%/doctest/output.txt. 
235 | goto end 236 | ) 237 | 238 | if "%1" == "coverage" ( 239 | %SPHINXBUILD% -b coverage %ALLSPHINXOPTS% %BUILDDIR%/coverage 240 | if errorlevel 1 exit /b 1 241 | echo. 242 | echo.Testing of coverage in the sources finished, look at the ^ 243 | results in %BUILDDIR%/coverage/python.txt. 244 | goto end 245 | ) 246 | 247 | if "%1" == "xml" ( 248 | %SPHINXBUILD% -b xml %ALLSPHINXOPTS% %BUILDDIR%/xml 249 | if errorlevel 1 exit /b 1 250 | echo. 251 | echo.Build finished. The XML files are in %BUILDDIR%/xml. 252 | goto end 253 | ) 254 | 255 | if "%1" == "pseudoxml" ( 256 | %SPHINXBUILD% -b pseudoxml %ALLSPHINXOPTS% %BUILDDIR%/pseudoxml 257 | if errorlevel 1 exit /b 1 258 | echo. 259 | echo.Build finished. The pseudo-XML files are in %BUILDDIR%/pseudoxml. 260 | goto end 261 | ) 262 | 263 | :end 264 | -------------------------------------------------------------------------------- /doc/requirements.txt: -------------------------------------------------------------------------------- 1 | numpydoc 2 | -------------------------------------------------------------------------------- /doc/source/_static/placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/zict/7e5dafedcf016a4ac61286badbf1f8da3741d2d3/doc/source/_static/placeholder -------------------------------------------------------------------------------- /doc/source/changelog.rst: -------------------------------------------------------------------------------- 1 | Changelog 2 | ========= 3 | .. 
currentmodule:: zict 4 | 5 | 3.1.0 - Unreleased 6 | ------------------ 7 | - Dropped support for Python 3.8 (:pr:`106`) `Guido Imperiale`_ 8 | - New object :class:`KeyMap` (:pr:`110`) `Guido Imperiale`_ 9 | 10 | 11 | 3.0.0 - 2023-04-17 12 | ------------------ 13 | - The library is now almost completely thread-safe 14 | (:pr:`82`, :pr:`90`, :pr:`92`, :pr:`93`) 15 | - Dropped support for Python 3.7 (:pr:`84`) 16 | - ``File.__getitem__`` now returns bytearray instead of bytes. This prevents a memcpy 17 | when deserializing numpy arrays with dask. (:pr:`74`) 18 | - Removed dependency on ``heapdict``; sped up :class:`LRU` (:pr:`77`) 19 | - Fixed broken :class:`LRU` state when the underlying mapping starts non-empty. 20 | (:pr:`77`) 21 | - :class:`File` and :class:`LMDB` now support :class:`pathlib.Path` and pytest's 22 | ``tmpdir`` (:pr:`78`) 23 | - :class:`LMDB` now uses memory-mapped I/O on macOS and is usable on Windows (:pr:`78`) 24 | - :class:`LRU` and :class:`Buffer` now support delayed eviction (:pr:`87`) 25 | - New object :class:`InsertionSortedSet` (:pr:`87`) 26 | - All mappings now return proper KeysView, ItemsView, and ValuesView objects from their 27 | keys(), items(), and values() methods (:pr:`93`) 28 | - :class:`File`, :class:`LMDB`, and :class:`Zip` now behave coherently with unexpected 29 | key/value types (:pr:`95`) 30 | - ``Zip.__contains__`` no longer reads the value from disk (:pr:`95`) 31 | - ``Zip.__setitem__`` will now raise when updating an already-existing key instead of 32 | quietly corrupting the mapping (:pr:`95`) 33 | - Can now change ``LRU.n`` on the fly. Added ``LRU.offset`` attribute. Added 34 | accessors to ``n`` and ``offset`` to :class:`Buffer`. 
(:pr:`101`) 35 | - New object :class:`AsyncBuffer`; new method :meth:`LRU.get_all_or_nothing` (:pr:`88`) 36 | 37 | All changes by `Guido Imperiale`_ 38 | 39 | 40 | 2.2.0 - 2022-04-28 41 | ------------------ 42 | - Added ``python_requires`` to ``setup.py`` (:pr:`60`) `Carlos Cordoba`_ 43 | - Added type annotations (:pr:`62`) `Guido Imperiale`_ 44 | - If you call ``Func.update()`` and ``Func`` wraps around ``File``, do not store all dump outputs in 45 | memory (:pr:`64`) `Guido Imperiale`_ 46 | - Added new classes ``zict.Cache`` and ``zict.WeakRefCache`` 47 | (:pr:`65`) `Guido Imperiale`_ 48 | 49 | 50 | 2.1.0 - 2022-02-25 51 | ------------------ 52 | - LRU and Buffer now deal with exceptions raised by the callbacks - namely, OSError 53 | raised when the disk is full (:pr:`48`) `Naty Clementi`_, `Guido Imperiale`_ 54 | - Dropped support for Python 3.6; added support for Python 3.9 and 3.10 (:pr:`55`) 55 | `Guido Imperiale`_ 56 | - Migrate to GitHub Actions (:pr:`40`) `Thomas J. Fan`_ 57 | - Allow file mmapping (:pr:`51`) `jakirkham`_ 58 | 59 | 60 | 2.0.0 - 2020-02-28 61 | ------------------ 62 | 63 | - Create ``CONTRIBUTING.md`` (:pr:`28`) `Jacob Tomlinson`_ 64 | - Import ABC from ``collections.abc`` instead of ``collections`` for Python 3.9 65 | compatibility (:pr:`31`) `Karthikeyan Singaravelan`_ 66 | - Drop Python 2 / 3.5 and add Python 3.7 / 3.8 support (:pr:`34`) `James Bourbeau`_ 67 | - Duplicate keys fast slow (:pr:`32`) `Florian Jetter`_ 68 | - Fix dask cuda worker's race condition failure (:pr:`33`) `Pradipta Ghosh`_ 69 | - Changed default ``lmdb`` encoding to ``utf-8`` (:pr:`36`) `Alex Davies`_ 70 | - Add code linting and style check (:pr:`35`) `James Bourbeau`_ 71 | 72 | .. _`Jacob Tomlinson`: https://github.com/jacobtomlinson 73 | .. _`Karthikeyan Singaravelan`: https://github.com/tirkarthi 74 | .. _`James Bourbeau`: https://github.com/jrbourbeau 75 | .. _`Florian Jetter`: https://github.com/fjetter 76 | .. 
_`Pradipta Ghosh`: https://github.com/pradghos 77 | .. _`Alex Davies`: https://github.com/traverseda 78 | .. _`Naty Clementi`: https://github.com/ncclementi 79 | .. _`Guido Imperiale`: https://github.com/crusaderky 80 | .. _`Thomas J. Fan`: https://github.com/thomasjpfan 81 | .. _`jakirkham`: https://github.com/jakirkham 82 | .. _`Carlos Cordoba`: https://github.com/ccordoba12 -------------------------------------------------------------------------------- /doc/source/conf.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | # zict documentation build configuration file, created by 4 | # sphinx-quickstart on Sat Apr 2 14:56:27 2016. 5 | # 6 | # This file is execfile()d with the current directory set to its 7 | # containing dir. 8 | # 9 | # Note that not all possible configuration values are present in this 10 | # autogenerated file. 11 | # 12 | # All configuration values have a default; values that are commented out 13 | # serve to show the default. 14 | 15 | # If extensions (or modules to document with autodoc) are in another directory, 16 | # add these directories to sys.path here. If the directory is relative to the 17 | # documentation root, use os.path.abspath to make it absolute, like shown here. 18 | # sys.path.insert(0, os.path.abspath('.')) 19 | 20 | # -- General configuration ------------------------------------------------ 21 | 22 | # If your documentation needs a minimal Sphinx version, state it here. 23 | # needs_sphinx = '1.0' 24 | 25 | # Add any Sphinx extension module names here, as strings. They can be 26 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 27 | # ones. 
28 | extensions = [ 29 | "sphinx.ext.autodoc", 30 | "sphinx.ext.autosummary", 31 | "sphinx.ext.viewcode", 32 | "sphinx.ext.extlinks", 33 | "numpydoc", 34 | ] 35 | 36 | # Generate the API documentation when building 37 | autosummary_generate = True 38 | numpydoc_show_class_members = False 39 | 40 | # Add any paths that contain templates here, relative to this directory. 41 | templates_path = ["_templates"] 42 | 43 | # The suffix(es) of source filenames. 44 | # You can specify multiple suffix as a list of string: 45 | # source_suffix = ['.rst', '.md'] 46 | source_suffix = ".rst" 47 | 48 | # The encoding of source files. 49 | # source_encoding = 'utf-8-sig' 50 | 51 | # The master toctree document. 52 | master_doc = "index" 53 | 54 | # General information about the project. 55 | project = "zict" 56 | copyright = "2016, Matthew Rocklin" 57 | author = "Matthew Rocklin" 58 | 59 | # The version info for the project you're documenting, acts as replacement for 60 | # |version| and |release|, also used in various other places throughout the 61 | # built documents. 62 | # 63 | # The short X.Y version. 64 | import zict 65 | 66 | version = zict.__version__ 67 | # The full version, including alpha/beta/rc tags. 68 | release = version 69 | 70 | # Link to GitHub issues and pull requests using :pr:`1234` and :issue:`1234` 71 | # syntax 72 | extlinks = { 73 | "issue": ("https://github.com/dask/zict/issues/%s", "#%s"), 74 | "pr": ("https://github.com/dask/zict/pull/%s", "#%s"), 75 | } 76 | 77 | # The language for content autogenerated by Sphinx. Refer to documentation 78 | # for a list of supported languages. 79 | # 80 | # This is also used if you do content translation via gettext catalogs. 81 | # Usually you set "language" from the command line for these cases. 
82 | language = "en" 83 | 84 | # There are two options for replacing |today|: either, you set today to some 85 | # non-false value, then it is used: 86 | # today = '' 87 | # Else, today_fmt is used as the format for a strftime call. 88 | # today_fmt = '%B %d, %Y' 89 | 90 | # List of patterns, relative to source directory, that match files and 91 | # directories to ignore when looking for source files. 92 | exclude_patterns: list[str] = [] 93 | 94 | # The reST default role (used for this markup: `text`) to use for all 95 | # documents. 96 | # default_role = None 97 | 98 | # If true, '()' will be appended to :func: etc. cross-reference text. 99 | # add_function_parentheses = True 100 | 101 | # If true, the current module name will be prepended to all description 102 | # unit titles (such as .. function::). 103 | # add_module_names = True 104 | 105 | # If true, sectionauthor and moduleauthor directives will be shown in the 106 | # output. They are ignored by default. 107 | # show_authors = False 108 | 109 | # The name of the Pygments (syntax highlighting) style to use. 110 | pygments_style = "sphinx" 111 | 112 | # A list of ignored prefixes for module index sorting. 113 | # modindex_common_prefix = [] 114 | 115 | # If true, keep warnings as "system message" paragraphs in the built documents. 116 | # keep_warnings = False 117 | 118 | # If true, `todo` and `todoList` produce output, else they produce nothing. 119 | todo_include_todos = False 120 | 121 | 122 | # -- Options for HTML output ---------------------------------------------- 123 | 124 | # The theme to use for HTML and HTML Help pages. See the documentation for 125 | # a list of builtin themes. 126 | html_theme = "alabaster" 127 | 128 | # Theme options are theme-specific and customize the look and feel of a theme 129 | # further. For a list of options available for each theme, see the 130 | # documentation. 
131 | # html_theme_options = {} 132 | 133 | # Add any paths that contain custom themes here, relative to this directory. 134 | # html_theme_path = [] 135 | 136 | # The name for this set of Sphinx documents. If None, it defaults to 137 | # "<project> v<release> documentation". 138 | # html_title = None 139 | 140 | # A shorter title for the navigation bar. Default is the same as html_title. 141 | # html_short_title = None 142 | 143 | # The name of an image file (relative to this directory) to place at the top 144 | # of the sidebar. 145 | # html_logo = None 146 | 147 | # The name of an image file (within the static path) to use as favicon of the 148 | # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 149 | # pixels large. 150 | # html_favicon = None 151 | 152 | # Add any paths that contain custom static files (such as style sheets) here, 153 | # relative to this directory. They are copied after the builtin static files, 154 | # so a file named "default.css" will overwrite the builtin "default.css". 155 | html_static_path = ["_static"] 156 | 157 | # Add any extra paths that contain custom files (such as robots.txt or 158 | # .htaccess) here, relative to this directory. These files are copied 159 | # directly to the root of the documentation. 160 | # html_extra_path = [] 161 | 162 | # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, 163 | # using the given strftime format. 164 | # html_last_updated_fmt = '%b %d, %Y' 165 | 166 | # If true, SmartyPants will be used to convert quotes and dashes to 167 | # typographically correct entities. 168 | # html_use_smartypants = True 169 | 170 | # Custom sidebar templates, maps document names to template names. 171 | # html_sidebars = {} 172 | 173 | # Additional templates that should be rendered to pages, maps page names to 174 | # template names. 175 | # html_additional_pages = {} 176 | 177 | # If false, no module index is generated.
178 | # html_domain_indices = True 179 | 180 | # If false, no index is generated. 181 | # html_use_index = True 182 | 183 | # If true, the index is split into individual pages for each letter. 184 | # html_split_index = False 185 | 186 | # If true, links to the reST sources are added to the pages. 187 | # html_show_sourcelink = True 188 | 189 | # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. 190 | # html_show_sphinx = True 191 | 192 | # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. 193 | # html_show_copyright = True 194 | 195 | # If true, an OpenSearch description file will be output, and all pages will 196 | # contain a <link> tag referring to it. The value of this option must be the 197 | # base URL from which the finished HTML is served. 198 | # html_use_opensearch = '' 199 | 200 | # This is the file name suffix for HTML files (e.g. ".xhtml"). 201 | # html_file_suffix = None 202 | 203 | # Language to be used for generating the HTML full-text search index. 204 | # Sphinx supports the following languages: 205 | # 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' 206 | # 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr' 207 | # html_search_language = 'en' 208 | 209 | # A dictionary with options for the search language support, empty by default. 210 | # Now only 'ja' uses this config value 211 | # html_search_options = {'type': 'default'} 212 | 213 | # The name of a javascript file (relative to the configuration directory) that 214 | # implements a search results scorer. If empty, the default will be used. 215 | # html_search_scorer = 'scorer.js' 216 | 217 | # Output file base name for HTML help builder. 218 | htmlhelp_basename = "zictdoc" 219 | 220 | # -- Options for LaTeX output --------------------------------------------- 221 | 222 | latex_elements: dict[str, str] = { 223 | # The paper size ('letterpaper' or 'a4paper'). 224 | # 'papersize': 'letterpaper', 225 | # The font size ('10pt', '11pt' or '12pt').
226 | # 'pointsize': '10pt', 227 | # Additional stuff for the LaTeX preamble. 228 | # 'preamble': '', 229 | # Latex figure (float) alignment 230 | # 'figure_align': 'htbp', 231 | } 232 | 233 | # Grouping the document tree into LaTeX files. List of tuples 234 | # (source start file, target name, title, 235 | # author, documentclass [howto, manual, or own class]). 236 | latex_documents = [ 237 | (master_doc, "zict.tex", "zict Documentation", "Matthew Rocklin", "manual"), 238 | ] 239 | 240 | # The name of an image file (relative to this directory) to place at the top of 241 | # the title page. 242 | # latex_logo = None 243 | 244 | # For "manual" documents, if this is true, then toplevel headings are parts, 245 | # not chapters. 246 | # latex_use_parts = False 247 | 248 | # If true, show page references after internal links. 249 | # latex_show_pagerefs = False 250 | 251 | # If true, show URL addresses after external links. 252 | # latex_show_urls = False 253 | 254 | # Documents to append as an appendix to all manuals. 255 | # latex_appendices = [] 256 | 257 | # If false, no module index is generated. 258 | # latex_domain_indices = True 259 | 260 | 261 | # -- Options for manual page output --------------------------------------- 262 | 263 | # One entry per manual page. List of tuples 264 | # (source start file, name, description, authors, manual section). 265 | man_pages = [(master_doc, "zict", "zict Documentation", [author], 1)] 266 | 267 | # If true, show URL addresses after external links. 268 | # man_show_urls = False 269 | 270 | 271 | # -- Options for Texinfo output ------------------------------------------- 272 | 273 | # Grouping the document tree into Texinfo files. 
List of tuples 274 | # (source start file, target name, title, author, 275 | # dir menu entry, description, category) 276 | texinfo_documents = [ 277 | ( 278 | master_doc, 279 | "zict", 280 | "zict Documentation", 281 | author, 282 | "zict", 283 | "One line description of project.", 284 | "Miscellaneous", 285 | ), 286 | ] 287 | 288 | # Documents to append as an appendix to all manuals. 289 | # texinfo_appendices = [] 290 | 291 | # If false, no module index is generated. 292 | # texinfo_domain_indices = True 293 | 294 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 295 | # texinfo_show_urls = 'footnote' 296 | 297 | # If true, do not generate a @detailmenu in the "Top" node's menu. 298 | # texinfo_no_detailmenu = False 299 | 300 | 301 | # -- Options for Epub output ---------------------------------------------- 302 | 303 | # Bibliographic Dublin Core info. 304 | epub_title = project 305 | epub_author = author 306 | epub_publisher = author 307 | epub_copyright = copyright 308 | 309 | # The basename for the epub file. It defaults to the project name. 310 | # epub_basename = project 311 | 312 | # The HTML theme for the epub output. Since the default themes are not 313 | # optimized for small screen space, using the same theme for HTML and epub 314 | # output is usually not wise. This defaults to 'epub', a theme designed to save 315 | # visual space. 316 | # epub_theme = 'epub' 317 | 318 | # The language of the text. It defaults to the language option 319 | # or 'en' if the language is not set. 320 | # epub_language = '' 321 | 322 | # The scheme of the identifier. Typical schemes are ISBN or URL. 323 | # epub_scheme = '' 324 | 325 | # The unique identifier of the text. This can be a ISBN number 326 | # or the project homepage. 327 | # epub_identifier = '' 328 | 329 | # A unique identification for the text. 330 | # epub_uid = '' 331 | 332 | # A tuple containing the cover image and cover page html template filenames. 
333 | # epub_cover = () 334 | 335 | # A sequence of (type, uri, title) tuples for the guide element of content.opf. 336 | # epub_guide = () 337 | 338 | # HTML files that should be inserted before the pages created by sphinx. 339 | # The format is a list of tuples containing the path and title. 340 | # epub_pre_files = [] 341 | 342 | # HTML files that should be inserted after the pages created by sphinx. 343 | # The format is a list of tuples containing the path and title. 344 | # epub_post_files = [] 345 | 346 | # A list of files that should not be packed into the epub file. 347 | epub_exclude_files = ["search.html"] 348 | 349 | # The depth of the table of contents in toc.ncx. 350 | # epub_tocdepth = 3 351 | 352 | # Allow duplicate toc entries. 353 | # epub_tocdup = True 354 | 355 | # Choose between 'default' and 'includehidden'. 356 | # epub_tocscope = 'default' 357 | 358 | # Fix unsupported image types using the Pillow. 359 | # epub_fix_images = False 360 | 361 | # Scale large images. 362 | # epub_max_image_width = 0 363 | 364 | # How to display URL addresses: 'footnote', 'no', or 'inline'. 365 | # epub_show_urls = 'inline' 366 | 367 | # If false, no index is generated. 368 | # epub_use_index = True 369 | -------------------------------------------------------------------------------- /doc/source/index.rst: -------------------------------------------------------------------------------- 1 | Zict: Composable Mutable Mappings 2 | ================================= 3 | 4 | The dictionary / mutable mapping interface is powerful and multi-faceted. 5 | 6 | * We store data in different locations such as in-memory, on disk, in archive 7 | files, etc.. 8 | * We manage old data with different policies like LRU, random eviction, etc.. 9 | * We might encode or transform data as it arrives or departs the dictionary 10 | through compression, encoding, etc.. 
11 | 12 | To this end we build abstract ``MutableMapping`` classes that consume and build 13 | on other ``MutableMappings``. We can compose several of these with each other 14 | to form intuitive interfaces over complex storage systems policies. 15 | 16 | Example 17 | ------- 18 | In the following example we create an LRU dictionary backed by pickle-encoded, 19 | zlib-compressed, directory of files. 20 | 21 | .. code-block:: python 22 | 23 | import pickle 24 | import zlib 25 | 26 | from zict import File, Func, LRU 27 | 28 | a = File('mydir/') 29 | b = Func(zlib.compress, zlib.decompress, a) 30 | c = Func(pickle.dumps, pickle.loads, b) 31 | d = LRU(100, c) 32 | 33 | >>> d['x'] = [1, 2, 3] 34 | >>> d['x'] 35 | [1, 2, 3] 36 | 37 | Thread-safety 38 | ------------- 39 | Most classes in this library are thread-safe. 40 | Refer to the documentation of the individual mappings for exceptions. 41 | 42 | API 43 | --- 44 | .. currentmodule:: zict 45 | 46 | **zict** defines the following MutableMappings: 47 | 48 | .. autoclass:: Buffer 49 | :members: 50 | .. autoclass:: AsyncBuffer 51 | :members: 52 | .. autoclass:: Cache 53 | :members: 54 | .. autoclass:: File 55 | :members: 56 | .. autoclass:: Func 57 | :members: 58 | .. autoclass:: KeyMap 59 | :members: 60 | .. autoclass:: LMDB 61 | :members: 62 | .. autoclass:: LRU 63 | :members: 64 | .. autoclass:: Sieve 65 | :members: 66 | .. autoclass:: Zip 67 | :members: 68 | 69 | Additionally, **zict** makes available the following general-purpose objects: 70 | 71 | .. autoclass:: InsertionSortedSet 72 | :members: 73 | .. autoclass:: WeakValueMapping 74 | :members: 75 | 76 | 77 | Changelog 78 | --------- 79 | Release notes can be found :doc:`here <changelog>`.
80 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/zict/7e5dafedcf016a4ac61286badbf1f8da3741d2d3/requirements.txt -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = zict 3 | # Must be kept aligned with zict.__init__ 4 | version = 3.1.0 5 | maintainer=Matthew Rocklin 6 | maintainer_email=mrocklin@coiled.io 7 | license = BSD 8 | description = Mutable mapping tools 9 | description_content_type=text/plain 10 | long_description = 11 | Mutable Mapping tools. See `documentation`_. 12 | 13 | .. _documentation: http://zict.readthedocs.io/en/latest/ 14 | 15 | long_description_content_type=text/x-rst 16 | keywords = mutable mapping,dict,dask 17 | url = http://zict.readthedocs.io/en/latest/ 18 | classifiers = 19 | Development Status :: 5 - Production/Stable 20 | License :: OSI Approved :: BSD License 21 | Operating System :: OS Independent 22 | Programming Language :: Python 23 | Programming Language :: Python :: 3 24 | Programming Language :: Python :: 3.9 25 | Programming Language :: Python :: 3.10 26 | Programming Language :: Python :: 3.11 27 | 28 | [options] 29 | packages = zict 30 | zip_safe = False # https://mypy.readthedocs.io/en/latest/installed_packages.html 31 | include_package_data = True 32 | python_requires = >=3.9 33 | install_requires = 34 | 35 | [options.package_data] 36 | zict = 37 | py.typed 38 | 39 | [bdist_wheel] 40 | universal = 1 41 | 42 | [wheel] 43 | universal = 1 44 | 45 | [flake8] 46 | # References: 47 | # https://flake8.readthedocs.io/en/latest/user/configuration.html 48 | # https://flake8.readthedocs.io/en/latest/user/error-codes.html 49 | 50 | # Aligned with black https://github.com/psf/black/blob/main/.flake8 51 | extend-ignore = 
E203, E266, E501 52 | # Note: there cannot be spaces after commas here 53 | exclude = __init__.py 54 | ignore = 55 | # Import formatting 56 | E4 57 | # Assigning lambda expression 58 | E731 59 | # line break before binary operator 60 | W503 61 | 62 | max-line-length = 88 63 | 64 | [tool:pytest] 65 | addopts = 66 | -v 67 | --doctest-modules 68 | --durations=20 69 | --strict-markers 70 | --strict-config 71 | -p no:legacypath 72 | 73 | # pytest-timeout settings 74 | # 'thread' kills off the whole test suite. 'signal' only kills the offending test. 75 | # However, 'signal' doesn't work on Windows (due to lack of SIGALRM). 76 | timeout_method = thread 77 | timeout = 180 78 | markers = 79 | stress: slow-running stress test with a random component. Pass --stress to change number of reruns. 80 | 81 | 82 | [isort] 83 | sections = FUTURE,STDLIB,THIRDPARTY,FIRSTPARTY,LOCALFOLDER 84 | profile = black 85 | skip_gitignore = true 86 | force_to_top = true 87 | default_section = THIRDPARTY 88 | known_first_party = zict 89 | 90 | 91 | [mypy] 92 | python_version = 3.9 93 | # See https://github.com/python/mypy/issues/12286 for automatic multi-platform support 94 | platform = linux 95 | # platform = win32 96 | # platform = darwin 97 | allow_incomplete_defs = false 98 | allow_untyped_decorators = false 99 | allow_untyped_defs = false 100 | ignore_missing_imports = true 101 | no_implicit_optional = true 102 | show_error_codes = true 103 | warn_redundant_casts = true 104 | warn_unused_ignores = true 105 | warn_unreachable = true 106 | 107 | [mypy-zict.tests.*] 108 | allow_untyped_defs = true 109 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from setuptools import setup 4 | 5 | setup() 6 | -------------------------------------------------------------------------------- /zict/__init__.py: 
class AsyncBuffer(Buffer[KT, VT]):
    """Variant of :class:`~zict.Buffer` in which reads from and writes to ``slow``
    can be pushed to a separate worker thread.

    ``fast`` is required to be fully thread-safe (a plain dict qualifies).

    Only ``slow.__setitem__`` and ``slow.__getitem__`` are invoked from the offloaded
    thread; every other method of ``slow`` (notably, as far as thread-safety is
    concerned, ``__contains__`` and ``__delitem__``) is invoked from the main thread.

    See Also
    --------
    Buffer

    Parameters
    ----------
    Same as in Buffer, plus:

    executor: concurrent.futures.Executor, optional
        Executor instance used for offloading. It must not pickle/unpickle.
        Defaults to an internal ThreadPoolExecutor.
    nthreads: int, optional
        Number of offloaded threads to run in parallel. Defaults to 1.
        Mutually exclusive with the executor parameter.
    """

    executor: Executor | None
    nthreads: int | None
    futures: set[asyncio.Future]
    evicting: dict[asyncio.Future, float]

    @wraps(Buffer.__init__)
    def __init__(
        self,
        *args: Any,
        executor: Executor | None = None,
        nthreads: int = 1,
        **kwargs: Any,
    ) -> None:
        super().__init__(*args, **kwargs)
        self.futures = set()
        self.evicting = {}
        self.executor = executor
        self._internal_executor = executor is None
        # nthreads is only recorded when no executor was handed in; close() relies
        # on it being None to leave a caller-provided executor running.
        if executor:
            self.nthreads = None
        else:
            self.nthreads = nthreads

    def close(self) -> None:
        """Close the buffer, cancel every tracked future and shut down the executor
        if it was created internally.
        """
        # This reaches LRU.close(), which stops LRU.evict_until_below_target()
        # halfway through
        super().close()
        for pending in self.futures:
            pending.cancel()
        if self.executor is None or self.nthreads is None:
            # Nothing to shut down: either no executor exists, or it belongs to
            # the caller.
            return
        self.executor.shutdown(wait=True)
        self.executor = None

    def _offload(self, func: Callable[..., T], *args: Any) -> asyncio.Future[T]:
        """Run ``func(*args)`` in the executor, inside a copy of the current
        contextvars context, and return the asyncio future tracking it.

        The future is kept in ``self.futures`` until it completes.
        """
        if self.executor is None:
            # Lazily create the internal thread pool on first use
            assert self.nthreads
            self.executor = ThreadPoolExecutor(
                self.nthreads, thread_name_prefix="zict.AsyncBuffer offloader"
            )

        running_loop = asyncio.get_running_loop()
        ctx = contextvars.copy_context()
        fut = running_loop.run_in_executor(self.executor, ctx.run, func, *args)
        self.futures.add(fut)
        fut.add_done_callback(self.futures.remove)
        return fut  # type: ignore[return-value]

    # This returns an asyncio.Future rather than being an async function, so that
    # overriders can more easily recognise the use case where all keys were already
    # in fast
    @locked
    def async_get(
        self, keys: Collection[KT], missing: Literal["raise", "omit"] = "raise"
    ) -> asyncio.Future[dict[KT, VT]]:
        """Retrieve one or more key/value pairs. When some of the keys are not in
        fast, the work is offloaded to a worker thread, which moves keys from slow to
        fast and possibly moves older keys from fast to slow.

        Parameters
        ----------
        keys:
            collection of zero or more keys to get
        missing: raise or omit, optional
            raise (default)
                Raise KeyError if any key is missing.
            omit
                Skip missing keys; the returned dict then holds fewer keys than
                were requested.

        Notes
        -----
        Even if every key is present at the time ``async_get`` is called,
        ``__delitem__`` may be invoked on one of them before the data is actually
        fetched. ``__setitem__`` also calls ``__delitem__`` internally in a
        non-atomic way, so ``KeyError`` is possible while a value is being updated
        too.
        """
        # Early check: do not spawn a thread for keys that are in neither fast nor
        # slow. Other than that it is only a performance optimization.
        if missing == "raise":
            for key in keys:
                if key not in self:
                    raise KeyError(key)
        elif missing == "omit":
            keys = [key for key in keys if key in self]
        else:
            raise ValueError(f"missing: expected raise or omit; got {missing}")
        # End of early check

        try:
            # Keys are only pulled towards the top of the LRU when all of them are
            # available. This matters when the queue of async_get futures is very
            # long.
            found = self.fast.get_all_or_nothing(keys)
        except KeyError:
            pass
        else:
            ready: asyncio.Future[dict[KT, VT]] = asyncio.Future()
            ready.set_result(found)
            return ready

        def _fetch_offloaded() -> dict[KT, VT]:
            out = {}
            for key in keys:
                if self.fast.closed:
                    raise asyncio.CancelledError()
                try:
                    # May restore the key to fast and evict older keys
                    out[key] = self[key]
                except KeyError:
                    # Race condition: the key existed when async_get was called
                    # but was deleted in the meantime.
                    if missing == "raise":
                        raise
            return out

        return self._offload(_fetch_offloaded)

    def __setitem__(self, key: KT, value: VT) -> None:
        """Set a key in fast right away. If the total weight then exceeds n, start
        moving keys from fast to slow asynchronously in a worker thread.
        """
        self.set_noevict(key, value)
        self.async_evict_until_below_target()

    @locked
    def async_evict_until_below_target(self, n: float | None = None) -> None:
        """Start moving keys from fast to slow asynchronously, in a worker thread, if
        the total weight exceeds n.
        """
        target = max(0.0, self.n if n is None else n)
        # Smallest of the current total weight of fast and the targets of the
        # evictions that are already in flight
        lowest = min([self.fast.total_weight, *self.evicting.values()])
        if lowest <= target:
            return

        # Note: LRU.close(), itself triggered by Buffer.close(), can cancel this
        fut = self._offload(self.evict_until_below_target, target)
        self.evicting[fut] = target
        fut.add_done_callback(self.evicting.__delitem__)
31 | slow_to_fast_callbacks: list of callables 32 | These functions run every time data moves form the slow to the fast mapping. 33 | 34 | Notes 35 | ----- 36 | If you call methods of this class from multiple threads, access will be fast as long 37 | as all methods of ``fast``, plus ``slow.__contains__`` and ``slow.__delitem__``, are 38 | fast. ``slow.__getitem__``, ``slow.__setitem__`` and callbacks are not protected 39 | by locks. 40 | 41 | Examples 42 | -------- 43 | >>> fast = {} 44 | >>> slow = Func(dumps, loads, File('storage/')) # doctest: +SKIP 45 | >>> def weight(k, v): 46 | ... return sys.getsizeof(v) 47 | >>> buff = Buffer(fast, slow, 1e8, weight=weight) # doctest: +SKIP 48 | 49 | See Also 50 | -------- 51 | LRU 52 | """ 53 | 54 | fast: LRU[KT, VT] 55 | slow: MutableMapping[KT, VT] 56 | weight: Callable[[KT, VT], float] 57 | fast_to_slow_callbacks: list[Callable[[KT, VT], None]] 58 | slow_to_fast_callbacks: list[Callable[[KT, VT], None]] 59 | _cancel_restore: dict[KT, bool] 60 | 61 | def __init__( 62 | self, 63 | fast: MutableMapping[KT, VT], 64 | slow: MutableMapping[KT, VT], 65 | n: float, 66 | weight: Callable[[KT, VT], float] = lambda k, v: 1, 67 | fast_to_slow_callbacks: Callable[[KT, VT], None] 68 | | list[Callable[[KT, VT], None]] 69 | | None = None, 70 | slow_to_fast_callbacks: Callable[[KT, VT], None] 71 | | list[Callable[[KT, VT], None]] 72 | | None = None, 73 | ): 74 | super().__init__() 75 | self.fast = LRU( 76 | n, 77 | fast, 78 | weight=weight, 79 | on_evict=[self.fast_to_slow], 80 | on_cancel_evict=[self._cancel_evict], 81 | ) 82 | self.slow = slow 83 | self.weight = weight 84 | if callable(fast_to_slow_callbacks): 85 | fast_to_slow_callbacks = [fast_to_slow_callbacks] 86 | if callable(slow_to_fast_callbacks): 87 | slow_to_fast_callbacks = [slow_to_fast_callbacks] 88 | self.fast_to_slow_callbacks = fast_to_slow_callbacks or [] 89 | self.slow_to_fast_callbacks = slow_to_fast_callbacks or [] 90 | self._cancel_restore = {} 91 | 92 | @property 
93 | def n(self) -> float: 94 | """Maximum weight in the fast mapping before eviction happens. 95 | Can be updated; this won't trigger eviction by itself; you should call 96 | :meth:`evict_until_below_target` afterwards. 97 | 98 | See also 99 | -------- 100 | offset 101 | evict_until_below_target 102 | LRU.n 103 | LRU.offset 104 | """ 105 | return self.fast.n 106 | 107 | @n.setter 108 | def n(self, value: float) -> None: 109 | self.fast.n = value 110 | 111 | @property 112 | def offset(self) -> float: 113 | """Offset to add to the total weight in the fast buffer to determine when 114 | eviction happens. Note that increasing offset is not the same as decreasing n, 115 | as the latter also changes what keys qualify as "heavy" and should not be stored 116 | in fast. 117 | 118 | Always starts at zero and can be updated; this won't trigger eviction by itself; 119 | you should call :meth:`evict_until_below_target` afterwards. 120 | 121 | See also 122 | -------- 123 | n 124 | evict_until_below_target 125 | LRU.n 126 | LRU.offset 127 | """ 128 | return self.fast.offset 129 | 130 | @offset.setter 131 | def offset(self, value: float) -> None: 132 | self.fast.offset = value 133 | 134 | def fast_to_slow(self, key: KT, value: VT) -> None: 135 | self.slow[key] = value 136 | try: 137 | for cb in self.fast_to_slow_callbacks: 138 | cb(key, value) 139 | # LRU catches exception, raises and makes sure keys are not lost and located in 140 | # fast. 
def slow_to_fast(self, key: KT) -> VT:
    """Read ``key`` from ``slow`` and, unless it is heavier than ``n``, move it
    into ``fast``.

    The read from ``slow`` happens inside ``self.unlock()``, so other threads may
    set or delete the same key in the meantime. ``__setitem__``, ``set_noevict``
    and ``__delitem__`` report this by switching ``self._cancel_restore[key]`` to
    True.

    Returns
    -------
    The value read from ``slow``.

    Raises
    ------
    KeyError
        If ``self._cancel_restore[key]`` was switched to True while the value was
        being read. Whatever ``self.slow[key]`` raises for a missing key is
        propagated as well (presumably KeyError).
    """
    # Register the restore as in progress so that concurrent writers can flag it
    self._cancel_restore[key] = False
    try:
        with self.unlock():
            value = self.slow[key]
        if self._cancel_restore[key]:
            # The key was set or deleted while we were reading; the value we hold
            # is stale.
            raise KeyError(key)
    finally:
        del self._cancel_restore[key]

    # Avoid useless movement for heavy values
    w = self.weight(key, value)
    if w <= self.n:
        # Multithreaded edge case:
        # - Thread 1 starts slow_to_fast(x) and puts it at the top of fast
        # - This causes the eviction of older key(s)
        # - While thread 1 is evicting older keys, thread 2 is loading fast with
        #   set_noevict()
        # - By the time the eviction of the older key(s) is done, there is
        #   enough weight in fast that thread 1 will spill x
        # - If the below code was just `self.fast[key] = value; del
        #   self.slow[key]` now the key would be in neither slow nor fast!
        self.fast.set_noevict(key, value)
        del self.slow[key]

    # Eviction and user callbacks run inside self.unlock()
    with self.unlock():
        self.fast.evict_until_below_target()
        for cb in self.slow_to_fast_callbacks:
            cb(key, value)

    return value
def __len__(self) -> int:
    """Return the number of distinct keys held across ``fast`` and ``slow``.

    Both the Buffer lock and the lock of ``fast`` are held while counting.
    """
    with self.lock, self.fast.lock:
        return (
            len(self.fast)
            + len(self.slow)
            # Keys tracked in _cancel_restore (being read by slow_to_fast) or in
            # fast._cancel_evict (being evicted) may be present in both mappings
            # at the same time, e.g. fast_to_slow() writes to slow while the LRU
            # still holds the value. Count each of those only once.
            - sum(
                k in self.fast and k in self.slow
                for k in chain(self._cancel_restore, self.fast._cancel_evict)
            )
        )
class BufferValuesView(ValuesView[VT]):
    """Values view of a Buffer.

    Values are read straight from the ``fast`` and ``slow`` mappings instead of
    going through ``Buffer.__getitem__``, so that looking at them does not move
    any key between the two.
    """

    _mapping: Buffer  # FIXME CPython implementation detail
    __slots__ = ()

    def __contains__(self, value: object) -> bool:
        # Linear scan over our own iterator; avoids changing the LRU
        for candidate in self:
            if value == candidate:
                return True
        return False

    def __iter__(self) -> Iterator[VT]:
        # Avoid changing the LRU: all values of fast first, then all values of slow
        buffer = self._mapping
        return chain(buffer.fast.values(), buffer.slow.values())
@locked
def __getitem__(self, key: KT) -> VT:
    """Return the value of ``key``, reading it from ``data`` on a cache miss.

    ``data`` is read with the lock released. The value is stored in ``cache``
    only if no ``__setitem__`` or ``__delitem__`` touched the same key while the
    read was in progress.

    Keys that exist in ``data`` but were never written through this Cache (e.g.
    files already on disk when a ``File`` is wrapped) are readable too: they are
    assigned a generation on first access. Previously such keys raised KeyError
    here even though ``__contains__``, ``__iter__`` and ``__len__`` reported them.

    Raises
    ------
    KeyError
        If ``key`` is neither in ``cache`` nor in ``data``.
    """
    try:
        return self.cache[key]
    except KeyError:
        pass

    try:
        gen = self._last_updated[key]
        registered_here = False
    except KeyError:
        # _last_updated is only populated by __setitem__, so this key has never
        # been written through this Cache. Give it a fresh generation so that
        # concurrent __setitem__ / __delitem__ calls can still be detected below.
        self._gen += 1
        gen = self._last_updated[key] = self._gen
        registered_here = True

    try:
        with self.unlock():
            value = self.data[key]
    except Exception:
        # The lock is held again here. Do not leave a generation behind for a key
        # that could not be read, unless another thread has taken it over.
        if registered_here and self._last_updated.get(key) == gen:
            del self._last_updated[key]
        raise

    # Could another thread have called __setitem__ or __delitem__ on the
    # same key in the meantime? If not, update the cache
    if gen == self._last_updated.get(key):
        self.cache[key] = value
        self._last_updated[key] += 1
    return value
class WeakValueMapping(weakref.WeakValueDictionary[KT, VT]):
    """A ``weakref.WeakValueDictionary`` that tolerates values which cannot be the
    target of a ``weakref.ref``: storing such a value is a silent no-op.
    """

    def __setitem__(self, key: KT, value: VT) -> None:
        try:
            super().__setitem__(key, value)
        except TypeError:
            # Raised by the parent class when the value does not support weak
            # references; the pair is simply not stored.
            return
class ZictBase(MutableMapping[KT, VT]):
    """Base class for zict mappings.

    Provides a per-instance reentrant lock, pickling support that leaves the lock
    out, a bulk ``update`` hook and context-manager behaviour that calls ``close``.
    """

    lock: threading.RLock

    def __init__(self) -> None:
        self.lock = threading.RLock()

    def __getstate__(self) -> dict[str, Any]:
        # Everything but the lock is part of the pickled state
        state = dict(self.__dict__)
        state.pop("lock")
        return state

    def __setstate__(self, state: dict[str, Any]) -> None:
        # Adopt the unpickled attributes and create a brand new lock
        self.__dict__ = state
        self.lock = threading.RLock()

    def update(  # type: ignore[override]
        self,
        other: Mapping[KT, VT] | Iterable[tuple[KT, VT]] = (),
        /,
        **kwargs: VT,
    ) -> None:
        """Set all key/value pairs of ``other`` (a mapping or an iterable of
        pairs), followed by ``kwargs``, through a single ``_do_update`` call.
        """
        pairs = other.items() if hasattr(other, "items") else other
        self._do_update(chain(pairs, kwargs.items()))  # type: ignore

    def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None:
        # Default implementation, can be overridden for speed
        for key, value in items:
            self[key] = value

    def discard(self, key: KT) -> None:
        """Flush *key* if possible.
        Not the same as ``m.pop(key, None)``, as it doesn't trigger ``__getitem__``.
        """
        discard(self, key)

    def close(self) -> None:
        """Release any system resources held by this object"""

    def __enter__(self) -> Self:
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()

    def __del__(self) -> None:
        self.close()

    @contextmanager
    def unlock(self) -> Iterator[None]:
        """To be used in a method decorated by ``@locked``.
        Temporarily releases the mapping's RLock; it is acquired again on exit,
        also when the wrapped code raises.
        """
        self.lock.release()
        try:
            yield
        finally:
            self.lock.acquire()
120 | """ 121 | 122 | @wraps(func) 123 | def wrapper(*args: P.args, **kwargs: P.kwargs) -> VT: 124 | self = cast(ZictBase, args[0]) 125 | with self.lock: 126 | return func(*args, **kwargs) 127 | 128 | return wrapper 129 | -------------------------------------------------------------------------------- /zict/file.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import mmap 4 | import os 5 | import pathlib 6 | from collections.abc import Iterator 7 | from urllib.parse import quote, unquote 8 | 9 | from zict.common import ZictBase, locked 10 | 11 | 12 | class File(ZictBase[str, bytes]): 13 | """Mutable Mapping interface to a directory 14 | 15 | Keys must be strings, values must be buffers 16 | 17 | Note this shouldn't be used for interprocess persistence, as keys 18 | are cached in memory. 19 | 20 | Parameters 21 | ---------- 22 | directory: str 23 | Directory to write to. If it already exists, existing files will be imported as 24 | mapping elements. If it doesn't exists, it will be created. 25 | memmap: bool (optional) 26 | If True, use `mmap` for reading. Defaults to False. 27 | 28 | Notes 29 | ----- 30 | If you call methods of this class from multiple threads, access will be fast as long 31 | as atomic disk access such as ``open``, ``os.fstat``, and ``os.remove`` is fast. 32 | This is not always the case, e.g. in case of slow network mounts or spun-down 33 | magnetic drives. 34 | Bytes read/write in the files is not protected by locks; this could cause failures 35 | on Windows, NFS, and in general whenever it's not OK to delete a file while there 36 | are file descriptors open on it. 
def __init__(self, directory: str | pathlib.Path, memmap: bool = False):
    """Open ``directory`` as a mapping, creating it if it does not exist.

    Files already present in the directory are registered as keys; the key is
    recovered from the file name with ``_unsafe_key``.
    """
    super().__init__()
    self.directory = str(directory)
    self.memmap = memmap
    self.filenames = {}
    self._inc = 0

    if os.path.exists(self.directory):
        # Import pre-existing files; the counter advances once per file
        for existing in os.listdir(self.directory):
            self.filenames[self._unsafe_key(existing)] = existing
            self._inc += 1
    else:
        os.makedirs(self.directory, exist_ok=True)
def __str__(self) -> str:
    """Return a short description: the backing directory and the number of keys.

    The previous implementation returned an empty string, which made both
    ``str()`` and ``repr()`` of a File useless in logs and debuggers.
    """
    return f"<File: {self.directory}, {len(self)} elements>"

__repr__ = __str__
@locked
def __setitem__(
    self,
    key: str,
    value: bytes
    | bytearray
    | memoryview
    | list[bytes | bytearray | memoryview]
    | tuple[bytes | bytearray | memoryview, ...],
) -> None:
    """Write ``value`` to a new file and register it under ``key``.

    Any previous file for ``key`` is removed first. The file is created while
    holding the lock, but the bytes are written with the lock released.

    If the write fails (e.g. disk full, or a value that ``write`` rejects), the
    partially written file is removed before the exception is re-raised.
    Otherwise it would stay on disk unregistered and be imported as a key by the
    next ``File`` opened on the same directory.

    Parameters
    ----------
    key: str
        Mapping key; escaped into a unique file name by ``_safe_key``.
    value:
        A bytes-like object, or a list/tuple of them which are written
        back to back.
    """
    self.discard(key)
    fn = self._safe_key(key)
    path = os.path.join(self.directory, fn)

    try:
        with open(path, "wb") as fh, self.unlock():
            if isinstance(value, (tuple, list)):
                fh.writelines(value)
            else:
                fh.write(value)
    except BaseException:
        # At this point the lock has been re-acquired and the file is closed.
        # Best-effort cleanup; the original error is what the caller must see.
        try:
            os.remove(path)
        except OSError:
            pass
        raise

    if key in self.filenames:
        # Race condition: two calls to __setitem__ from different threads on the
        # same key at the same time. The other thread registered its file first,
        # so ours is dropped.
        os.remove(path)
    else:
        self.filenames[key] = fn
def __str__(self) -> str:
    """Return a description naming the dump and load functions and showing the
    wrapped mapping.

    The previous format string produced an unbalanced ``"{load} {d}>"`` without
    the opening bracket, the class name or the ``dump`` function.
    """
    return f"<Func: {funcname(self.dump)}<->{funcname(self.load)} {self.d}>"

__repr__ = __str__
class KeyMap(ZictBase[KT, VT], Generic[KT, JT, VT]):
    """Translate the keys of a MutableMapping with a pair of input/output functions

    Parameters
    ----------
    fn: callable
        Function to call on a key of the KeyMap to transform it to a key of the wrapped
        mapping. It must be pure (if called twice on the same key it must return
        the same result) and it must not generate collisions. In other words,
        ``fn(a) == fn(b) iff a == b``.

    d: MutableMapping
        Wrapped mapping

    See Also
    --------
    Func

    Examples
    --------
    Use any python object as keys of a File, instead of just strings, as long as their
    str representation is unique:

    >>> from zict import File
    >>> z = KeyMap(str, File("myfile"))  # doctest: +SKIP
    >>> z[1] = 10  # doctest: +SKIP
    """

    fn: Callable[[KT], JT]
    d: MutableMapping[JT, VT]
    keymap: dict[KT, JT]

    def __init__(self, fn: Callable[[KT], JT], d: MutableMapping[JT, VT]):
        super().__init__()
        self.fn = fn
        self.d = d
        # Original key -> translated key, for every key set through this KeyMap
        self.keymap = {}

    @locked
    def __setitem__(self, key: KT, value: VT) -> None:
        translated = self.fn(key)
        self.keymap[key] = translated
        # The wrapped mapping is written with the lock released
        with self.unlock():
            self.d[translated] = value
        if key in self.keymap:
            return
        # Race condition with __delitem__: the key was deleted while we were
        # writing, so take the value out of the wrapped mapping again.
        discard(self.d, translated)

    def __getitem__(self, key: KT) -> VT:
        return self.d[self.keymap[key]]

    @locked
    def __delitem__(self, key: KT) -> None:
        translated = self.keymap.pop(key)
        del self.d[translated]

    def __contains__(self, key: object) -> bool:
        return key in self.keymap

    def __iter__(self) -> Iterator[KT]:
        return iter(self.keymap)

    def __len__(self) -> int:
        return len(self.keymap)

    def flush(self) -> None:
        flush(self.d)

    def close(self) -> None:
        close(self.d)
def __init__(self, directory: str | pathlib.Path, map_size: int | None = None):
    """Open (or create) the LMDB environment stored in ``directory``.

    When ``map_size`` is None a platform-dependent default is chosen.
    """
    import lmdb

    super().__init__()
    if map_size is None:
        if sys.platform == "win32":
            # 10 MiB
            map_size = 10 * 2**20
        else:
            # 1 TiB, capped at a quarter of sys.maxsize.
            # NOTE(review): on a 32-bit build sys.maxsize is 2**31 - 1, so this
            # evaluates to just under 512 MiB, whereas the class docstring
            # states 1 GiB - confirm which one is intended.
            map_size = min(2**40, sys.maxsize // 4)

    self.db = lmdb.open(
        str(directory),
        subdir=True,
        map_size=map_size,
        sync=False,
        writemap=True,
    )
class LMDBItemsView(ItemsView[str, bytes]):
    """Items view of an LMDB mapping.

    Membership is answered with a single key lookup; iteration walks a cursor
    over the database and decodes the keys.
    """

    _mapping: LMDB  # FIXME CPython implementation detail
    __slots__ = ()

    def __contains__(self, item: object) -> bool:
        key: str
        value: object
        key, value = item  # type: ignore
        try:
            stored = self._mapping[key]
        except KeyError:
            return False
        return stored == value

    def __iter__(self) -> Iterator[tuple[str, bytes]]:
        txn = self._mapping.db.begin()
        cursor = txn.cursor()
        return (
            (_decode_key(raw_key), raw_value)
            for raw_key, raw_value in cursor.iternext(keys=True, values=True)
        )
| Callable, 5 | Collection, 6 | ItemsView, 7 | Iterator, 8 | KeysView, 9 | MutableMapping, 10 | ValuesView, 11 | ) 12 | 13 | from zict.common import KT, VT, NoDefault, ZictBase, close, flush, locked, nodefault 14 | from zict.utils import InsertionSortedSet 15 | 16 | 17 | class LRU(ZictBase[KT, VT]): 18 | """Evict Least Recently Used Elements. 19 | 20 | Parameters 21 | ---------- 22 | n: int or float 23 | Number of elements to keep, or total weight if ``weight`` is used. 24 | Any individual key that is heavier than n will be automatically evicted as soon 25 | as it is inserted. 26 | 27 | It can be updated after initialization. See also: ``offset`` attribute. 28 | d: MutableMapping 29 | Dict-like in which to hold elements. There are no expectations on its internal 30 | ordering. Iteration on the LRU follows the order of the underlying mapping. 31 | on_evict: callable or list of callables 32 | Function:: k, v -> action to call on key/value pairs prior to eviction 33 | If an exception occurs during an on_evict callback (e.g a callback tried 34 | storing to disk and raised a disk full error) the key will remain in the LRU. 35 | on_cancel_evict: callable or list of callables 36 | Function:: k, v -> action to call on key/value pairs if they're deleted or 37 | updated from a thread while the on_evict callables are being executed in 38 | another. 39 | If you're not accessing the LRU from multiple threads, ignore this parameter. 40 | weight: callable 41 | Function:: k, v -> number to determine the size of keeping the item in 42 | the mapping. Defaults to ``(k, v) -> 1`` 43 | 44 | Notes 45 | ----- 46 | If you call methods of this class from multiple threads, access will be fast as long 47 | as all methods of ``d`` are fast. Callbacks are not protected by locks and can be 48 | arbitrarily slow. 
49 | 50 | Examples 51 | -------- 52 | >>> lru = LRU(2, {}, on_evict=lambda k, v: print("Lost", k, v)) 53 | >>> lru['x'] = 1 54 | >>> lru['y'] = 2 55 | >>> lru['z'] = 3 56 | Lost x 1 57 | """ 58 | 59 | d: MutableMapping[KT, VT] 60 | order: InsertionSortedSet[KT] 61 | heavy: InsertionSortedSet[KT] 62 | on_evict: list[Callable[[KT, VT], None]] 63 | on_cancel_evict: list[Callable[[KT, VT], None]] 64 | weight: Callable[[KT, VT], float] 65 | #: Maximum weight before eviction is triggered, as set during initialization. 66 | #: Updating this attribute doesn't trigger eviction by itself; you should call 67 | #: :meth:`evict_until_below_target` explicitly afterwards. 68 | n: float 69 | #: Offset to add to ``total_weight`` to determine if key/value pairs should be 70 | #: evicted. It always starts at zero and can be updated afterwards. Updating this 71 | #: attribute doesn't trigger eviction by itself; you should call 72 | #: :meth:`evict_until_below_target` explicitly afterwards. 73 | #: Increasing ``offset`` is not the same as reducing ``n``, as the latter will also 74 | #: reduce the threshold below which a value is considered "heavy" and qualifies for 75 | #: immediate eviction. 
def __init__(
    self,
    n: float,
    d: MutableMapping[KT, VT],
    *,
    on_evict: Callable[[KT, VT], None]
    | list[Callable[[KT, VT], None]]
    | None = None,
    on_cancel_evict: Callable[[KT, VT], None]
    | list[Callable[[KT, VT], None]]
    | None = None,
    weight: Callable[[KT, VT], float] = lambda k, v: 1,
):
    """Wrap ``d`` into an LRU of maximum total weight ``n``.

    Key/value pairs already present in ``d`` are weighed and tracked, but no
    eviction is triggered here.

    ``on_evict`` and ``on_cancel_evict`` accept a single callable, a list of
    callables, or None; they are always stored as lists.
    """
    super().__init__()
    self.d = d
    self.n = n
    self.offset = 0

    if callable(on_evict):
        on_evict = [on_evict]
    self.on_evict = on_evict or []
    if callable(on_cancel_evict):
        on_cancel_evict = [on_cancel_evict]
    self.on_cancel_evict = on_cancel_evict or []

    self.weight = weight
    self.weights = {k: weight(k, v) for k, v in d.items()}
    self.total_weight = sum(self.weights.values())
    self.order = InsertionSortedSet(d)
    # A key is heavy when its own weight exceeds n. This must be the same strict
    # comparison that set_noevict() and __setitem__ use; it was ">=" here, which
    # flagged pre-existing keys weighing exactly n for priority eviction while
    # the same key inserted later would not be.
    self.heavy = InsertionSortedSet(k for k, w in self.weights.items() if w > n)
    self.closed = False
    # Keys whose eviction is in progress; see evict()
    self._cancel_evict = {}
128 | """ 129 | result = {key: self.d[key] for key in keys} 130 | for key in keys: 131 | self.order.remove(key) 132 | self.order.add(key) 133 | return result 134 | 135 | def __setitem__(self, key: KT, value: VT) -> None: 136 | self.set_noevict(key, value) 137 | try: 138 | self.evict_until_below_target() 139 | except Exception: 140 | if self.weights.get(key, 0) > self.n and key not in self.heavy: 141 | # weight(value) > n and evicting the key we just inserted failed. 142 | # Evict the rest of the LRU instead. 143 | try: 144 | while len(self.d) > 1: 145 | self.evict() 146 | except Exception: 147 | pass 148 | raise 149 | 150 | @locked 151 | def set_noevict(self, key: KT, value: VT) -> None: 152 | """Variant of ``__setitem__`` that does not evict if the total weight exceeds n. 153 | Unlike ``__setitem__``, this method does not depend on the ``on_evict`` 154 | functions to be thread-safe for its own thread-safety. It also is not prone to 155 | re-raising exceptions from the ``on_evict`` callbacks. 156 | """ 157 | self.discard(key) 158 | weight = self.weight(key, value) 159 | if key in self._cancel_evict: 160 | self._cancel_evict[key] = True 161 | self.d[key] = value 162 | self.order.add(key) 163 | if weight > self.n: 164 | self.heavy.add(key) # Mark this key to be evicted first 165 | self.weights[key] = weight 166 | self.total_weight += weight 167 | 168 | def evict_until_below_target(self, n: float | None = None) -> None: 169 | """Evict key/value pairs until the total weight falls below n 170 | 171 | Parameters 172 | ---------- 173 | n: float, optional 174 | Total weight threshold to achieve. Defaults to self.n. 
175 | """ 176 | if n is None: 177 | n = self.n 178 | while self.total_weight + self.offset > n and not self.closed: 179 | try: 180 | self.evict() 181 | except KeyError: 182 | return # Multithreaded race condition 183 | 184 | @locked 185 | def evict( 186 | self, key: KT | NoDefault = nodefault 187 | ) -> tuple[KT, VT, float] | tuple[None, None, float]: 188 | """Evict least recently used key, or least recently inserted key with individual 189 | weight > n, if any. You may also evict a specific key. 190 | 191 | This is typically called from internal use, but can be externally 192 | triggered as well. 193 | 194 | Returns 195 | ------- 196 | Tuple of (key, value, weight) 197 | 198 | Or (None, None, 0) if the key that was being evicted was updated or deleted from 199 | another thread while the on_evict callbacks were being executed. This outcome is 200 | only possible in multithreaded access. 201 | """ 202 | if key is nodefault: 203 | try: 204 | key = next(iter(self.heavy or self.order)) 205 | except StopIteration: 206 | raise KeyError("evict(): dictionary is empty") 207 | 208 | if key in self._cancel_evict: 209 | return None, None, 0 210 | 211 | # For the purpose of multithreaded access, it's important that the value remains 212 | # in self.d until all callbacks are successful. 213 | # When this is used inside a Buffer, there must never be a moment when the key 214 | # is neither in fast nor in slow. 215 | value = self.d[key] 216 | 217 | # If we are evicting a heavy key we just inserted and one of the callbacks 218 | # fails, put it at the bottom of the LRU instead of the top. This way lighter 219 | # keys will have a chance to be evicted first and make space. 220 | self.heavy.discard(key) 221 | 222 | self._cancel_evict[key] = False 223 | try: 224 | with self.unlock(): 225 | # This may raise; e.g. 
if a callback tries storing to a full disk 226 | for cb in self.on_evict: 227 | cb(key, value) 228 | 229 | if self._cancel_evict[key]: 230 | for cb in self.on_cancel_evict: 231 | cb(key, value) 232 | return None, None, 0 233 | finally: 234 | del self._cancel_evict[key] 235 | 236 | del self.d[key] 237 | self.order.remove(key) 238 | weight = self.weights.pop(key) 239 | self.total_weight -= weight 240 | 241 | return key, value, weight 242 | 243 | @locked 244 | def __delitem__(self, key: KT) -> None: 245 | if key in self._cancel_evict: 246 | self._cancel_evict[key] = True 247 | del self.d[key] 248 | self.order.remove(key) 249 | self.heavy.discard(key) 250 | self.total_weight -= self.weights.pop(key) 251 | 252 | def keys(self) -> KeysView[KT]: 253 | return self.d.keys() 254 | 255 | def values(self) -> ValuesView[VT]: 256 | return self.d.values() 257 | 258 | def items(self) -> ItemsView[KT, VT]: 259 | return self.d.items() 260 | 261 | def __len__(self) -> int: 262 | return len(self.d) 263 | 264 | def __iter__(self) -> Iterator[KT]: 265 | return iter(self.d) 266 | 267 | def __contains__(self, key: object) -> bool: 268 | return key in self.d 269 | 270 | def __str__(self) -> str: 271 | sub = str(self.d) if not isinstance(self.d, dict) else "dict" 272 | return f"" 273 | 274 | __repr__ = __str__ 275 | 276 | def flush(self) -> None: 277 | flush(self.d) 278 | 279 | def close(self) -> None: 280 | self.closed = True 281 | close(self.d) 282 | -------------------------------------------------------------------------------- /zict/py.typed: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/zict/7e5dafedcf016a4ac61286badbf1f8da3741d2d3/zict/py.typed -------------------------------------------------------------------------------- /zict/sieve.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | from collections import defaultdict 4 | from 
collections.abc import Callable, Iterable, Iterator, Mapping, MutableMapping 5 | from typing import Generic, TypeVar 6 | 7 | from zict.common import KT, VT, ZictBase, close, discard, flush, locked 8 | 9 | MKT = TypeVar("MKT") 10 | 11 | 12 | class Sieve(ZictBase[KT, VT], Generic[KT, VT, MKT]): 13 | """Store values in different mappings based on a selector's 14 | output. 15 | 16 | This creates a MutableMapping combining several underlying 17 | MutableMappings for storage. Items are dispatched based on 18 | a selector function provided by the user. 19 | 20 | Parameters 21 | ---------- 22 | mappings: dict of {mapping key: MutableMapping} 23 | selector: callable (key, value) -> mapping key 24 | 25 | Notes 26 | ----- 27 | If you call methods of this class from multiple threads, access will be fast as long 28 | as the ``__contains__`` and ``__delitem__`` methods of all underlying mappings are 29 | fast. ``__getitem__`` and ``__setitem__`` methods of the underlying mappings are not 30 | protected by locks. 31 | 32 | Examples 33 | -------- 34 | >>> small = {} 35 | >>> large = DataBase() # doctest: +SKIP 36 | >>> mappings = {True: small, False: large} # doctest: +SKIP 37 | >>> def is_small(key, value): # doctest: +SKIP 38 | ...
return sys.getsizeof(value) < 10000 # doctest: +SKIP 39 | >>> d = Sieve(mappings, is_small) # doctest: +SKIP 40 | """ 41 | 42 | mappings: Mapping[MKT, MutableMapping[KT, VT]] 43 | selector: Callable[[KT, VT], MKT] 44 | key_to_mapping: dict[KT, MutableMapping[KT, VT]] 45 | gen: int 46 | 47 | def __init__( 48 | self, 49 | mappings: Mapping[MKT, MutableMapping[KT, VT]], 50 | selector: Callable[[KT, VT], MKT], 51 | ): 52 | super().__init__() 53 | self.mappings = mappings 54 | self.selector = selector 55 | self.key_to_mapping = {} 56 | self.gen = 0 57 | 58 | def __getitem__(self, key: KT) -> VT: 59 | # Note that this may raise KeyError if you call it in the middle of __setitem__ 60 | # or update for an already existing key 61 | return self.key_to_mapping[key][key] 62 | 63 | @locked 64 | def __setitem__(self, key: KT, value: VT) -> None: 65 | discard(self, key) 66 | mkey = self.selector(key, value) 67 | mapping = self.mappings[mkey] 68 | self.key_to_mapping[key] = mapping 69 | self.gen += 1 70 | gen = self.gen 71 | 72 | with self.unlock(): 73 | mapping[key] = value 74 | 75 | if gen != self.gen and self.key_to_mapping.get(key) is not mapping: 76 | # Multithreaded race condition 77 | discard(mapping, key) 78 | 79 | @locked 80 | def __delitem__(self, key: KT) -> None: 81 | mapping = self.key_to_mapping.pop(key) 82 | self.gen += 1 83 | discard(mapping, key) 84 | 85 | @locked 86 | def _do_update(self, items: Iterable[tuple[KT, VT]]) -> None: 87 | # Optimized update() implementation issuing a single update() 88 | # call per underlying mapping. 
89 | updates = defaultdict(list) 90 | self.gen += 1 91 | gen = self.gen 92 | 93 | for key, value in items: 94 | old_mapping = self.key_to_mapping.pop(key, None) 95 | if old_mapping is not None: 96 | discard(old_mapping, key) 97 | mkey = self.selector(key, value) 98 | mapping = self.mappings[mkey] 99 | updates[mkey].append((key, value)) 100 | self.key_to_mapping[key] = mapping 101 | 102 | with self.unlock(): 103 | for mkey, mitems in updates.items(): 104 | mapping = self.mappings[mkey] 105 | mapping.update(mitems) 106 | 107 | if gen != self.gen: 108 | # Multithreaded race condition 109 | for mkey, mitems in updates.items(): 110 | mapping = self.mappings[mkey] 111 | for key, _ in mitems: 112 | if self.key_to_mapping.get(key) is not mapping: 113 | discard(mapping, key) 114 | 115 | def __len__(self) -> int: 116 | return len(self.key_to_mapping) 117 | 118 | def __iter__(self) -> Iterator[KT]: 119 | return iter(self.key_to_mapping) 120 | 121 | def __contains__(self, key: object) -> bool: 122 | return key in self.key_to_mapping 123 | 124 | def __str__(self) -> str: 125 | return f"Sieve<{self.mappings}>" 126 | 127 | __repr__ = __str__ 128 | 129 | def flush(self) -> None: 130 | flush(*self.mappings.values()) 131 | 132 | def close(self) -> None: 133 | close(*self.mappings.values()) 134 | -------------------------------------------------------------------------------- /zict/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/dask/zict/7e5dafedcf016a4ac61286badbf1f8da3741d2d3/zict/tests/__init__.py -------------------------------------------------------------------------------- /zict/tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import gc 4 | import sys 5 | import threading 6 | from concurrent.futures import ThreadPoolExecutor 7 | 8 | import pytest 9 | 10 | try: 11 | import psutil 12 | except 
ImportError: 13 | psutil = None # type: ignore 14 | 15 | 16 | @pytest.fixture 17 | def check_fd_leaks(): 18 | if sys.platform == "win32" or psutil is None: 19 | yield 20 | else: 21 | proc = psutil.Process() 22 | before = proc.num_fds() 23 | yield 24 | gc.collect() 25 | assert proc.num_fds() == before 26 | 27 | 28 | @pytest.fixture 29 | def is_locked(): 30 | """Callable that returns True if the parameter zict mapping has its RLock engaged""" 31 | with ThreadPoolExecutor(1) as ex: 32 | 33 | def __is_locked(d): 34 | out = d.lock.acquire(blocking=False) 35 | if out: 36 | d.lock.release() 37 | return not out 38 | 39 | def _is_locked(d): 40 | return ex.submit(__is_locked, d).result() 41 | 42 | yield _is_locked 43 | 44 | 45 | @pytest.fixture 46 | def check_thread_leaks(): 47 | active_threads_start = threading.enumerate() 48 | 49 | yield 50 | 51 | bad_threads = [ 52 | thread for thread in threading.enumerate() if thread not in active_threads_start 53 | ] 54 | if bad_threads: 55 | raise RuntimeError(f"Leaked thread(s): {bad_threads}") 56 | -------------------------------------------------------------------------------- /zict/tests/test_async_buffer.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import contextvars 3 | import threading 4 | import time 5 | from collections import UserDict 6 | from concurrent.futures import Executor, Future 7 | 8 | import pytest 9 | 10 | from zict import AsyncBuffer, Func 11 | from zict.tests import utils_test 12 | 13 | 14 | @pytest.mark.asyncio 15 | async def test_simple(check_thread_leaks): 16 | with AsyncBuffer({}, utils_test.SlowDict(0.01), n=3) as buff: 17 | buff["a"] = 1 18 | buff["b"] = 2 19 | buff["c"] = 3 20 | assert set(buff.fast) == {"a", "b", "c"} 21 | assert not buff.slow 22 | assert not buff.futures 23 | 24 | buff["d"] = 4 25 | assert set(buff.fast) == {"a", "b", "c", "d"} 26 | assert not buff.slow 27 | assert buff.futures 28 | await asyncio.wait(buff.futures) 29 | 
assert set(buff.fast) == {"b", "c", "d"} 30 | assert set(buff.slow) == {"a"} 31 | 32 | buff.async_evict_until_below_target() 33 | assert not buff.futures 34 | buff.async_evict_until_below_target(10) 35 | assert not buff.futures 36 | buff.async_evict_until_below_target(2) 37 | assert buff.futures 38 | await asyncio.wait(buff.futures) 39 | assert set(buff.fast) == {"c", "d"} 40 | assert set(buff.slow) == {"a", "b"} 41 | 42 | # Do not incur in threading sync cost if everything is in fast 43 | assert list(buff.fast.order) == ["c", "d"] 44 | future = buff.async_get(["c"]) 45 | assert future.done() 46 | assert await future == {"c": 3} 47 | assert list(buff.fast.order) == ["d", "c"] 48 | 49 | # Do not disturb LRU order in case of missing keys 50 | with pytest.raises(KeyError, match="m"): 51 | _ = buff.async_get(["d", "m"], missing="raise") 52 | assert list(buff.fast.order) == ["d", "c"] 53 | 54 | future = buff.async_get(["d", "m"], missing="omit") 55 | assert future.done() 56 | assert await future == {"d": 4} 57 | assert list(buff.fast.order) == ["c", "d"] 58 | 59 | with pytest.raises(ValueError): 60 | _ = buff.async_get(["a"], missing="misspell") 61 | 62 | # Asynchronously retrieve from slow 63 | future = buff.async_get(["a", "b"]) 64 | assert not future.done() 65 | assert future in buff.futures 66 | assert await future == {"a": 1, "b": 2} 67 | assert not buff.futures 68 | assert set(buff.fast) == {"d", "a", "b"} 69 | assert set(buff.slow) == {"c"} 70 | 71 | 72 | @pytest.mark.asyncio 73 | async def test_double_evict(check_thread_leaks): 74 | """User calls async_evict_until_below_target() while the same is already running""" 75 | with AsyncBuffer({}, utils_test.SlowDict(0.01), n=3) as buff: 76 | buff["x"] = 1 77 | buff["y"] = 2 78 | buff["z"] = 3 79 | assert len(buff.fast) == 3 80 | assert not buff.futures 81 | 82 | buff.async_evict_until_below_target(2) 83 | assert len(buff.futures) == 1 84 | assert list(buff.evicting.values()) == [2] 85 | 86 | # Evicting to the same n 
is a no-op 87 | buff.async_evict_until_below_target(2) 88 | assert len(buff.futures) == 1 89 | assert list(buff.evicting.values()) == [2] 90 | 91 | # Evicting to a lower n while a previous eviction is still running does not 92 | # cancel the previous eviction 93 | buff.async_evict_until_below_target(1) 94 | assert len(buff.futures) == 2 95 | assert list(buff.evicting.values()) == [2, 1] 96 | await asyncio.wait(buff.futures, return_when=asyncio.FIRST_COMPLETED) 97 | assert len(buff.futures) == 1 98 | assert list(buff.evicting.values()) == [1] 99 | await asyncio.wait(buff.futures) 100 | assert not buff.futures 101 | assert not buff.evicting 102 | 103 | assert buff.fast == {"z": 3} 104 | assert buff.slow.data == {"x": 1, "y": 2} 105 | 106 | # Evicting to negative n while fast is empty does nothing 107 | buff.evict_until_below_target(0) 108 | buff.async_evict_until_below_target(-1) 109 | assert not buff.futures 110 | assert not buff.evicting 111 | 112 | 113 | @pytest.mark.asyncio 114 | async def test_close_during_evict(check_thread_leaks): 115 | buff = AsyncBuffer({}, utils_test.SlowDict(0.01), n=100) 116 | buff.update({i: i for i in range(100)}) 117 | assert not buff.futures 118 | assert len(buff.fast) == 100 119 | 120 | buff.async_evict_until_below_target(0) 121 | while not buff.slow: 122 | await asyncio.sleep(0.01) 123 | assert buff.fast 124 | assert buff.futures 125 | 126 | buff.close() 127 | await asyncio.wait(buff.futures) 128 | assert not buff.futures 129 | assert buff.fast 130 | assert buff.slow 131 | 132 | 133 | @pytest.mark.asyncio 134 | async def test_close_during_get(check_thread_leaks): 135 | buff = AsyncBuffer({}, utils_test.SlowDict(0.01), n=100) 136 | buff.slow.data.update({i: i for i in range(100)}) 137 | assert len(buff) == 100 138 | assert not buff.fast 139 | 140 | future = buff.async_get(list(range(100))) 141 | assert buff.futures 142 | while not buff.fast: 143 | await asyncio.sleep(0.01) 144 | 145 | buff.close() 146 | with 
pytest.raises(asyncio.CancelledError): 147 | await future 148 | await asyncio.wait(buff.futures) 149 | assert not buff.futures 150 | 151 | assert buff.fast 152 | assert buff.slow 153 | 154 | 155 | @pytest.mark.asyncio 156 | async def test_contextvars(check_thread_leaks): 157 | ctx = contextvars.ContextVar("v", default=0) 158 | in_dump = threading.Event() 159 | in_load = threading.Event() 160 | block_dump = threading.Event() 161 | block_load = threading.Event() 162 | 163 | def dump(v): 164 | in_dump.set() 165 | assert block_dump.wait(timeout=5) 166 | return v + ctx.get() 167 | 168 | def load(v): 169 | in_load.set() 170 | assert block_load.wait(timeout=5) 171 | return v + ctx.get() 172 | 173 | with AsyncBuffer({}, Func(dump, load, {}), n=0.1) as buff: 174 | ctx.set(20) # Picked up by dump 175 | buff["x"] = 1 176 | assert buff.futures 177 | assert in_dump.wait(timeout=5) 178 | ctx.set(300) # Changed while dump runs. Won't be picked up until load. 179 | block_dump.set() 180 | await asyncio.wait(buff.futures) 181 | assert buff.slow.d == {"x": 21} 182 | fut = buff.async_get(["x"]) 183 | assert in_load.wait(timeout=5) 184 | ctx.set(4000) # Changed while load runs. Won't be picked up. 185 | block_load.set() 186 | assert await fut == {"x": 321} # 1 + 20 (added by dump) + 300 (added by load) 187 | 188 | 189 | @pytest.mark.asyncio 190 | @pytest.mark.parametrize("missing", ["raise", "omit"]) 191 | async def test_race_condition_get_async_delitem(check_thread_leaks, missing): 192 | """All required keys exist in slow when you call get_async(); however some are 193 | deleted by the time the offloaded thread retrieves their values. 
194 | """ 195 | 196 | class Slow(UserDict): 197 | def __getitem__(self, key): 198 | if key in self: 199 | time.sleep(0.01) 200 | return super().__getitem__(key) 201 | 202 | with AsyncBuffer({}, Slow(), n=100) as buff: 203 | buff.slow.update({i: i for i in range(100)}) 204 | assert len(buff) == 100 205 | 206 | future = buff.async_get(list(range(100)), missing=missing) 207 | while not buff.fast: 208 | await asyncio.sleep(0.01) 209 | assert buff.slow 210 | # Don't use clear(); it uses __iter__ which would not return until restore is 211 | # completed 212 | for i in range(100): 213 | del buff[i] 214 | assert not buff.fast 215 | assert not buff.slow 216 | assert not future.done() 217 | 218 | if missing == "raise": 219 | with pytest.raises(KeyError): 220 | await future 221 | else: 222 | out = await future 223 | assert 0 < len(out) < 100 224 | 225 | 226 | @pytest.mark.asyncio 227 | async def test_multiple_offload_threads(): 228 | barrier = threading.Barrier(2) 229 | 230 | class Slow(UserDict): 231 | def __getitem__(self, key): 232 | barrier.wait(timeout=5) 233 | return super().__getitem__(key) 234 | 235 | with AsyncBuffer({}, Slow(), n=100, nthreads=2) as buff: 236 | buff["x"] = 1 237 | buff["y"] = 2 238 | buff.evict_until_below_target(0) 239 | assert not buff.fast 240 | assert set(buff.slow) == {"x", "y"} 241 | 242 | out = await asyncio.gather(buff.async_get(["x"]), buff.async_get(["y"])) 243 | assert out == [{"x": 1}, {"y": 2}] 244 | 245 | 246 | @pytest.mark.asyncio 247 | async def test_external_executor(): 248 | n_submit = 0 249 | 250 | class MyExecutor(Executor): 251 | def submit(self, fn, /, *args, **kwargs): 252 | nonlocal n_submit 253 | n_submit += 1 254 | out = fn(*args, **kwargs) 255 | f = Future() 256 | f.set_result(out) 257 | return f 258 | 259 | def shutdown(self, *args, **kwargs): 260 | raise AssertionError("AsyncBuffer.close() called executor.shutdown()") 261 | 262 | ex = MyExecutor() 263 | buff = AsyncBuffer({}, {}, n=1, executor=ex) 264 | buff["x"] = 1 265 
| buff["y"] = 2 # Evict x 266 | assert buff.fast.d == {"y": 2} 267 | assert buff.slow == {"x": 1} 268 | assert await buff.async_get(["x"]) == {"x": 1} # Restore x, evict y 269 | assert buff.fast.d == {"x": 1} 270 | assert buff.slow == {"y": 2} 271 | assert n_submit == 2 272 | buff.close() 273 | -------------------------------------------------------------------------------- /zict/tests/test_buffer.py: -------------------------------------------------------------------------------- 1 | import random 2 | import threading 3 | from collections import UserDict 4 | from concurrent.futures import ThreadPoolExecutor 5 | 6 | import pytest 7 | 8 | from zict import Buffer 9 | from zict.tests import utils_test 10 | 11 | 12 | def test_simple(): 13 | a = {} 14 | b = {} 15 | buff = Buffer(a, b, n=10, weight=lambda k, v: v) 16 | 17 | buff["x"] = 1 18 | buff["y"] = 2 19 | 20 | assert buff["x"] == 1 21 | assert buff["y"] == 2 22 | assert a == {"x": 1, "y": 2} 23 | assert buff.fast.total_weight == 3 24 | 25 | buff["z"] = 8 26 | assert a == {"y": 2, "z": 8} 27 | assert b == {"x": 1} 28 | 29 | assert buff["x"] == 1 30 | assert a == {"x": 1, "z": 8} 31 | assert b == {"y": 2} 32 | 33 | assert "x" in buff 34 | assert "y" in buff 35 | assert "missing" not in buff 36 | 37 | buff["y"] = 1 38 | assert a == {"x": 1, "y": 1, "z": 8} 39 | assert buff.fast.total_weight == 10 40 | assert b == {} 41 | 42 | del buff["z"] 43 | assert a == {"x": 1, "y": 1} 44 | assert buff.fast.total_weight == 2 45 | assert b == {} 46 | 47 | del buff["y"] 48 | assert a == {"x": 1} 49 | assert buff.fast.total_weight == 1 50 | assert b == {} 51 | 52 | assert "y" not in buff 53 | 54 | buff["a"] = 5 55 | assert set(buff) == set(buff.keys()) == {"a", "x"} 56 | 57 | fast_keys = set(buff.fast) 58 | slow_keys = set(buff.slow) 59 | assert not (fast_keys & slow_keys) 60 | assert fast_keys | slow_keys == set(buff) 61 | 62 | # Overweight element stays in slow mapping 63 | buff["b"] = 1000 64 | assert "b" in buff.slow 65 | assert 
set(buff.fast) == fast_keys 66 | assert set(buff.slow) == {"b"} | slow_keys 67 | assert "b" in buff 68 | assert buff["b"] == 1000 69 | 70 | 71 | def test_setitem_avoid_fast_slow_duplicate(): 72 | a = {} 73 | b = {} 74 | buff = Buffer(a, b, n=10, weight=lambda k, v: v) 75 | for first, second in [(1, 12), (12, 1)]: 76 | buff["a"] = first 77 | assert buff["a"] == first 78 | buff["a"] = second 79 | assert buff["a"] == second 80 | 81 | fast_keys = set(buff.fast) 82 | slow_keys = set(buff.slow) 83 | assert not (fast_keys & slow_keys) 84 | assert fast_keys | slow_keys == set(buff) 85 | 86 | del buff["a"] 87 | assert "a" not in buff 88 | assert "a" not in a 89 | assert "a" not in b 90 | 91 | 92 | def test_mapping(): 93 | """ 94 | Test mapping interface for Buffer(). 95 | """ 96 | a = {} 97 | b = {} 98 | buff = Buffer(a, b, n=2) 99 | utils_test.check_mapping(buff) 100 | utils_test.check_closing(buff) 101 | 102 | buff.clear() 103 | assert not buff.slow 104 | assert not buff._cancel_restore 105 | assert not buff.fast 106 | assert not buff.fast.d 107 | assert not buff.fast.weights 108 | assert not buff.fast.total_weight 109 | assert not buff.fast._cancel_evict 110 | 111 | 112 | def test_callbacks(): 113 | f2s = [] 114 | 115 | def f2s_cb(k, v): 116 | f2s.append(k) 117 | 118 | s2f = [] 119 | 120 | def s2f_cb(k, v): 121 | s2f.append(k) 122 | 123 | a = {} 124 | b = {} 125 | buff = Buffer( 126 | a, 127 | b, 128 | n=10, 129 | weight=lambda k, v: v, 130 | fast_to_slow_callbacks=f2s_cb, 131 | slow_to_fast_callbacks=s2f_cb, 132 | ) 133 | 134 | buff["x"] = 1 135 | buff["y"] = 2 136 | 137 | assert buff["x"] == 1 138 | assert buff["y"] == 2 139 | assert not f2s 140 | assert not s2f 141 | 142 | buff["z"] = 8 143 | 144 | assert f2s == ["x"] 145 | assert s2f == [] 146 | buff["z"] 147 | 148 | assert f2s == ["x"] 149 | assert s2f == [] 150 | 151 | buff["x"] 152 | assert f2s == ["x", "y"] 153 | assert s2f == ["x"] 154 | 155 | 156 | def test_callbacks_exception_catch(): 157 | class 
MyError(Exception): 158 | pass 159 | 160 | f2s = [] 161 | 162 | def f2s_cb(k, v): 163 | if v > 10: 164 | raise MyError() 165 | f2s.append(k) 166 | 167 | s2f = [] 168 | 169 | def s2f_cb(k, v): 170 | s2f.append(k) 171 | 172 | a = {} 173 | b = {} 174 | buff = Buffer( 175 | a, 176 | b, 177 | n=10, 178 | weight=lambda k, v: v, 179 | fast_to_slow_callbacks=f2s_cb, 180 | slow_to_fast_callbacks=s2f_cb, 181 | ) 182 | 183 | buff["x"] = 1 184 | buff["y"] = 2 185 | 186 | assert buff["x"] == 1 187 | assert buff["y"] == 2 188 | assert not f2s 189 | assert not s2f 190 | assert a == {"x": 1, "y": 2} # keys are in fast/memory 191 | assert not b 192 | 193 | # Add key < n but total weight > n this will move x out of fast 194 | buff["z"] = 8 195 | 196 | assert f2s == ["x"] 197 | assert s2f == [] 198 | assert a == {"y": 2, "z": 8} 199 | assert b == {"x": 1} 200 | 201 | # Add key > n, again total weight > n this will move everything to slow except w 202 | # that stays in fast due to callback raising 203 | with pytest.raises(MyError): 204 | buff["w"] = 11 205 | 206 | assert f2s == ["x", "y", "z"] 207 | assert s2f == [] 208 | assert a == {"w": 11} 209 | assert b == {"x": 1, "y": 2, "z": 8} 210 | 211 | 212 | def test_n_offset(): 213 | buff = Buffer({}, {}, n=5) 214 | assert buff.n == 5 215 | assert buff.fast.n == 5 216 | buff.n = 3 217 | assert buff.fast.n == 3 218 | assert buff.offset == 0 219 | assert buff.fast.offset == 0 220 | buff.offset = 2 221 | assert buff.offset == 2 222 | assert buff.fast.offset == 2 223 | 224 | 225 | def test_set_noevict(): 226 | a = {} 227 | b = {} 228 | f2s = [] 229 | s2f = [] 230 | buff = Buffer( 231 | a, 232 | b, 233 | n=5, 234 | weight=lambda k, v: v, 235 | fast_to_slow_callbacks=lambda k, v: f2s.append(k), 236 | slow_to_fast_callbacks=lambda k, v: s2f.append(k), 237 | ) 238 | buff.set_noevict("x", 3) 239 | buff.set_noevict("y", 3) # Would cause x to move to slow 240 | buff.set_noevict("z", 6) # >n; would be immediately evicted 241 | 242 | assert a == {"x": 
3, "y": 3, "z": 6} 243 | assert b == {} 244 | assert f2s == s2f == [] 245 | 246 | buff.evict_until_below_target() 247 | assert a == {"y": 3} 248 | assert b == {"z": 6, "x": 3} 249 | assert f2s == ["z", "x"] 250 | assert s2f == [] 251 | 252 | # set_noevict clears slow 253 | f2s.clear() 254 | buff.set_noevict("x", 1) 255 | assert a == {"y": 3, "x": 1} 256 | assert b == {"z": 6} 257 | assert f2s == s2f == [] 258 | 259 | # Custom target; 0 != None 260 | buff.evict_until_below_target(0) 261 | assert a == {} 262 | assert b == {"z": 6, "x": 1, "y": 3} 263 | assert f2s == ["y", "x"] 264 | assert s2f == [] 265 | 266 | 267 | def test_evict_restore_during_iter(): 268 | """Test that __iter__ won't be disrupted if another thread evicts or restores a key""" 269 | buff = Buffer({"x": 1, "y": 2}, {"z": 3}, n=5) 270 | assert list(buff) == ["x", "y", "z"] 271 | it = iter(buff) 272 | assert next(it) == "x" 273 | buff.fast.evict("x") 274 | assert next(it) == "y" 275 | assert buff["x"] == 1 276 | assert next(it) == "z" 277 | with pytest.raises(StopIteration): 278 | next(it) 279 | 280 | 281 | @pytest.mark.parametrize("event", ("set", "set_noevict", "del")) 282 | @pytest.mark.parametrize("when", ("before", "after")) 283 | def test_cancel_evict(event, when): 284 | """See also: 285 | 286 | test_cancel_restore 287 | test_lru.py::test_cancel_evict 288 | """ 289 | ev1 = threading.Event() 290 | ev2 = threading.Event() 291 | 292 | class Slow(UserDict): 293 | def __setitem__(self, k, v): 294 | if when == "before": 295 | ev1.set() 296 | assert ev2.wait(timeout=5) 297 | super().__setitem__(k, v) 298 | else: 299 | super().__setitem__(k, v) 300 | ev1.set() 301 | assert ev2.wait(timeout=5) 302 | 303 | buff = Buffer({}, Slow(), n=100, weight=lambda k, v: v) 304 | buff.set_noevict("x", 1) 305 | with ThreadPoolExecutor(1) as ex: 306 | fut = ex.submit(buff.fast.evict) 307 | assert ev1.wait(timeout=5) 308 | # cb is running 309 | 310 | if event == "set": 311 | buff["x"] = 2 312 | elif event == 
"set_noevict": 313 | buff.set_noevict("x", 2) 314 | else: 315 | assert event == "del" 316 | del buff["x"] 317 | ev2.set() 318 | assert fut.result() == (None, None, 0) 319 | 320 | if event in ("set", "set_noevict"): 321 | assert buff.fast == {"x": 2} 322 | assert not buff.slow 323 | assert buff.fast.weights == {"x": 2} 324 | assert list(buff.fast.order) == ["x"] 325 | else: 326 | assert not buff.fast 327 | assert not buff.slow 328 | assert not buff.fast.weights 329 | assert not buff.fast.order 330 | 331 | assert not buff.fast._cancel_evict 332 | 333 | 334 | @pytest.mark.parametrize("event", ("set", "set_noevict", "del")) 335 | @pytest.mark.parametrize("when", ("before", "after")) 336 | def test_cancel_restore(event, when): 337 | """See also: 338 | 339 | test_cancel_evict 340 | test_lru.py::test_cancel_evict 341 | """ 342 | ev1 = threading.Event() 343 | ev2 = threading.Event() 344 | 345 | class Slow(UserDict): 346 | def __getitem__(self, k): 347 | if when == "before": 348 | ev1.set() 349 | assert ev2.wait(timeout=5) 350 | return super().__getitem__(k) 351 | else: 352 | out = super().__getitem__(k) 353 | ev1.set() 354 | assert ev2.wait(timeout=5) 355 | return out 356 | 357 | buff = Buffer({}, Slow(), n=100, weight=lambda k, v: v) 358 | buff.set_noevict("x", 1) 359 | buff.fast.evict() 360 | assert not buff.fast 361 | assert set(buff.slow) == {"x"} 362 | 363 | with ThreadPoolExecutor(1) as ex: 364 | fut = ex.submit(buff.__getitem__, "x") 365 | assert ev1.wait(timeout=5) 366 | # cb is running 367 | 368 | if event == "set": 369 | buff["x"] = 2 370 | elif event == "set_noevict": 371 | buff.set_noevict("x", 2) 372 | else: 373 | assert event == "del" 374 | del buff["x"] 375 | ev2.set() 376 | 377 | with pytest.raises(KeyError, match="x"): 378 | fut.result() 379 | 380 | if event in ("set", "set_noevict"): 381 | assert buff.fast == {"x": 2} 382 | assert not buff.slow 383 | assert buff.fast.weights == {"x": 2} 384 | assert list(buff.fast.order) == ["x"] 385 | else: 386 | assert 
not buff.fast 387 | assert not buff.slow 388 | assert not buff.fast.weights 389 | assert not buff.fast.order 390 | 391 | assert not buff._cancel_restore 392 | 393 | 394 | @pytest.mark.stress 395 | @pytest.mark.repeat(utils_test.REPEAT_STRESS_TESTS) 396 | def test_stress_different_keys_threadsafe(): 397 | # Sometimes x and y can cohexist without triggering eviction 398 | # Sometimes x and y are individually " 54 | 55 | 56 | def test_setitem_typeerror(tmp_path, check_fd_leaks): 57 | z = File(tmp_path) 58 | with pytest.raises(TypeError): 59 | z["x"] = 123 60 | 61 | 62 | def test_contextmanager(tmp_path, check_fd_leaks): 63 | with File(tmp_path) as z: 64 | z["x"] = b"123" 65 | 66 | with open(tmp_path / "x#0", "rb") as fh: 67 | assert fh.read() == b"123" 68 | 69 | 70 | def test_delitem(tmp_path, check_fd_leaks): 71 | z = File(tmp_path) 72 | 73 | z["x"] = b"123" 74 | assert os.listdir(tmp_path) == ["x#0"] 75 | del z["x"] 76 | assert os.listdir(tmp_path) == [] 77 | # File name is never repeated 78 | z["x"] = b"123" 79 | assert os.listdir(tmp_path) == ["x#1"] 80 | # __setitem__ deletes the previous file 81 | z["x"] = b"123" 82 | assert os.listdir(tmp_path) == ["x#2"] 83 | 84 | 85 | def test_missing_key(tmp_path, check_fd_leaks): 86 | z = File(tmp_path) 87 | 88 | with pytest.raises(KeyError): 89 | z["x"] 90 | 91 | 92 | def test_arbitrary_chars(tmp_path, check_fd_leaks): 93 | z = File(tmp_path) 94 | 95 | # Avoid hitting the Windows max filename length 96 | chunk = 16 97 | for i in range(1, 128, chunk): 98 | key = "".join(["foo_"] + [chr(i) for i in range(i, min(128, i + chunk))]) 99 | with pytest.raises(KeyError): 100 | z[key] 101 | z[key] = b"foo" 102 | assert z[key] == b"foo" 103 | assert list(z) == [key] 104 | assert list(z.keys()) == [key] 105 | assert list(z.items()) == [(key, b"foo")] 106 | assert list(z.values()) == [b"foo"] 107 | 108 | zz = File(tmp_path) 109 | assert zz[key] == b"foo" 110 | assert list(zz) == [key] 111 | assert list(zz.keys()) == [key] 112 | assert 
list(zz.items()) == [(key, b"foo")] 113 | assert list(zz.values()) == [b"foo"] 114 | del zz 115 | 116 | del z[key] 117 | with pytest.raises(KeyError): 118 | z[key] 119 | 120 | 121 | def test_write_list_of_bytes(tmp_path, check_fd_leaks): 122 | z = File(tmp_path) 123 | 124 | z["x"] = [b"123", b"4567"] 125 | assert z["x"] == b"1234567" 126 | 127 | 128 | def test_bad_types(tmp_path, check_fd_leaks): 129 | z = File(tmp_path) 130 | utils_test.check_bad_key_types(z) 131 | utils_test.check_bad_value_types(z) 132 | 133 | 134 | @pytest.mark.stress 135 | @pytest.mark.repeat(utils_test.REPEAT_STRESS_TESTS) 136 | def test_stress_different_keys_threadsafe(tmp_path): 137 | z = File(tmp_path) 138 | utils_test.check_different_keys_threadsafe(z) 139 | utils_test.check_mapping(z) 140 | 141 | 142 | @pytest.mark.stress 143 | @pytest.mark.repeat(utils_test.REPEAT_STRESS_TESTS) 144 | @pytest.mark.skipif(sys.platform == "win32", reason="Can't delete file with open fd") 145 | def test_stress_same_key_threadsafe(tmp_path): 146 | z = File(tmp_path) 147 | utils_test.check_same_key_threadsafe(z) 148 | utils_test.check_mapping(z) 149 | -------------------------------------------------------------------------------- /zict/tests/test_func.py: -------------------------------------------------------------------------------- 1 | import gc 2 | from collections.abc import MutableMapping 3 | 4 | import pytest 5 | 6 | from zict import Func 7 | from zict.common import ZictBase 8 | from zict.tests import utils_test 9 | 10 | 11 | def inc(x): 12 | return x + 1 13 | 14 | 15 | def dec(x): 16 | return x - 1 17 | 18 | 19 | def rotl(x): 20 | return x[1:] + x[:1] 21 | 22 | 23 | def rotr(x): 24 | return x[-1:] + x[:-1] 25 | 26 | 27 | def test_simple(): 28 | d = {} 29 | f = Func(inc, dec, d) 30 | f["x"] = 10 31 | assert f["x"] == 10 32 | assert d["x"] == 11 33 | 34 | assert "x" in f 35 | assert list(f) == ["x"] 36 | assert list(f.values()) == [10] 37 | assert list(f.items()) == [("x", 10)] 38 | 39 | assert all(s 
@pytest.mark.parametrize("wrapped_cls", [MutableMapping, ZictBase])
def test_update_descopes_early(wrapped_cls):
    """Test that Func.update() descopes the output of self.dump as soon as it can, if
    the wrapped mapping allows, and doesn't store everything into a list.
    """

    class Dumped:
        # Number of Dumped instances that are currently alive
        n = 0

        def __init__(self):
            gc.collect()  # Only necessary on pypy
            Dumped.n += 1
            # Fails as soon as a third instance is alive at the same time,
            # i.e. if update() held on to the previously dumped values
            assert Dumped.n < 3

        def __del__(self):
            Dumped.n -= 1

    class Dummy(wrapped_cls):
        """Always-empty mapping that discards whatever is written to it."""

        def __setitem__(self, key, value):
            pass

        def __getitem__(self, key):
            # Mapping protocol: __getitem__ receives the key only
            raise KeyError(key)

        def __delitem__(self, key):
            raise KeyError(key)

        def __iter__(self):
            return iter(())

        def __len__(self):
            return 0

    d = Func(lambda v: Dumped(), lambda w: None, Dummy())
    d.update(dict.fromkeys(range(10)))
@pytest.mark.parametrize("dirtype", [str, pathlib.Path, lambda x: x])
def test_dirtypes(tmp_path, check_fd_leaks, dirtype):
    """LMDB can be opened with the directory given as a str, as a pathlib.Path,
    or as the unmodified ``tmp_path`` object.
    """
    # Convert the directory with the parametrized type; without this every
    # parametrization would exercise exactly the same code path
    z = LMDB(dirtype(tmp_path))
    z["x"] = b"123"
    assert z["x"] == b"123"
    del z["x"]
def test_creates_dir(tmp_path, check_fd_leaks):
    """The LMDB directory exists on disk while the mapping is open."""
    # check_fd_leaks is requested only for its setup/teardown checks; it must
    # not be forwarded to LMDB. Every other test in this file passes just the
    # directory, plus keyword options such as map_size.
    with LMDB(tmp_path):
        assert os.path.isdir(tmp_path)
def test_callbacks():
    """Each on_evict callback is called once for every evicted key."""
    calls = 0
    evicted = []

    def record(k, v):
        evicted.append((k, v))

    def count_call(k, v):
        nonlocal calls
        calls += 1

    lru = LRU(2, {}, on_evict=[record, count_call])

    # The third insertion exceeds n=2 and pushes out the oldest key
    for key, value in (("x", 1), ("y", 2), ("z", 3)):
        lru[key] = value

    assert evicted == [("x", 1)]
    assert calls == len(evicted)
def test_weight():
    """With a weight function, eviction follows the total weight, not the key count."""

    def value_as_weight(k, v):
        return v

    backing = {}
    lru = LRU(10, backing, weight=value_as_weight)

    lru["x"] = 5
    assert lru.total_weight == 5

    lru["y"] = 4
    assert lru.total_weight == 9

    # 5 + 4 + 3 exceeds n=10, so the oldest key (x) has to go
    lru["z"] = 3
    assert backing == {"y": 4, "z": 3}
    assert lru.total_weight == 7

    del lru["z"]
    assert lru.total_weight == 4

    # A value that is heavier than n on its own is not retained
    lru["a"] = 10000
    assert "a" not in lru
    assert backing == {"y": 4}
def test_init_not_empty():
    """An LRU built around a pre-populated dict ends up in the same state as one
    that was filled key by key through set_noevict().
    """
    contents = {1: 10, 2: 20, 3: 30, 4: 60}
    expected_weights = {1: 20, 2: 40, 3: 60, 4: 120}

    def double(k, v):
        return v * 2

    lru1 = LRU(100, {}, weight=double)
    for key, value in contents.items():
        lru1.set_noevict(key, value)

    lru2 = LRU(100, dict(contents), weight=double)

    assert lru1.d == lru2.d == contents
    assert lru1.weights == lru2.weights == expected_weights
    assert lru1.total_weight == lru2.total_weight == 240
    assert list(lru1.order) == list(lru2.order) == [1, 2, 3, 4]
    # Key 4 weighs 120, more than n=100 on its own
    assert list(lru1.heavy) == list(lru2.heavy) == [4]
def test_flush_close():
    """flush() and leaving the context manager reach the wrapped mapping's
    flush() and close() methods.
    """
    calls = []

    class Recorder(utils_test.SimpleDict):
        def flush(self):
            calls.append("flush")

        def close(self):
            calls.append("close")

    with LRU(10, Recorder()) as lru:
        lru.flush()

    assert calls.count("flush") == 1
    assert "close" in calls
def test_update_offset():
    """Raising ``offset`` triggers eviction earlier, but does not change which
    keys are treated as heavy.
    """
    evicted = []

    def record_key(k, v):
        evicted.append(k)

    def value_as_weight(k, v):
        return v

    z = LRU(5, {}, on_evict=record_key, weight=value_as_weight)
    z.offset = 2

    z["x"] = 1
    # Had n been lowered by 2 instead of raising offset, y would be a heavy key
    z["y"] = 2.5
    assert evicted == ["x"]

    # Heavier than n alone, so still a heavy key
    z["z"] = 5.5
    assert evicted == ["x", "z"]
def test_simple(fn):
    """Basic round trip: an empty Zip, one write, an on-disk check after flush(),
    then a second write.
    """
    z = Zip(fn)
    assert isinstance(z, MutableMapping)
    assert not z

    assert list(z) == list(z.keys()) == []
    assert list(z.values()) == []
    assert list(z.items()) == []

    z["x"] = b"123"
    assert list(z) == list(z.keys()) == ["x"]
    assert list(z.values()) == [b"123"]
    assert list(z.items()) == [("x", b"123")]
    assert z["x"] == b"123"

    z.flush()
    # Read the archive back with the stdlib; close the handle deterministically
    # instead of leaving it to garbage collection
    with zipfile.ZipFile(fn, mode="r") as zz:
        assert zz.read("x") == b"123"

    # Writing keeps working after a flush
    z["y"] = b"456"
    assert z["y"] == b"456"
def test_close(fn):
    """close() writes the archive to disk and makes later writes fail."""
    z = Zip(fn)

    z["x"] = b"123"
    z.close()

    # Close the verification handle deterministically instead of leaving it
    # to garbage collection
    with zipfile.ZipFile(fn, mode="r") as zz:
        assert zz.read("x") == b"123"

    # IOError is an alias of OSError on Python 3
    with pytest.raises(OSError):
        z["y"] = b"123"
def test_no_delete_update(fn):
    """Deleting or overwriting an existing key raises NotImplementedError and
    leaves the stored value untouched.
    """
    original = b"123"

    def delete(mapping):
        del mapping["x"]

    def overwrite(mapping):
        mapping["x"] = b"456"

    with Zip(fn) as archive:
        archive["x"] = original
        for forbidden in (delete, overwrite):
            with pytest.raises(NotImplementedError):
                forbidden(archive)
        assert len(archive) == 1
        assert archive["x"] == original
def generate_random_strings(n, min_len, max_len):
    """Return a list of *n* pseudo-random strings of lowercase letters and digits.

    Each string is between *min_len* and *max_len* characters long (both
    inclusive). The generator is seeded with a constant, so equal arguments
    always yield the same list.
    """
    rng = random.Random(42)
    alphabet = string.ascii_lowercase + string.digits

    def one_string():
        # Draw the length before the characters so that the random stream is
        # consumed in a fixed order
        length = rng.randint(min_len, max_len)
        return "".join(rng.choice(alphabet) for _ in range(length))

    return [one_string() for _ in range(n)]
def check_empty_mapping(z: MutableMapping) -> None:
    """Assert that *z* looks empty through every read-only accessor."""
    # Truthiness
    assert not z

    # Iteration, directly and through each of the views
    direct_keys = list(z)
    view_keys = list(z.keys())
    assert direct_keys == view_keys == []
    assert list(z.values()) == []
    assert list(z.items()) == []

    # Size
    assert len(z) == 0

    # Membership, on the mapping itself and on each of the views
    probe_key, probe_value = "x", b"123"
    assert probe_key not in z
    assert probe_key not in z.keys()
    assert (probe_key, probe_value) not in z.items()
    assert probe_value not in z.values()
def check_different_keys_threadsafe(
    z: MutableMapping, allow_keyerror: bool = False
) -> None:
    """Hammer *z* from two threads, each one working on its own key.

    Each thread repeatedly sets, reads back and deletes its key, checking after
    every round that the key is really gone. The loop ends once both threads
    have completed at least 10 rounds, or as soon as either of them reaches
    1000. At the end *z* must be empty.

    Parameters
    ----------
    z:
        The mapping under test.
    allow_keyerror:
        If True, a KeyError raised while reading back or deleting the key that
        was just set is tolerated and the round is retried without being
        counted. If False, such a KeyError is re-raised.
    """
    # Make both workers enter their loop at the same time
    barrier = threading.Barrier(2)
    # Completed rounds, one slot per worker
    counters = [0, 0]

    def worker(idx: int, key: str, value: bytes) -> None:
        barrier.wait()
        # When running on a single CPU (`taskset -c 0 pytest`), multitasking can
        # misbehave and almost completely starve one of the two threads
        while any(c < 10 for c in counters) and all(c < 1000 for c in counters):
            z[key] = value
            try:
                assert z[key] == value
                del z[key]
            except KeyError:
                if allow_keyerror:
                    continue  # Try again, don't increment this worker's counter
                raise

            # The key is gone: reads and deletes must now fail
            assert key not in z
            with pytest.raises(KeyError):
                _ = z[key]
            with pytest.raises(KeyError):
                del z[key]
            # At most the other worker's key can be present
            assert len(z) in (0, 1)
            counters[idx] += 1

    with ThreadPoolExecutor(2) as ex:
        f1 = ex.submit(worker, 0, "x", b"123")
        f2 = ex.submit(worker, 1, "y", b"456")
        # result() re-raises any exception that happened inside the worker
        f1.result()
        f2.result()

    assert not z
def check_bad_key_types(z: MutableMapping, has_del: bool = True) -> None:
    """Check how *z*, a mapping that does not accept every Hashable as a key,
    reacts to a key of an invalid type.

    Membership tests must answer False, writes must raise TypeError and reads
    must raise KeyError. Deletion must raise KeyError too; it is only tested
    when *has_del* is True.
    """
    invalid_key = object()
    payload = b"123"

    assert not (invalid_key in z)
    assert not (invalid_key in z.keys())
    assert not ((invalid_key, payload) in z.items())

    def write_item():
        z[invalid_key] = payload

    def write_update():
        z.update({invalid_key: payload})

    for write in (write_item, write_update):
        with pytest.raises(TypeError):
            write()

    with pytest.raises(KeyError):
        _ = z[invalid_key]

    if not has_del:
        return
    with pytest.raises(KeyError):
        del z[invalid_key]
class SlowDict(UserDict):
    """Dict whose item reads and writes each sleep for ``delay`` seconds first.

    Parameters
    ----------
    delay:
        Seconds to sleep before every ``__getitem__`` and ``__setitem__``.
    """

    def __init__(self, delay):
        self.delay = delay
        # Start out empty. The instance must not be passed as its own initial
        # data: UserDict.__init__ would feed it to update().
        super().__init__()

    def __getitem__(self, key):
        time.sleep(self.delay)
        return super().__getitem__(key)

    def __setitem__(self, key, value):
        time.sleep(self.delay)
        super().__setitem__(key, value)
class Zip(MutableMapping[str, bytes]):
    """Mutable Mapping interface to a Zip file

    Keys must be strings, values must be bytes

    Parameters
    ----------
    filename: string
    mode: string, ('r', 'w', 'x', 'a'), defaults to 'a'

    Notes
    -----
    None of this class is thread-safe - not even normally trivial methods such as
    ``__len__`` or ``__contains__``.

    Existing keys can be neither overwritten nor deleted; both operations raise
    NotImplementedError.

    Examples
    --------
    >>> z = Zip('myfile.zip')  # doctest: +SKIP
    >>> z['x'] = b'123'  # doctest: +SKIP
    >>> z['x']  # doctest: +SKIP
    b'123'
    >>> z.flush()  # flush and write metadata to disk  # doctest: +SKIP
    """

    filename: str
    # Mode requested by the user; becomes "closed" after close()
    mode: FileMode | Literal["closed"]
    # Currently open archive, or None before first use and after flush()
    _file: zipfile.ZipFile | None
    # True once this object has opened the archive at least once
    _created: bool

    def __init__(self, filename: str, mode: FileMode = "a"):
        super().__init__()
        self.filename = filename
        self.mode = mode
        self._file = None
        self._created = False

    @property
    def file(self) -> zipfile.ZipFile:
        """The underlying ZipFile, (re)opened on demand.

        Raises
        ------
        OSError
            If close() has already been called.
        """
        if self.mode == "closed":
            raise OSError("File closed")
        if not self._file or not self._file.fp:
            mode = self.mode
            if self._created and mode in ("w", "x"):
                # flush() closes the archive. Reopening it with "w" would
                # truncate what was written so far and "x" would fail because
                # the file now exists, so every reopen after the first appends.
                mode = "a"
            self._file = zipfile.ZipFile(self.filename, mode=mode)
            self._created = True
        return self._file

    def __getitem__(self, key: str) -> bytes:
        # Non-string keys can never be in the archive
        if not isinstance(key, str):
            raise KeyError(key)
        return self.file.read(key)

    def __setitem__(self, key: str, value: bytes | bytearray | memoryview) -> None:
        if not isinstance(key, str):
            raise TypeError(key)
        if not isinstance(value, (bytes, bytearray, memoryview)):
            raise TypeError(value)
        if key in self:
            raise NotImplementedError("Not supported by stdlib zipfile")
        self.file.writestr(key, value)

    def __iter__(self) -> Iterator[str]:
        return (zi.filename for zi in self.file.filelist)

    def __contains__(self, key: object) -> bool:
        if not isinstance(key, str):
            return False
        try:
            self.file.getinfo(key)
            return True
        except KeyError:
            return False

    def __delitem__(self, key: str) -> None:  # pragma: nocover
        raise NotImplementedError("Not supported by stdlib zipfile")

    def __len__(self) -> int:
        return len(self.file.filelist)

    def flush(self) -> None:
        """Write all pending data and the archive metadata to disk.

        This closes the underlying ZipFile; it is reopened on the next access.
        """
        if self._file:
            if self._file.fp:
                self._file.fp.flush()
            self._file.close()
            self._file = None

    def close(self) -> None:
        """Flush to disk and refuse any further access to the archive."""
        self.flush()
        self.mode = "closed"

    def __enter__(self) -> Zip:
        return self

    def __exit__(self, *args: Any) -> None:
        self.close()