├── .flake8 ├── .gitattributes ├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ └── feature_request.md ├── dependabot.yml ├── release-drafter.yml ├── workflows.md └── workflows │ ├── constraints.txt │ ├── dependabot-auto-merge.yml │ ├── release-please.yml │ ├── release.yml │ └── tests.yml ├── .gitignore ├── .pre-commit-config.yaml ├── .prettierignore ├── .readthedocs.yml ├── CONTRIBUTING.rst ├── LICENSE ├── README.rst ├── build_docs.sh ├── codecov.yml ├── dev ├── __init__.py ├── btree experiments.ipynb ├── display.ipynb ├── measure_ram.py ├── multi.ipynb ├── multicolumn.md ├── munchmark.ipynb ├── output.png ├── perf.ipynb └── wrdl-dev.ipynb ├── docs ├── Makefile ├── conf.py ├── demos.rst ├── ducks.concurrent.rst ├── ducks.frozen.rst ├── ducks.mutable.rst ├── ducks.rst ├── favicon.ico ├── how_it_works.rst ├── img │ ├── ducks-main.png │ └── perf_bench.png ├── index.rst ├── make.bat ├── modules.rst ├── quick_start.rst └── requirements.txt ├── ducks ├── __init__.py ├── btree.py ├── concurrent │ ├── __init__.py │ └── main.py ├── constants.py ├── exceptions.py ├── frozen │ ├── __init__.py │ ├── frozen_attr.py │ ├── init_helpers.py │ ├── main.py │ └── utils.py ├── mutable │ ├── __init__.py │ ├── main.py │ └── mutable_attr.py ├── pickling.py └── utils.py ├── examples ├── __init__.py ├── collision.py ├── concurrent_perf.ipynb ├── data │ ├── crossword_words.txt │ └── wordle_words.csv ├── img │ ├── word0.png │ ├── word1.png │ ├── word2.png │ ├── word3.png │ ├── word4.png │ └── word5.png ├── pandas_index.py ├── percentile.py ├── perf_demo.ipynb ├── update.py └── wordle.ipynb ├── noxfile.py ├── poetry.lock ├── pyproject.toml ├── test ├── __init__.py ├── concurrent │ ├── __init__.py │ ├── concurrent_utils.py │ ├── test_multi_writer.py │ └── test_read_update.py ├── conftest.py ├── mutable │ ├── __init__.py │ └── test_soak.py ├── test_basic_operations.py ├── test_btree.py ├── test_container_ops.py ├── test_edge_cases.py ├── test_examples.py ├── test_exceptions.py ├── 
test_fancy_gets.py ├── test_missing_attribute.py ├── test_mixed_cardinality.py ├── test_multiple_operations.py ├── test_mutations.py ├── test_nones.py ├── test_pickling.py ├── test_range_queries.py ├── test_stale_objects.py └── test_wrong_type.py └── tmp ├── Makefile ├── conf.py ├── index.rst └── make.bat /.flake8: -------------------------------------------------------------------------------- 1 | 2 | [flake8] 3 | # Eventually would add D and DAR 4 | select = B,B9,C,E,F,N,RST,S,W 5 | ignore = E203,E501,RST201,RST203,RST301,W503,B902,N805,DAR402,S403,S404,S605,S603,S403,S301,B015,N818,F401,F811 6 | max-line-length = 119 7 | max-complexity = 10 8 | docstring-convention = google 9 | per-file-ignores = tests/*:S101 10 | rst-roles = class,const,func,meth,mod,ref 11 | rst-directives = deprecated 12 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | dev/* linguist-vendored 2 | examples/* linguist-vendored 3 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the bug** 11 | A clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the behavior: 15 | 1. Go to '...' 16 | 2. Click on '....' 17 | 3. Scroll down to '....' 18 | 4. See error 19 | 20 | **Expected behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots to help explain your problem. 25 | 26 | **Desktop (please complete the following information):** 27 | - OS: [e.g. iOS] 28 | - Browser [e.g. chrome, safari] 29 | - Version [e.g. 
22] 30 | 31 | **Smartphone (please complete the following information):** 32 | - Device: [e.g. iPhone6] 33 | - OS: [e.g. iOS8.1] 34 | - Browser [e.g. stock browser, safari] 35 | - Version [e.g. 22] 36 | 37 | **Additional context** 38 | Add any other context about the problem here. 39 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: '' 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Is your feature request related to a problem? Please describe.** 11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] 12 | 13 | **Describe the solution you'd like** 14 | A clear and concise description of what you want to happen. 15 | 16 | **Describe alternatives you've considered** 17 | A clear and concise description of any alternative solutions or features you've considered. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the feature request here. 
21 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: github-actions 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | - package-ecosystem: pip 8 | directory: "/.github/workflows" 9 | schedule: 10 | interval: daily 11 | - package-ecosystem: pip 12 | directory: "/docs" 13 | schedule: 14 | interval: daily 15 | - package-ecosystem: pip 16 | directory: "/" 17 | schedule: 18 | interval: daily 19 | -------------------------------------------------------------------------------- /.github/release-drafter.yml: -------------------------------------------------------------------------------- 1 | categories: 2 | - title: ":boom: Breaking Changes" 3 | label: "breaking" 4 | - title: ":rocket: Features" 5 | label: "enhancement" 6 | - title: ":fire: Removals and Deprecations" 7 | label: "removal" 8 | - title: ":beetle: Fixes" 9 | label: "bug" 10 | - title: ":racehorse: Performance" 11 | label: "performance" 12 | - title: ":rotating_light: Testing" 13 | label: "testing" 14 | - title: ":construction_worker: Continuous Integration" 15 | label: "ci" 16 | - title: ":books: Documentation" 17 | label: "documentation" 18 | - title: ":hammer: Refactoring" 19 | label: "refactoring" 20 | - title: ":lipstick: Style" 21 | label: "style" 22 | - title: ":package: Dependencies" 23 | labels: 24 | - "dependencies" 25 | - "build" 26 | template: | 27 | ## Changes 28 | 29 | $CHANGES 30 | -------------------------------------------------------------------------------- /.github/workflows.md: -------------------------------------------------------------------------------- 1 | # .github/workflows 2 | 3 | ## ISSUE_TEMPLATE 4 | 5 | Used as templates when creating new issues in the repo 6 | 7 | ## Workflows 8 | 9 | ### constraints.txt 10 | 11 | Constraints.txt is a pip install file that constrains some python 
requirements outside of poetry. 12 | 13 | ### dependabot-auto-merge.yml 14 | 15 | A workflow that runs on PRs that will automatically merge dependabot update PRs that pass testing 16 | 17 | ### release-please.yml 18 | 19 | A workflow that manages releases for the repo. On merges to the main branch, it scans them for [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) 20 | and will then create or update a release pr with those changes. The [release-please](https://github.com/googleapis/release-please) docs have more info. 21 | 22 | When that release pr is merged, it will build and upload to pypi. 23 | 24 | ### release.yml 25 | 26 | A workflow that runs on the creation of releases to upload the package to pypi 27 | 28 | ### Required Secrets 29 | 30 | * THIS_PAT - a personal access token that has access to create releases on this repo and edit the repo's settings. Used with release-please and repo-manager 31 | * PYPI_TOKEN - a pypi token that can upload to pypi for this package. 
Used with release 32 | 33 | ## dependabot.yml 34 | 35 | Configures dependabot updates and alerts for this repo 36 | 37 | ## release-drafter.yml 38 | 39 | Configures how release notes are written by release-please 40 | -------------------------------------------------------------------------------- /.github/workflows/constraints.txt: -------------------------------------------------------------------------------- 1 | pip==22.3.1 2 | nox==2022.8.7 3 | nox-poetry==1.0.3 4 | poetry==1.4.2 5 | virtualenv==20.21.0 6 | poetry-dynamic-versioning==0.25.0 7 | toml==0.10.2 8 | -------------------------------------------------------------------------------- /.github/workflows/dependabot-auto-merge.yml: -------------------------------------------------------------------------------- 1 | name: Auto Merge Dependabot 2 | 3 | on: 4 | pull_request: 5 | 6 | jobs: 7 | auto-merge: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3.6.0 11 | - uses: ahmadnassri/action-dependabot-auto-merge@v2 12 | with: 13 | target: minor 14 | github-token: ${{ secrets.THIS_PAT }} 15 | -------------------------------------------------------------------------------- /.github/workflows/release-please.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | name: release-please 6 | jobs: 7 | release-please: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: google-github-actions/release-please-action@v3 11 | with: 12 | token: ${{ secrets.THIS_PAT }} 13 | release-type: python 14 | package-name: ducks 15 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release to pypi 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | jobs: 8 | release: 9 | name: Release 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Check out the repository 13 | uses: 
actions/checkout@v3.6.0 14 | with: 15 | fetch-depth: 2 16 | 17 | - name: Set up Python 18 | uses: actions/setup-python@v4.8.0 19 | with: 20 | python-version: "3.10" 21 | 22 | - name: Upgrade pip 23 | run: | 24 | pip install --constraint=.github/workflows/constraints.txt pip 25 | pip --version 26 | 27 | - name: Install Poetry 28 | run: | 29 | pip install --constraint=.github/workflows/constraints.txt poetry poetry-dynamic-versioning 30 | poetry --version 31 | 32 | - name: Build package 33 | run: | 34 | poetry build --ansi 35 | 36 | - name: Publish package on PyPI 37 | uses: pypa/gh-action-pypi-publish@v1.8.11 38 | with: 39 | user: __token__ 40 | password: ${{ secrets.PYPI_TOKEN }} 41 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: Tests 2 | 3 | on: 4 | - push 5 | 6 | jobs: 7 | 8 | coverage: 9 | runs-on: ubuntu-latest 10 | steps: 11 | - name: Check out the repository 12 | uses: actions/checkout@v3.6.0 13 | 14 | - name: Set up Python 15 | uses: actions/setup-python@v4.8.0 16 | with: 17 | python-version: "3.10" 18 | 19 | - name: Upgrade pip 20 | run: | 21 | pip install --constraint=.github/workflows/constraints.txt pip 22 | pip --version 23 | 24 | - name: Upgrade pip in virtual environments 25 | shell: python 26 | run: | 27 | import os 28 | import pip 29 | 30 | with open(os.environ["GITHUB_ENV"], mode="a") as io: 31 | print(f"VIRTUALENV_PIP={pip.__version__}", file=io) 32 | 33 | - name: Run tests and collect coverage 34 | run: | 35 | pip install pytest pytest-cov pytest-xdist 36 | pip install . 
37 | pytest 38 | 39 | - name: Upload coverage report 40 | uses: codecov/codecov-action@v3.1.4 41 | 42 | tests: 43 | name: ${{ matrix.session }} ${{ matrix.python }} / ${{ matrix.os }} 44 | runs-on: ${{ matrix.os }} 45 | strategy: 46 | fail-fast: false 47 | matrix: 48 | include: 49 | - { python: "3.10", os: "ubuntu-latest", session: "pre-commit" } 50 | - { python: "3.10", os: "ubuntu-latest", session: "safety" } 51 | - { python: "3.10", os: "ubuntu-latest", session: "tests" } 52 | - { python: "3.9", os: "ubuntu-latest", session: "tests" } 53 | - { python: "3.8", os: "ubuntu-latest", session: "tests" } 54 | - { python: "3.7", os: "ubuntu-latest", session: "tests" } 55 | - { python: "3.10", os: "ubuntu-latest", session: "docs-build" } 56 | 57 | env: 58 | NOXSESSION: ${{ matrix.session }} 59 | FORCE_COLOR: "1" 60 | PRE_COMMIT_COLOR: "always" 61 | 62 | steps: 63 | - name: Check out the repository 64 | uses: actions/checkout@v3.6.0 65 | 66 | - name: Set up Python ${{ matrix.python }} 67 | uses: actions/setup-python@v4.8.0 68 | with: 69 | python-version: ${{ matrix.python }} 70 | 71 | - name: Upgrade pip 72 | run: | 73 | pip install --constraint=.github/workflows/constraints.txt pip 74 | pip --version 75 | 76 | - name: Upgrade pip in virtual environments 77 | shell: python 78 | run: | 79 | import os 80 | import pip 81 | 82 | with open(os.environ["GITHUB_ENV"], mode="a") as io: 83 | print(f"VIRTUALENV_PIP={pip.__version__}", file=io) 84 | 85 | - name: Install Poetry, nox, and requirements 86 | run: | 87 | pip install --upgrade -r .github/workflows/constraints.txt 88 | poetry --version 89 | nox --version 90 | 91 | - name: Compute pre-commit cache key 92 | if: matrix.session == 'pre-commit' 93 | id: pre-commit-cache 94 | shell: python 95 | run: | 96 | import hashlib 97 | import sys 98 | 99 | python = "py{}.{}".format(*sys.version_info[:2]) 100 | payload = sys.version.encode() + sys.executable.encode() 101 | digest = hashlib.sha256(payload).hexdigest() 102 | result = "${{ 
runner.os }}-{}-{}-pre-commit".format(python, digest[:8]) 103 | 104 | print("::set-output name=result::{}".format(result)) 105 | 106 | - name: Restore pre-commit cache 107 | uses: actions/cache@v3.3.3 108 | if: matrix.session == 'pre-commit' 109 | with: 110 | path: ~/.cache/pre-commit 111 | key: ${{ steps.pre-commit-cache.outputs.result }}-${{ hashFiles('.pre-commit-config.yaml') }} 112 | restore-keys: | 113 | ${{ steps.pre-commit-cache.outputs.result }}- 114 | 115 | - name: Run Nox 116 | run: | 117 | nox --force-color --python=${{ matrix.python }} 118 | 119 | - name: Upload coverage data 120 | if: always() && matrix.session == 'tests' 121 | uses: "actions/upload-artifact@v3.1.3" 122 | with: 123 | name: coverage-data 124 | path: ".coverage" 125 | 126 | - name: Upload documentation 127 | if: matrix.session == 'docs-build' 128 | uses: actions/upload-artifact@v3.1.3 129 | with: 130 | name: docs 131 | path: docs/_build 132 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | .idea/ 7 | 8 | # C extensions 9 | *.so 10 | 11 | # Distribution / packaging 12 | .Python 13 | build/ 14 | develop-eggs/ 15 | dist/ 16 | downloads/ 17 | eggs/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | 56 | # Translations 57 | *.mo 58 | *.pot 59 | 60 | # Django stuff: 61 | *.log 62 | local_settings.py 63 | db.sqlite3 64 | db.sqlite3-journal 65 | 66 | # Flask stuff: 67 | instance/ 68 | .webassets-cache 69 | 70 | # Scrapy stuff: 71 | .scrapy 72 | 73 | # Sphinx documentation 74 | docs/_build/ 75 | 76 | # PyBuilder 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | .python-version 88 | 89 | # pipenv 90 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 91 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 92 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 93 | # install all needed dependencies. 94 | #Pipfile.lock 95 | 96 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 97 | __pypackages__/ 98 | 99 | # Celery stuff 100 | celerybeat-schedule 101 | celerybeat.pid 102 | 103 | # SageMath parsed files 104 | *.sage.py 105 | 106 | # Environments 107 | .env 108 | .venv 109 | env/ 110 | venv/ 111 | ENV/ 112 | env.bak/ 113 | venv.bak/ 114 | 115 | # Spyder project settings 116 | .spyderproject 117 | .spyproject 118 | 119 | # Rope project settings 120 | .ropeproject 121 | 122 | # mkdocs documentation 123 | /site 124 | 125 | # mypy 126 | .mypy_cache/ 127 | .dmypy.json 128 | dmypy.json 129 | 130 | # Pyre type checker 131 | .pyre/ 132 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | repos: 2 | - repo: local 3 | hooks: 4 | - id: black 5 | name: black 6 | entry: black 7 | language: system 8 | types: [python] 9 | require_serial: true 10 | - id: check-added-large-files 11 | name: Check for added large files 12 | entry: check-added-large-files 13 | language: system 14 | - id: check-toml 15 | name: Check Toml 16 | entry: check-toml 17 | language: system 18 | types: [toml] 19 | - id: check-yaml 20 | name: Check Yaml 21 | entry: check-yaml 22 | language: system 23 | types: [yaml] 24 | - id: end-of-file-fixer 25 | name: Fix End of Files 26 | entry: end-of-file-fixer 27 | language: system 28 | types: [text] 29 | stages: [commit, push, manual] 30 | - id: flake8 31 | name: flake8 32 | entry: flake8 33 | language: system 34 | types: [python] 35 | exclude: "^(test/*|examples/*|noxfile.py|docs/*|tmp/*)" 36 | require_serial: true 37 | args: ["--config=.flake8"] 38 | - id: pyupgrade 39 | name: pyupgrade 40 | description: Automatically upgrade syntax for newer versions. 
41 | entry: pyupgrade 42 | language: system 43 | types: [python] 44 | args: [--py37-plus] 45 | - id: reorder-python-imports 46 | name: Reorder python imports 47 | entry: reorder-python-imports 48 | language: system 49 | types: [python] 50 | args: [--application-directories=src] 51 | - repo: https://github.com/pre-commit/mirrors-prettier 52 | rev: v2.7.1 53 | hooks: 54 | - id: prettier 55 | - repo: https://github.com/rhysd/actionlint 56 | rev: v1.6.15 57 | hooks: 58 | - id: actionlint-docker 59 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | .github/* 2 | CHANGELOG.md 3 | -------------------------------------------------------------------------------- /.readthedocs.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | build: 4 | os: ubuntu-20.04 5 | tools: 6 | python: "3.9" 7 | 8 | python: 9 | install: 10 | - method: pip 11 | path: . 12 | -------------------------------------------------------------------------------- /CONTRIBUTING.rst: -------------------------------------------------------------------------------- 1 | Contributor Guide 2 | ================= 3 | 4 | Thank you for your interest in improving this project. 5 | This project is open-source under the `MIT license`_ and 6 | welcomes contributions in the form of bug reports, feature requests, and pull requests. 7 | 8 | Here is a list of important resources for contributors: 9 | 10 | - `Source Code`_ 11 | - `Documentation`_ 12 | - `Issue Tracker`_ 13 | - `Code of Conduct`_ 14 | 15 | .. _MIT license: https://opensource.org/licenses/MIT 16 | .. _Source Code: https://github.com/manimino/ducks 17 | .. _Documentation: https://ducks.readthedocs.io/ 18 | .. _Issue Tracker: https://github.com/manimino/ducks/issues 19 | 20 | How to report a bug 21 | ------------------- 22 | 23 | Report bugs on the `Issue Tracker`_. 
24 | 25 | When filing an issue, make sure to answer these questions: 26 | 27 | - Which operating system and Python version are you using? 28 | - Which version of this project are you using? 29 | - What did you do? 30 | - What did you expect to see? 31 | - What did you see instead? 32 | 33 | The best way to get your bug fixed is to provide a test case, 34 | and/or steps to reproduce the issue. 35 | 36 | 37 | How to request a feature 38 | ------------------------ 39 | 40 | Request features on the `Issue Tracker`_. 41 | 42 | 43 | How to set up your development environment 44 | ------------------------------------------ 45 | 46 | You need Python 3.7+ and the following tools: 47 | 48 | - Poetry_ 49 | - Nox_ 50 | - nox-poetry_ 51 | 52 | Install the package with development requirements: 53 | 54 | .. code:: console 55 | 56 | $ poetry install 57 | 58 | You can now run an interactive Python session, 59 | or the command-line interface: 60 | 61 | .. code:: console 62 | 63 | $ poetry run python 64 | 65 | .. _Poetry: https://python-poetry.org/ 66 | .. _Nox: https://nox.thea.codes/ 67 | .. _nox-poetry: https://nox-poetry.readthedocs.io/ 68 | 69 | 70 | How to test the project 71 | ----------------------- 72 | 73 | Run the full test suite: 74 | 75 | .. code:: console 76 | 77 | $ nox 78 | 79 | List the available Nox sessions: 80 | 81 | .. code:: console 82 | 83 | $ nox --list-sessions 84 | 85 | You can also run a specific Nox session. 86 | For example, invoke the unit test suite like this: 87 | 88 | .. code:: console 89 | 90 | $ nox --session=tests 91 | 92 | Unit tests are located in the ``tests`` directory, 93 | and are written using the pytest_ testing framework. 94 | 95 | .. _pytest: https://pytest.readthedocs.io/ 96 | 97 | 98 | How to submit changes 99 | --------------------- 100 | 101 | Open a `pull request`_ to submit changes to this project. 
102 | 103 | Your pull request needs to meet the following guidelines for acceptance: 104 | 105 | - The Nox test suite must pass without errors and warnings. 106 | - Include unit tests. This project maintains 100% code coverage. 107 | - If your changes add functionality, update the documentation accordingly. 108 | 109 | Feel free to submit early, though—we can always iterate on this. 110 | 111 | To run linting and code formatting checks before committing your change, you can install pre-commit as a Git hook by running the following command: 112 | 113 | .. code:: console 114 | 115 | $ nox --session=pre-commit -- install 116 | 117 | It is recommended to open an issue before starting work on anything. 118 | This will allow a chance to talk it over with the owners and validate your approach. 119 | 120 | .. _pull request: https://github.com/manimino/ducks/pulls 121 | .. github-only 122 | .. _Code of Conduct: CODE_OF_CONDUCT.rst 123 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2022 Theo Walker 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | .. image:: https://raw.githubusercontent.com/manimino/ducks/main/docs/img/ducks-main.png 2 | :alt: Ducks, the Python object indexer 3 | 4 | ========= 5 | ducks 🦆 6 | ========= 7 | 8 | Index your Python objects for fast lookup by their attributes. 9 | 10 | .. image:: https://img.shields.io/github/stars/manimino/ducks.svg?style=social&label=Star&maxAge=2592000 11 | :target: https://github.com/manimino/ducks 12 | :alt: GitHub stars 13 | .. image:: https://github.com/manimino/ducks/workflows/tests/badge.svg 14 | :target: https://github.com/manimino/ducks/actions 15 | :alt: tests Actions Status 16 | .. image:: https://codecov.io/github/manimino/ducks/coverage.svg?branch=main 17 | :target: https://codecov.io/gh/manimino/ducks 18 | :alt: Coverage 19 | .. image:: https://img.shields.io/static/v1?label=license&message=MIT&color=2ea44f 20 | :target: https://github.com/manimino/ducks/blob/main/LICENSE 21 | :alt: license - MIT 22 | .. image:: https://img.shields.io/static/v1?label=python&message=3.7%2B&color=2ea44f 23 | :target: https://github.com/manimino/ducks/ 24 | :alt: python - 3.7+ 25 | 26 | ------- 27 | Install 28 | ------- 29 | 30 | .. code-block:: 31 | 32 | pip install ducks 33 | 34 | ----- 35 | Usage 36 | ----- 37 | 38 | The main container in ducks is called Dex. 39 | 40 | .. code-block:: 41 | 42 | from ducks import Dex 43 | 44 | # make some objects 45 | objects = [ 46 | {'x': 3, 'y': 'a'}, 47 | {'x': 6, 'y': 'b'}, 48 | {'x': 9, 'y': 'c'} 49 | ] 50 | 51 | # Create a Dex containing the objects. 
52 | # Index on x and y. 53 | dex = Dex(objects, ['x', 'y']) 54 | 55 | # match objects 56 | dex[{ 57 | 'x': {'>': 5, '<': 10}, # where 5 < x < 10 58 | 'y': {'in': ['a', 'b']} # and y is 'a' or 'b' 59 | }] 60 | # result: [{'x': 6, 'y': 'b'}] 61 | 62 | This is a Dex of dicts, but the objects can be any type, even primitives like strings. 63 | 64 | Dex supports ==, !=, in, not in, <, <=, >, >=. 65 | 66 | The indexes can be dict keys, object attributes, or custom functions. 67 | 68 | See `Quick Start `_ for more examples of all of these. 69 | 70 | -------------- 71 | Is ducks fast? 72 | -------------- 73 | 74 | Yes. Here's how the ducks containers compare to other datastores on an example task. 75 | 76 | .. image:: https://raw.githubusercontent.com/manimino/ducks/main/docs/img/perf_bench.png 77 | :width: 600 78 | 79 | In this benchmark, two million objects are generated. Each datastore is used to find the subset of 200 of them that match 80 | four constraints. The ducks containers Dex and FrozenDex are shown to be very efficient at this, outperforming by 5x and 81 | and 10x respectively. 82 | 83 | Benchmark code is `in the Jupyter notebook `_. 84 | 85 | ---- 86 | Docs 87 | ---- 88 | 89 | `Quick Start `_ covers all the features you need, like 90 | pickling, nested attribute handling, and thread concurrency. 91 | 92 | `How It Works `_ is a deep dive on the implementation details. 93 | 94 | `Demos `_ has short scripts showing example uses. 
95 | -------------------------------------------------------------------------------- /build_docs.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | pushd docs/ 4 | sphinx-apidoc ../ducks -o .; make html 5 | popd 6 | -------------------------------------------------------------------------------- /codecov.yml: -------------------------------------------------------------------------------- 1 | comment: false 2 | coverage: 3 | status: 4 | project: 5 | default: 6 | target: "100" 7 | patch: 8 | default: 9 | target: "100" 10 | -------------------------------------------------------------------------------- /dev/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/dev/__init__.py -------------------------------------------------------------------------------- /dev/measure_ram.py: -------------------------------------------------------------------------------- 1 | """ 2 | This script was used to measure RAM usage of different collection sizes. 3 | This was used during design; it's not relevant to users of Dex. 
4 | """ 5 | import os 6 | import subprocess 7 | import sys 8 | from array import array 9 | 10 | import numpy as np 11 | from cykhash import Int64Set 12 | 13 | 14 | TOT_ITEMS = 10**6 15 | 16 | print_names = { 17 | "pytup": "tuple", 18 | "pyset": "set", 19 | "pyarr": "array (int64)", 20 | "cyk": "cykhash Int64Set", 21 | "nparr": "numpy array (int64)", 22 | "btree": "BTrees.LLBTree", 23 | } 24 | 25 | 26 | def get_ram(): 27 | return ( 28 | int( 29 | os.popen(f"ps -o pid,rss -p {os.getpid()}").read().split("\n")[1].split()[1] 30 | ) 31 | * 1024 32 | ) 33 | 34 | 35 | def cyk(items_per=10): 36 | n_sets = TOT_ITEMS // items_per 37 | ls = [None for _ in range(n_sets)] 38 | baseline = get_ram() 39 | for i in range(n_sets): 40 | offset = i * items_per 41 | iset = Int64Set(range(offset, offset + items_per)) 42 | ls[i] = iset 43 | used = get_ram() - baseline 44 | ram = round(used / TOT_ITEMS, 1) 45 | print("cykhash_set", items_per, ram) 46 | 47 | 48 | def nparr(items_per=10): 49 | n_sets = TOT_ITEMS // items_per 50 | ls = [None for _ in range(n_sets)] 51 | baseline = get_ram() 52 | for i in range(n_sets): 53 | offset = i * items_per 54 | ls[i] = np.array(range(offset, offset + items_per)) 55 | used = get_ram() - baseline 56 | ram = round(used / TOT_ITEMS, 1) 57 | print("Numpy_array", items_per, ram) 58 | 59 | 60 | def pyset(items_per=10): 61 | n_sets = TOT_ITEMS // items_per 62 | ls = [None for _ in range(n_sets)] 63 | baseline = get_ram() 64 | for i in range(n_sets): 65 | offset = i * items_per 66 | iset = set(range(offset, offset + items_per)) 67 | ls[i] = iset 68 | used = get_ram() - baseline 69 | ram = round(used / TOT_ITEMS, 1) 70 | print("python_set", items_per, ram) 71 | 72 | 73 | def pytup(items_per=10): 74 | n_tups = TOT_ITEMS // items_per 75 | baseline = get_ram() 76 | ls = [None for _ in range(n_tups)] 77 | for i in range(n_tups): 78 | offset = i * items_per 79 | ls[i] = tuple(range(offset, offset + items_per)) 80 | used = get_ram() - baseline 81 | ram = round(used / 
TOT_ITEMS, 1) 82 | print("python_tuple", items_per, ram) 83 | 84 | 85 | def pyarr(items_per=10): 86 | n_arrs = TOT_ITEMS // items_per 87 | baseline = get_ram() 88 | ls = [None for _ in range(n_arrs)] 89 | for i in range(n_arrs): 90 | arr = array("q") 91 | offset = i * items_per 92 | arr.extend(range(offset, offset + items_per)) 93 | ls[i] = arr 94 | used = get_ram() - baseline 95 | ram = round(used / TOT_ITEMS, 1) 96 | print("python_array", items_per, ram) 97 | 98 | 99 | def main(method, items_per): 100 | iper = int(items_per) 101 | if method == "pytup": 102 | f = pytup 103 | elif method == "pyset": 104 | f = pyset 105 | elif method == "cyk": 106 | f = cyk 107 | elif method == "nparr": 108 | f = nparr 109 | elif method == "pyarr": 110 | f = pyarr 111 | else: 112 | print("what?!", method) 113 | raise ValueError() 114 | f(iper) 115 | 116 | 117 | def row_dict_to_table(rd): 118 | # makes a github markdown table out of a dict of {row: {column: value}} 119 | # kinda jank looking but pycharm's autoformatter will fix it 120 | for r in rd: 121 | header = "| |" + " | ".join(str(x) for x in rd[r]) 122 | print() 123 | break 124 | print(header + " |") 125 | dashes = ["|---"] 126 | for r in rd: 127 | for k in rd[r]: 128 | dashes.append("-" * (2 + len(str(rd[r][k])))) 129 | break 130 | print("|".join(dashes) + "---|") 131 | for r in rd: 132 | s = "| " + print_names[r] + " | " 133 | s += " | ".join(str(x) for x in rd[r].values()) 134 | print(s + " |") 135 | 136 | 137 | if __name__ == "__main__": 138 | if len(sys.argv) > 1: 139 | main(sys.argv[1], sys.argv[2]) 140 | else: 141 | results = dict() 142 | for method in ["pyset", "pytup", "pyarr", "cyk", "nparr"]: 143 | m_result = dict() 144 | for items_per in [1, 2, 3, 4, 5, 10, 25, 50, 100, 1000, 10000]: 145 | txt = subprocess.check_output( 146 | f"python measure_ram.py {method} {items_per}".split() 147 | ) 148 | res = txt.decode().strip() 149 | _, _, bytes_per = res.split() 150 | m_result[items_per] = bytes_per 151 | results[method] = 
m_result 152 | print(results) 153 | row_dict_to_table(results) 154 | -------------------------------------------------------------------------------- /dev/multi.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "e8f13c30", 6 | "metadata": {}, 7 | "source": [ 8 | "#### Multi-attribute indexing\n", 9 | "\n", 10 | "The one place SQLite will still have a speed edge is in multidimensional range queries using a multi-attribute index. For equality, no prob - concatenate the values into a tuple and you're good to go. That beats SQLite by a lot, and works on both index types. But `a < 5 and b < 6`, not so much.\n", 11 | "\n", 12 | "Here, let's demo." 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 24, 18 | "id": "a7a50a4f", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "\n", 23 | "import random\n", 24 | "from litebox import LiteBox\n", 25 | "from ducks import Dex, FrozenDex\n", 26 | "\n", 27 | "objs = [{'a': random.random(), 'b': random.random()} for _ in range(10**6)]\n", 28 | "lb = LiteBox(objs, {'a': float, 'b': float})\n", 29 | "lb_multi = LiteBox(objs, {'a': float, 'b': float}, index=[('a', 'b')])\n", 30 | "fb = Dex(objs, ['a', 'b'])\n", 31 | "ffb = FrozenDex(objs, ['a', 'b'])\n" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": 25, 37 | "id": "496467f9", 38 | "metadata": {}, 39 | "outputs": [ 40 | { 41 | "name": "stdout", 42 | "output_type": "stream", 43 | "text": [ 44 | "528 µs ± 10.5 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 45 | ] 46 | } 47 | ], 48 | "source": [ 49 | "%%timeit\n", 50 | "lb.find(\"a < 0.001 and b < 0.001\")" 51 | ] 52 | }, 53 | { 54 | "cell_type": "code", 55 | "execution_count": 26, 56 | "id": "13264dd0", 57 | "metadata": {}, 58 | "outputs": [ 59 | { 60 | "name": "stdout", 61 | "output_type": "stream", 62 | "text": [ 63 | "677 µs ± 12.9 µs per loop (mean ± std. dev. 
of 7 runs, 1000 loops each)\n" 64 | ] 65 | } 66 | ], 67 | "source": [ 68 | "%%timeit\n", 69 | "fb[{'a': {'<': 0.001}, 'b': {'<': 0.001}}]" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": 27, 75 | "id": "9533ee92", 76 | "metadata": {}, 77 | "outputs": [ 78 | { 79 | "name": "stdout", 80 | "output_type": "stream", 81 | "text": [ 82 | "99.1 µs ± 780 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 83 | ] 84 | } 85 | ], 86 | "source": [ 87 | "%%timeit\n", 88 | "ffb[{'a': {'<': 0.001}, 'b': {'<': 0.001}}]" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": 29, 94 | "id": "3cd618f2", 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "# and now the multi-attribute indexing, blam" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 28, 104 | "id": "3b7529fa", 105 | "metadata": {}, 106 | "outputs": [ 107 | { 108 | "name": "stdout", 109 | "output_type": "stream", 110 | "text": [ 111 | "46.7 µs ± 322 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)\n" 112 | ] 113 | } 114 | ], 115 | "source": [ 116 | "%%timeit\n", 117 | "lb_multi.find(\"a < 0.001 and b < 0.001\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 23, 123 | "id": "b3ba56c6", 124 | "metadata": {}, 125 | "outputs": [ 126 | { 127 | "name": "stdout", 128 | "output_type": "stream", 129 | "text": [ 130 | "204 µs ± 3.33 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)\n" 131 | ] 132 | } 133 | ], 134 | "source": [ 135 | "%%timeit\n", 136 | "# not gonna beat it with something naive either\n", 137 | "[o for o in ffb[{'a': {'<': 0.001}}] if o['b'] < 0.001]" 138 | ] 139 | }, 140 | { 141 | "cell_type": "markdown", 142 | "id": "7cfe9510", 143 | "metadata": {}, 144 | "source": [ 145 | "Unfortunately, there's not really a good way to implement a multi-attr index here. \n", 146 | "BTree doesn't support multi-attribute lookups afaik.\n", 147 | "\n", 148 | "So we're kinda stuck." 
149 | ] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "id": "dab0ec6e", 154 | "metadata": {}, 155 | "source": [ 156 | "Contrary to popular belief, you can't just \"concatenate the keys and use a regular BTree\". At least, not with this implementation; it doesn't support separate lookups for \"parts\" of a key, so you'd be treating the whole key as one object. \n", 157 | "\n", 158 | "But! We can make a BTree of `{obj: obj}`, so `BTree{key1: BTree{key2: values}}` could work. Except that when `key1`'s values are all unique... you get a whole ton of BTrees.\n", 159 | "\n", 160 | "OK, so we still don't have a good idea. Making a multi-attribute BTree out of a single-attribute one doesn't seem doable.\n", 161 | "\n", 162 | "The best hack I can think of is:\n", 163 | " - Build the tree on concatenated keys `{(key1, key2): values}` \n", 164 | " - Get {keys: values} in the range `(k1_min, -inf) < (k1, k2) < (k1_max, inf)`. \n", 165 | " - Post-filter keys that don't match the k2 constraint.\n", 166 | " - Return only the values with matching keys.\n", 167 | "\n", 168 | "The order bound isn't living up to tree standards, but I bet it would be passable most of the time anyway. Probably beats doing a separate search on key2 and intersecting the results.\n", 169 | "\n", 170 | "The `-inf / inf` values would need to be some type-independent thing. `None` is always small in BTrees so that could be the lower bound.\n", 171 | "\n", 172 | "Could cram it in at the value level instead? `BTree({key1: [(key2, val), (key2, val) ...]` Avoids the awkward comparisons. Burns some RAM though. And it's really equivalent to just using one index and doing the rest in a list comprehension outside the container. \n", 173 | "\n", 174 | "### todo\n", 175 | "think about the frozen arrays and how you would implement it there. That might give good insights.\n", 176 | "Sparse ndarrays maybe? Quad / octrees?" 
177 | ] 178 | }, 179 | { 180 | "cell_type": "code", 181 | "execution_count": 3, 182 | "id": "5f91c00a", 183 | "metadata": {}, 184 | "outputs": [], 185 | "source": [ 186 | "from ducks.btree import BTree\n", 187 | "from random import random\n" 188 | ] 189 | }, 190 | { 191 | "cell_type": "code", 192 | "execution_count": 5, 193 | "id": "7b796fee", 194 | "metadata": {}, 195 | "outputs": [], 196 | "source": [ 197 | "objs = [\n", 198 | " {'i': i, 'a': random()*10, 'b': random()} for i in range(10**3)\n", 199 | "]\n", 200 | "\n", 201 | "# Task: Find objs where 1 < a < 2 and 0.5 < b < 0.6." 202 | ] 203 | }, 204 | { 205 | "cell_type": "code", 206 | "execution_count": 6, 207 | "id": "ad908e8a", 208 | "metadata": {}, 209 | "outputs": [], 210 | "source": [ 211 | "tree = BTree()\n", 212 | "for o in objs:\n", 213 | " tree[o['a']] = (o['b'], o)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "code", 218 | "execution_count": 10, 219 | "id": "6274a4a2", 220 | "metadata": { 221 | "scrolled": true 222 | }, 223 | "outputs": [ 224 | { 225 | "name": "stdout", 226 | "output_type": "stream", 227 | "text": [ 228 | "{'i': 191, 'a': 1.0570674936431124, 'b': 0.5649662294471903}\n", 229 | "{'i': 437, 'a': 1.3542792185455155, 'b': 0.5753256982901156}\n", 230 | "{'i': 185, 'a': 1.401839984653963, 'b': 0.5310477476841865}\n", 231 | "{'i': 772, 'a': 1.44039489179562, 'b': 0.5176671572926902}\n", 232 | "{'i': 457, 'a': 1.469287583082859, 'b': 0.5475469700864543}\n", 233 | "{'i': 943, 'a': 1.5722080241319658, 'b': 0.5615369447345585}\n", 234 | "{'i': 231, 'a': 1.6165395202332788, 'b': 0.5551452004632332}\n", 235 | "{'i': 92, 'a': 1.7698873658963565, 'b': 0.5834212111319615}\n", 236 | "{'i': 392, 'a': 1.834056255838259, 'b': 0.545838844154715}\n", 237 | "{'i': 691, 'a': 1.8549647165079397, 'b': 0.5517766855664482}\n" 238 | ] 239 | } 240 | ], 241 | "source": [ 242 | "for b, o in tree.get_range_expr({'>': 1, '<': 2}):\n", 243 | " if b < 0.6 and b > 0.5:\n", 244 | " print(o)" 245 | ] 246 | }, 247 | { 248 | 
"cell_type": "code", 249 | "execution_count": 12, 250 | "id": "634cfaf3", 251 | "metadata": {}, 252 | "outputs": [], 253 | "source": [ 254 | "# attributes are hashable, so this could work as a dict too. But that's less general.\n", 255 | "# Or parallel arrays, one for each attribute, plus one for the object ID. Nah, too hard to add/remove items.\n" 256 | ] 257 | }, 258 | { 259 | "cell_type": "code", 260 | "execution_count": null, 261 | "id": "7c03ff5d", 262 | "metadata": {}, 263 | "outputs": [], 264 | "source": [] 265 | } 266 | ], 267 | "metadata": { 268 | "kernelspec": { 269 | "display_name": "Python 3 (ipykernel)", 270 | "language": "python", 271 | "name": "python3" 272 | }, 273 | "language_info": { 274 | "codemirror_mode": { 275 | "name": "ipython", 276 | "version": 3 277 | }, 278 | "file_extension": ".py", 279 | "mimetype": "text/x-python", 280 | "name": "python", 281 | "nbconvert_exporter": "python", 282 | "pygments_lexer": "ipython3", 283 | "version": "3.9.7" 284 | } 285 | }, 286 | "nbformat": 4, 287 | "nbformat_minor": 5 288 | } 289 | -------------------------------------------------------------------------------- /dev/multicolumn.md: -------------------------------------------------------------------------------- 1 | ### Multicolumn indexing idea 2 | 3 | User supplies a tuple of fields, like [a, b, c]. 4 | 5 | We don't simply store the tuple of values, no no. We store a class, with like **slots** or something to make it ram 6 | efficient. 7 | 8 | The key here is that the class has **gt** defined, like so: 9 | 10 | ``` 11 | class MultiObject(): 12 | 13 | def __init__(self, tuple): 14 | self.tuple = tuple # holds a value for each field 15 | 16 | def __gt__(self, tuple): 17 | # TODO: this is possible totally wrong, just wanted to jot down the idea real quick. 18 | # Will evaluate it more srsly later. 
19 | return tuple[0] > self.tuple[0] and tuple[1] > self.tuple[1] and tuple[2] > self.tuple[2] 20 | ``` 21 | 22 | Then you can use an ordinary BTree or numpy array to compare these objects. 23 | 24 | Update works intuitively, you've got add / remove, EZPZ. 25 | 26 | I think you can define **gt** in such a way as it allows prefix queries. Not sure if that's a good design choice. 27 | 28 | But then what is **lt**? I think there's too many constraints on **gt** here, it doesn't quite work as written. 29 | 30 | Yeah, this seems like a bad idea. Leaving it here for now as it might be the beginnings of a good idea. 31 | 32 | TODO: Look at the Pandas multicolumn index implementation, that should be a much better starting point. 33 | -------------------------------------------------------------------------------- /dev/munchmark.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5e17014a", 6 | "metadata": {}, 7 | "source": [ 8 | "## Benchmarks\n", 9 | " - Range query, 1 attribute\n", 10 | " - Range query, 2 attributes\n", 11 | " " 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": 1, 17 | "id": "9c246cee", 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "from dexer import Dexer, FrozenDexer\n", 22 | "import time\n", 23 | "import pandas as pd\n", 24 | "from timeit import timeit, repeat\n", 25 | "from litebox import LiteBox\n", 26 | "from statistics import stdev, mean" 27 | ] 28 | }, 29 | { 30 | "cell_type": "code", 31 | "execution_count": 156, 32 | "id": "b7f1d9b9", 33 | "metadata": {}, 34 | "outputs": [], 35 | "source": [] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 178, 40 | "id": "6c0cce05", 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stdout", 45 | "output_type": "stream", 46 | "text": [ 47 | "1000000\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "cookies = []\n", 53 | "FLAVORS = ['Peanut', 
'Chocolate', 'Macadamia', 'Almond', 'Cinnamon', \n", 54 | " 'Butter', 'Caramel', 'Fudge', 'Candy', 'Mystery']\n", 55 | "COLORS = ['Red', 'Orange', 'Yellow', 'Green', 'Blue',\n", 56 | " 'Purple', 'Rainbow', 'Black', 'White', 'Invisible']\n", 57 | "TAGS = [str(i).zfill(4) for i in range(10**4)]\n", 58 | "cookies = []\n", 59 | "num = 0\n", 60 | "for i in range(10**6):\n", 61 | " \n", 62 | " cookie = {\n", 63 | " 'num': num,\n", 64 | " 'size': i % 10**6, \n", 65 | " 'chips': i % 10**5, \n", 66 | " 'sugar': i % 10**4, \n", 67 | " 'flavor': FLAVORS[i % 10], \n", 68 | " 'tag': TAGS[(i // 10) % len(TAGS)],\n", 69 | " 'color': COLORS[(i // 100) % 10]\n", 70 | " }\n", 71 | " cookies.append(cookie)\n", 72 | " num += 1\n", 73 | "\n", 74 | "\n", 75 | "print(len(cookies))" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": 179, 81 | "id": "7d3c129b", 82 | "metadata": {}, 83 | "outputs": [ 84 | { 85 | "name": "stdout", 86 | "output_type": "stream", 87 | "text": [ 88 | "CPU times: user 1.03 s, sys: 23.3 ms, total: 1.05 s\n", 89 | "Wall time: 1.05 s\n" 90 | ] 91 | } 92 | ], 93 | "source": [ 94 | "%%time\n", 95 | "df = pd.DataFrame(cookies)" 96 | ] 97 | }, 98 | { 99 | "cell_type": "code", 100 | "execution_count": 180, 101 | "id": "30f155ea", 102 | "metadata": {}, 103 | "outputs": [ 104 | { 105 | "name": "stdout", 106 | "output_type": "stream", 107 | "text": [ 108 | "CPU times: user 6.67 s, sys: 113 ms, total: 6.78 s\n", 109 | "Wall time: 6.79 s\n" 110 | ] 111 | } 112 | ], 113 | "source": [ 114 | "%%time\n", 115 | "lb = LiteBox(cookies, {'num': int, 'size': int, 'chips': int, 'sugar': int, 'flavor': str, 'tag': str, 'color': str})" 116 | ] 117 | }, 118 | { 119 | "cell_type": "code", 120 | "execution_count": 181, 121 | "id": "b8087f4e", 122 | "metadata": {}, 123 | "outputs": [ 124 | { 125 | "name": "stdout", 126 | "output_type": "stream", 127 | "text": [ 128 | "CPU times: user 9.47 s, sys: 52.8 ms, total: 9.52 s\n", 129 | "Wall time: 9.53 s\n" 130 | ] 131 | } 132 
| ], 133 | "source": [ 134 | "%%time\n", 135 | "dex = Dexer(cookies, on=['num', 'size', 'chips', 'sugar' 'flavor', 'tag', 'color'])" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 182, 141 | "id": "4d6ac177", 142 | "metadata": {}, 143 | "outputs": [ 144 | { 145 | "name": "stdout", 146 | "output_type": "stream", 147 | "text": [ 148 | "CPU times: user 6.14 s, sys: 32.9 ms, total: 6.17 s\n", 149 | "Wall time: 6.17 s\n" 150 | ] 151 | } 152 | ], 153 | "source": [ 154 | "%%time\n", 155 | "fdex = FrozenDexer(cookies, on=['num', 'size', 'chips', 'sugar' 'flavor', 'tag', 'color'])" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": null, 161 | "id": "e8c325f7", 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 171, 169 | "id": "f9ccfa9c", 170 | "metadata": {}, 171 | "outputs": [], 172 | "source": [ 173 | "# One-attribute range query returning 100 results\n", 174 | "\n", 175 | "def find_range1_df():\n", 176 | " return df[df['size'] < 100]\n", 177 | "\n", 178 | "def find_range1_lc():\n", 179 | " return [o for o in cookies if o['size'] < 100]\n", 180 | "\n", 181 | "def find_range1_lb():\n", 182 | " return lb.find('size < 100')\n", 183 | "\n", 184 | "def find_range1_dex():\n", 185 | " return dex.find({\n", 186 | " 'size': {'<': 100},\n", 187 | " })\n", 188 | "\n", 189 | "def find_range1_fdex():\n", 190 | " return fdex.find({\n", 191 | " 'size': {'<': 100},\n", 192 | " })\n", 193 | "\n", 194 | "RANGE1 = [find_range1_df, find_range1_lc, find_range1_lb, find_range1_dex, find_range1_fdex]" 195 | ] 196 | }, 197 | { 198 | "cell_type": "code", 199 | "execution_count": 172, 200 | "id": "420d4187", 201 | "metadata": {}, 202 | "outputs": [ 203 | { 204 | "name": "stdout", 205 | "output_type": "stream", 206 | "text": [ 207 | "100\n", 208 | "100\n", 209 | "100\n", 210 | "100\n", 211 | "100\n" 212 | ] 213 | } 214 | ], 215 | "source": [ 216 | "for f in 
RANGE1:\n", 217 | " print(len(f()))\n" 218 | ] 219 | }, 220 | { 221 | "cell_type": "code", 222 | "execution_count": 173, 223 | "id": "884272fd", 224 | "metadata": {}, 225 | "outputs": [], 226 | "source": [ 227 | "# Two-attribute range query returning 100 results\n", 228 | "def find_range2_df():\n", 229 | " return df[(df['size'] < 100) & (df['chips'] < 100)]\n", 230 | "\n", 231 | "def find_range2_lc():\n", 232 | " return [o for o in cookies if o['size'] < 1000 and o['chips'] < 100]\n", 233 | "\n", 234 | "def find_range2_lb():\n", 235 | " return lb.find('size < 1000 and chips < 100')\n", 236 | "\n", 237 | "def find_range2_dex():\n", 238 | " return dex.find({\n", 239 | " 'size': {'<': 1000},\n", 240 | " 'chips': {'<': 100},\n", 241 | " })\n", 242 | "\n", 243 | "def find_range2_fdex():\n", 244 | " return fdex.find({\n", 245 | " 'size': {'<': 1000},\n", 246 | " 'chips': {'<': 100},\n", 247 | " })\n", 248 | "\n", 249 | "RANGE2 = [find_range2_df, find_range2_lc, find_range2_lb, find_range2_dex, find_range2_fdex]" 250 | ] 251 | }, 252 | { 253 | "cell_type": "code", 254 | "execution_count": 174, 255 | "id": "534be384", 256 | "metadata": {}, 257 | "outputs": [ 258 | { 259 | "name": "stdout", 260 | "output_type": "stream", 261 | "text": [ 262 | "100\n", 263 | "100\n", 264 | "100\n", 265 | "100\n", 266 | "100\n" 267 | ] 268 | } 269 | ], 270 | "source": [ 271 | "for f in RANGE2:\n", 272 | " print(len(f()))\n" 273 | ] 274 | }, 275 | { 276 | "cell_type": "code", 277 | "execution_count": null, 278 | "id": "59675ff1", 279 | "metadata": {}, 280 | "outputs": [], 281 | "source": [] 282 | }, 283 | { 284 | "cell_type": "code", 285 | "execution_count": null, 286 | "id": "5e8fad87", 287 | "metadata": {}, 288 | "outputs": [], 289 | "source": [] 290 | }, 291 | { 292 | "cell_type": "code", 293 | "execution_count": 187, 294 | "id": "3b443b72", 295 | "metadata": {}, 296 | "outputs": [], 297 | "source": [ 298 | "# One-attribute exact match query returning 100 results\n", 299 | "def 
find_eq1_df():\n", 300 | " return df[(df['tag'] == '1111')]\n", 301 | "\n", 302 | "def find_eq1_lc():\n", 303 | " return [o for o in cookies if o['tag'] == '1111']\n", 304 | "\n", 305 | "def find_eq1_lb():\n", 306 | " return lb.find('tag == \"1111\"')\n", 307 | "\n", 308 | "def find_eq1_dex():\n", 309 | " return dex.find({\n", 310 | " 'tag': '1111'\n", 311 | " })\n", 312 | "\n", 313 | "def find_eq1_fdex():\n", 314 | " return fdex.find({\n", 315 | " 'tag': '1111'\n", 316 | " })\n", 317 | "\n", 318 | "EQ1 = [find_eq1_df, find_eq1_lc, find_eq1_lb, find_eq1_dex, find_eq1_fdex]" 319 | ] 320 | }, 321 | { 322 | "cell_type": "code", 323 | "execution_count": null, 324 | "id": "c3bbca8a", 325 | "metadata": {}, 326 | "outputs": [], 327 | "source": [] 328 | }, 329 | { 330 | "cell_type": "code", 331 | "execution_count": null, 332 | "id": "e1cbc5b2", 333 | "metadata": {}, 334 | "outputs": [], 335 | "source": [] 336 | }, 337 | { 338 | "cell_type": "code", 339 | "execution_count": 148, 340 | "id": "07b5c96c", 341 | "metadata": {}, 342 | "outputs": [], 343 | "source": [ 344 | "def run_timings(f, result_len, n_times=10):\n", 345 | " assert len(f()) == result_len\n", 346 | " return min(repeat(f, number=n_times))/n_times\n" 347 | ] 348 | }, 349 | { 350 | "cell_type": "code", 351 | "execution_count": 188, 352 | "id": "84c6ec42", 353 | "metadata": {}, 354 | "outputs": [ 355 | { 356 | "name": "stdout", 357 | "output_type": "stream", 358 | "text": [ 359 | "find_range1_df\n", 360 | "find_range1_lc\n", 361 | "find_range1_lb\n", 362 | "find_range1_dex\n", 363 | "find_range1_fdex\n", 364 | "find_range2_df\n", 365 | "find_range2_lc\n", 366 | "find_range2_lb\n", 367 | "find_range2_dex\n", 368 | "find_range2_fdex\n", 369 | "find_eq1_df\n", 370 | "find_eq1_lc\n", 371 | "find_eq1_lb\n", 372 | "find_eq1_dex\n", 373 | "find_eq1_fdex\n" 374 | ] 375 | } 376 | ], 377 | "source": [ 378 | "n_repeat = 100\n", 379 | "results = dict()\n", 380 | "for f in RANGE1 + RANGE2 + EQ1:\n", 381 | " 
print(f.__name__)\n", 382 | " results[f.__name__] = run_timings(f, 100)" 383 | ] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "execution_count": null, 388 | "id": "635245dc", 389 | "metadata": {}, 390 | "outputs": [], 391 | "source": [] 392 | }, 393 | { 394 | "cell_type": "code", 395 | "execution_count": 189, 396 | "id": "6783869e", 397 | "metadata": {}, 398 | "outputs": [ 399 | { 400 | "name": "stdout", 401 | "output_type": "stream", 402 | "text": [ 403 | "find_range1_df 0.0009101113071665167\n", 404 | "find_range1_lc 0.051943747501354665\n", 405 | "find_range1_lb 5.443879636004567e-05\n", 406 | "find_range1_dex 4.8754794988781214e-05\n", 407 | "find_range1_fdex 1.6833702102303504e-05\n", 408 | "find_range2_df 0.0022733806050382554\n", 409 | "find_range2_lc 0.0498123213998042\n", 410 | "find_range2_lb 0.0003733630990609527\n", 411 | "find_range2_dex 0.0005058821989223361\n", 412 | "find_range2_fdex 6.0588796623051165e-05\n", 413 | "find_eq1_df 0.04267703660298139\n", 414 | "find_eq1_lc 0.04795632830355316\n", 415 | "find_eq1_lb 5.073370411992073e-05\n", 416 | "find_eq1_dex 1.3539998326450586e-05\n", 417 | "find_eq1_fdex 1.1276802979409695e-05\n" 418 | ] 419 | } 420 | ], 421 | "source": [ 422 | "for r, t in results.items():\n", 423 | " print(r, t)" 424 | ] 425 | }, 426 | { 427 | "cell_type": "code", 428 | "execution_count": null, 429 | "id": "acfa6774", 430 | "metadata": {}, 431 | "outputs": [], 432 | "source": [] 433 | }, 434 | { 435 | "cell_type": "code", 436 | "execution_count": null, 437 | "id": "9d5b4eb5", 438 | "metadata": {}, 439 | "outputs": [], 440 | "source": [] 441 | }, 442 | { 443 | "cell_type": "code", 444 | "execution_count": null, 445 | "id": "d57de5f1", 446 | "metadata": {}, 447 | "outputs": [], 448 | "source": [ 449 | "# One-attribute 'in' query, 1000 results\n", 450 | "match_tags = [str(t).zfill(4) for t in range(1000, 1010)]\n", 451 | "\n", 452 | "def find_in1_df():\n", 453 | " return df[(df['tag'] in match_tags)]\n", 454 | "\n", 455 | "def 
find_in1_lc():\n", 456 | " return [o for o in cookies if o['tag'] in match_tags]\n", 457 | "\n", 458 | "def find_in1_lb():\n", 459 | " return lb.find(f'tag in {match_tags}')\n", 460 | "\n", 461 | "def find_in1_dex():\n", 462 | " return dex.find({\n", 463 | " 'tag': {'in': match_tags}\n", 464 | " })\n", 465 | "\n", 466 | "def find_in1_fdex():\n", 467 | " return fdex.find({\n", 468 | " 'tag': {'in': match_tags}\n", 469 | " })\n", 470 | "\n", 471 | "IN1 = [find_in1_df, find_in1_lc, find_in1_lb, find_in1_dex, find_in1_fdex]" 472 | ] 473 | }, 474 | { 475 | "cell_type": "code", 476 | "execution_count": null, 477 | "id": "6954be5b", 478 | "metadata": {}, 479 | "outputs": [], 480 | "source": [] 481 | } 482 | ], 483 | "metadata": { 484 | "kernelspec": { 485 | "display_name": "Python 3 (ipykernel)", 486 | "language": "python", 487 | "name": "python3" 488 | }, 489 | "language_info": { 490 | "codemirror_mode": { 491 | "name": "ipython", 492 | "version": 3 493 | }, 494 | "file_extension": ".py", 495 | "mimetype": "text/x-python", 496 | "name": "python", 497 | "nbconvert_exporter": "python", 498 | "pygments_lexer": "ipython3", 499 | "version": "3.9.7" 500 | } 501 | }, 502 | "nbformat": 4, 503 | "nbformat_minor": 5 504 | } 505 | -------------------------------------------------------------------------------- /dev/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/dev/output.png -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 
9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # -- Path setup -------------------------------------------------------------- 7 | # If extensions (or modules to document with autodoc) are in another directory, 8 | # add these directories to sys.path here. If the directory is relative to the 9 | # documentation root, use os.path.abspath to make it absolute, like shown here. 10 | # 11 | import os 12 | import sys 13 | 14 | sys.path.insert(0, os.path.abspath(".")) 15 | sys.path.insert(0, os.path.abspath("..")) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = "ducks" 21 | copyright = "2022, Theo Walker" 22 | author = "Theo Walker" 23 | 24 | # The full version, including alpha/beta/rc tags 25 | release = "0.5.1" 26 | 27 | 28 | # -- General configuration --------------------------------------------------- 29 | 30 | # Add any Sphinx extension module names here, as strings. They can be 31 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 32 | # ones. 
33 | extensions = ["sphinx.ext.autodoc", "sphinx.ext.napoleon"] 34 | 35 | # Add any paths that contain templates here, relative to this directory. 36 | templates_path = ["_templates"] 37 | 38 | # List of patterns, relative to source directory, that match files and 39 | # directories to ignore when looking for source files. 40 | # This pattern also affects html_static_path and html_extra_path. 41 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 42 | 43 | 44 | # -- Options for HTML output ------------------------------------------------- 45 | 46 | # The theme to use for HTML and HTML Help pages. See the documentation for 47 | # a list of builtin themes. 48 | # 49 | html_theme = "sphinx_rtd_theme" 50 | 51 | # Add any paths that contain custom static files (such as style sheets) here, 52 | # relative to this directory. They are copied after the builtin static files, 53 | # so a file named "default.css" will overwrite the builtin "default.css". 54 | html_static_path = ["_static"] 55 | 56 | html_favicon = "favicon.ico" 57 | 58 | 59 | def skip(app, what, name, obj, would_skip, options): 60 | if name in ["__getitem__", "__init__"]: 61 | return False 62 | return would_skip 63 | 64 | 65 | def setup(app): 66 | app.connect("autodoc-skip-member", skip) 67 | -------------------------------------------------------------------------------- /docs/demos.rst: -------------------------------------------------------------------------------- 1 | .. _demos: 2 | 3 | ===== 4 | Demos 5 | ===== 6 | 7 | These are mini-projects that demonstrate using ``ducks`` in applications. 8 | 9 | * `Auto-updating `_ - Keep Dex updated when objects change 10 | * `Wordle solver `_ - Solve string matching problems faster than regex 11 | * `Collision detection `_ - Find objects based on type and proximity (grid-based) 12 | * `Percentiles `_ - Find by percentile (median, p99, etc.) 
13 | -------------------------------------------------------------------------------- /docs/ducks.concurrent.rst: -------------------------------------------------------------------------------- 1 | ducks.concurrent package 2 | ======================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | ducks.concurrent.main module 8 | ---------------------------- 9 | 10 | .. automodule:: ducks.concurrent.main 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | Module contents 16 | --------------- 17 | 18 | .. automodule:: ducks.concurrent 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | -------------------------------------------------------------------------------- /docs/ducks.frozen.rst: -------------------------------------------------------------------------------- 1 | ducks.frozen package 2 | ==================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | ducks.frozen.frozen\_attr module 8 | -------------------------------- 9 | 10 | .. automodule:: ducks.frozen.frozen_attr 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | ducks.frozen.init\_helpers module 16 | --------------------------------- 17 | 18 | .. automodule:: ducks.frozen.init_helpers 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | ducks.frozen.main module 24 | ------------------------ 25 | 26 | .. automodule:: ducks.frozen.main 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | 31 | ducks.frozen.utils module 32 | ------------------------- 33 | 34 | .. automodule:: ducks.frozen.utils 35 | :members: 36 | :undoc-members: 37 | :show-inheritance: 38 | 39 | Module contents 40 | --------------- 41 | 42 | .. 
automodule:: ducks.frozen 43 | :members: 44 | :undoc-members: 45 | :show-inheritance: 46 | -------------------------------------------------------------------------------- /docs/ducks.mutable.rst: -------------------------------------------------------------------------------- 1 | ducks.mutable package 2 | ===================== 3 | 4 | Submodules 5 | ---------- 6 | 7 | ducks.mutable.main module 8 | ------------------------- 9 | 10 | .. automodule:: ducks.mutable.main 11 | :members: 12 | :undoc-members: 13 | :show-inheritance: 14 | 15 | ducks.mutable.mutable\_attr module 16 | ---------------------------------- 17 | 18 | .. automodule:: ducks.mutable.mutable_attr 19 | :members: 20 | :undoc-members: 21 | :show-inheritance: 22 | 23 | Module contents 24 | --------------- 25 | 26 | .. automodule:: ducks.mutable 27 | :members: 28 | :undoc-members: 29 | :show-inheritance: 30 | -------------------------------------------------------------------------------- /docs/ducks.rst: -------------------------------------------------------------------------------- 1 | ducks package 2 | ============= 3 | 4 | Subpackages 5 | ----------- 6 | 7 | .. toctree:: 8 | :maxdepth: 4 9 | 10 | ducks.concurrent 11 | ducks.frozen 12 | ducks.mutable 13 | 14 | Submodules 15 | ---------- 16 | 17 | ducks.btree module 18 | ------------------ 19 | 20 | .. automodule:: ducks.btree 21 | :members: 22 | :undoc-members: 23 | :show-inheritance: 24 | 25 | ducks.constants module 26 | ---------------------- 27 | 28 | .. automodule:: ducks.constants 29 | :members: 30 | :undoc-members: 31 | :show-inheritance: 32 | 33 | ducks.exceptions module 34 | ----------------------- 35 | 36 | .. automodule:: ducks.exceptions 37 | :members: 38 | :undoc-members: 39 | :show-inheritance: 40 | 41 | ducks.pickling module 42 | --------------------- 43 | 44 | .. automodule:: ducks.pickling 45 | :members: 46 | :undoc-members: 47 | :show-inheritance: 48 | 49 | ducks.utils module 50 | ------------------ 51 | 52 | .. 
automodule:: ducks.utils 53 | :members: 54 | :undoc-members: 55 | :show-inheritance: 56 | 57 | Module contents 58 | --------------- 59 | 60 | .. automodule:: ducks 61 | :members: 62 | :undoc-members: 63 | :show-inheritance: 64 | -------------------------------------------------------------------------------- /docs/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/docs/favicon.ico -------------------------------------------------------------------------------- /docs/how_it_works.rst: -------------------------------------------------------------------------------- 1 | =============== 2 | How ducks works 3 | =============== 4 | 5 | For each attribute in a Dex, it holds a B-tree that maps every unique value to the objects with that value. 6 | 7 | This is a rough idea of the data structure: 8 | 9 | .. code-block:: 10 | 11 | class Dex: 12 | indexes = { 13 | 'attribute1': BTree({10: set(some_obj_ids), 20: set(other_obj_ids)}), 14 | 'attribute2': BTree({'abc': set(some_obj_ids), 'def': set(other_obj_ids)}), 15 | } 16 | obj_map = {obj_ids: objects} 17 | } 18 | 19 | During a lookup, the object ID sets matching each query value are retrieved. The set operations `union`, 20 | `intersect`, and `difference` are applied to get the matching object IDs. Finally, the object IDs are converted 21 | to objects and returned. 22 | 23 | In practice, Dex and FrozenDex have a bit more to them, as they are optimized to have much better 24 | memory usage and speed than a naive implementation. For example, FrozenDex makes heavy use of sorted Numpy arrays. 25 | 26 | ------------- 27 | Dex internals 28 | ------------- 29 | 30 | Here's more detailed pseudocode of a Dex: 31 | 32 | .. 
code-block:: 33 | 34 | class Dex: 35 | # holds each attribute index and the id-to-object map 36 | indexes = { 37 | 'attr1': MutableAttrIndex(), 38 | 'attr2': MutableAttrIndex() 39 | } 40 | 'obj_map': {obj_ids: objects} 41 | } 42 | 43 | 44 | class MutableAttrIndex: 45 | # maps the values for one attribute to object IDs 46 | tree = BTree({ 47 | val1: set_like(some_obj_ids), 48 | val2: set_like(other_obj_ids) 49 | }) 50 | 51 | 52 | To run a query: 53 | 54 | #. Dex breaks the query down into individual attribute value lookups. 55 | #. The object IDs associated with the query attribute values are retrieved from MutableAttrIndex. 56 | #. The set-like containers are converted to sets if needed. 57 | #. Operations like `intersect` are performed on the sets to get the final object IDs. 58 | #. The object IDs are mapped to objects, which are then returned. 59 | 60 | Memory efficiency 61 | ================= 62 | 63 | That "set-like container" is there for memory efficiency reasons. Imagine building an index on a million distinct 64 | values. If actual sets were used, we'd get a million sets of size 1. Collections have a lot of overhead, so that would 65 | be a poor choice. We can do better. 66 | 67 | Memory usage of different collections 68 | ===================================== 69 | 70 | First, let's do some measuring of collection overhead. We'll store a large number of distinct int64s in collections of 71 | each type, vary the size of the collections, and check the memory usage per object. 72 | 73 | We expect bigger collections to be more efficient (fewer bytes per object). Ten million sets of size 1 should 74 | take up more RAM than ten sets of size 1 million.
75 | 76 | Bytes per entry for each collection type and size: 77 | 78 | 79 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 80 | | | 1 | 2 | 5 | 10 | 25 | 50 | 100 | 81 | +=======================+=========+=========+=========+=========+=========+========+=========+ 82 | | set | 260.1 | 146.3 | 195.0 | 113.8 | 124.2 | 78.3 | 116.9 | 83 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 84 | | tuple | 89.7 | 69.4 | 50.9 | 47.0 | 43.1 | 41.8 | 41.1 | 85 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 86 | | cykhash Int64Set | 160.1 | 79.9 | 38.1 | 25.3 | 15.5 | 23.5 | 22.4 | 87 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 88 | | numpy array (int64) | 161.1 | 80.3 | 35.0 | 22.1 | 13.5 | 10.9 | 9.4 | 89 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 90 | | array (int64) | 106.0 | 53.2 | 28.0 | 21.0 | 11.6 | 10.6 | 9.1 | 91 | +-----------------------+---------+---------+---------+---------+---------+--------+---------+ 92 | 93 | That table tells us a story. 94 | 95 | * Small collections of any type are extremely inefficient. Don't make collections of size 1. 96 | * Immutable collections are cheaper. Tuples, arrays, and numpy arrays cost less memory than the set types. 97 | * Typed collections are cheaper. Numpy arrays and `cykhash `_ Int64Sets are cheaper 98 | than tuples or Python sets. 99 | 100 | The best collection in terms of memory usage is a big array. But Dex is mutable; we need to add and remove 101 | objects in a few microseconds. Rewriting a big array on change is too slow. So we'll save the arrays for 102 | FrozenDex. So the single best one for Dex is cykhash Int64Set. By why pick just one? 
103 | 104 | Blending collection types 105 | ========================= 106 | 107 | For smaller collections, below ~10 numbers, cykhash is a bit inefficient, so we use Python 108 | int64 arrays there instead. The arrays are immutable, but it's fast to discard a small array and make another one when 109 | changes occur. 110 | 111 | And for collections of size 1, we just store the number, no container needed! That takes 28 bytes. 112 | 113 | So the code is a bit more complex than the pseudocode above, in order to keep collection overhead from filling RAM. 114 | 115 | Here is the table again. Dex (bottom line) uses cykhash, array, and integer types to stay RAM-efficient at all 116 | collection sizes. 117 | 118 | +--------------------+---------+---------+---------+--------+---------+--------+---------+ 119 | | | 1 | 2 | 5 | 10 | 25 | 50 | 100 | 120 | +====================+=========+=========+=========+========+=========+========+=========+ 121 | | set | 260.1 | 146.3 | 195.0 | 113.8 | 124.2 | 78.3 | 116.9 | 122 | +--------------------+---------+---------+---------+--------+---------+--------+---------+ 123 | | cykhash Int64Set | 160.1 | 79.9 | 38.1 | 25.3 | 15.5 | 23.5 | 22.4 | 124 | +--------------------+---------+---------+---------+--------+---------+--------+---------+ 125 | | array (int64) | 106.0 | 53.2 | 28.0 | 21.0 | 11.6 | 10.6 | 9.1 | 126 | +--------------------+---------+---------+---------+--------+---------+--------+---------+ 127 | | FilterBox storage | 28.0 | 53.2 | 28.0 | 21.0 | 15.5 | 23.5 | 22.4 | 128 | +--------------------+---------+---------+---------+--------+---------+--------+---------+ 129 | 130 | That's 4 to 10 times better than naively using Python sets to store ints. There's no tradeoff; 131 | Int64Set operations are about as fast as Python sets. 132 | 133 | ------------------- 134 | FrozenDex Internals 135 | ------------------- 136 | 137 | The FrozenDex implementation is very different from Dex. 
It is able to achieve better speed and lower memory usage 138 | by using data structures that don't support changes. 139 | 140 | FrozenDex pseudocode: 141 | 142 | .. code-block:: 143 | 144 | class FrozenDex: 145 | # holds each attribute index and an array of objects 146 | indexes = { 147 | 'attr1': FrozenAttrIndex(), 148 | 'attr2': FrozenAttrIndex() 149 | } 150 | 'objects': np.array(dtype="O") 151 | } 152 | 153 | class FrozenAttrIndex: 154 | # maps the values for a single attribute to indexes in the 'objects' array 155 | 156 | # parallel arrays store attribute values and object indices 157 | val_arr = np.array(attribute value for each object) # sorted by val_arr 158 | obj_idx_arr = np.array(index in objects array for each object) # sorted by val_arr 159 | 160 | # but if a value has lots of objects, store it in this tree instead 161 | tree = BTree({ 162 | value: np.array(sorted_obj_arr_indexes) 163 | }) 164 | 165 | Key points: 166 | 167 | * The objects are stored in a Numpy array in FrozenDex 168 | * Each FrozenAttrIndex maps values to object array indexes 169 | * FrozenAttrIndex has two different ways to do that mapping - parallel arrays and BTree 170 | 171 | Note that there are no "set" types anywhere here - so how do set operations like intersect work? 172 | 173 | Sorted arrays are sets 174 | ====================== 175 | 176 | If you have the arrays: 177 | 178 | .. code-block:: 179 | 180 | [1, 3, 5, 7, 9] 181 | [1, 2, 3, 4, 5, 6, 7] 182 | 183 | What is their intersection? Do you need to convert them to sets to figure it out? 184 | 185 | Of course not -- sorted array intersection is easy. It can be solved by iterating over both lists, advancing 186 | the pointer of the smaller value each time, and outputting the matches. 187 | `Galloping search `_ can make this even faster. It is faster than 188 | computing the intersection of hashsets. 189 | 190 | FrozenDex uses a great package called 191 | `sortednp `_ that implements fast set operations on sorted numpy arrays. 
192 | So once we have the object indexes for each part of a query, ``sortednp.intersect`` and friends will get us the final 193 | object indexes. 194 | 195 | Sorted arrays are trees 196 | ======================= 197 | 198 | FrozenDex uses sorted arrays in another way - to store values. Bisecting an array to find a value is similar to 199 | traversing a tree. Range queries are easy on sorted value arrays as well. 200 | 201 | So, a FrozenAttrIndex has a pair of arrays, one containing values in sorted order, and the other containing 202 | the object indexes for those values. Looking up the object indexes for a value or range of values is straightforward. 203 | 204 | That's not the only way FrozenDex maps values to objects, though. Just as Dex uses different containers depending on 205 | length, so too does FrozenDex. 206 | 207 | When a value has many associated objects, storing the value repeatedly in an array is clearly inefficient. 208 | So values that have many objects are stored in a BTree lookup instead. The BTree maps values to arrays of object 209 | indexes. 210 | 211 | We can't use the BTree for everything -- if a value is associated with only a few objects, allocating a numpy array to 212 | store the object indexes would incur lots of overhead. So having both data structures is the right way to go. 213 | 214 | Integer types 215 | ============= 216 | 217 | And there's one last optimization. The indexes are stored in `uint32` arrays if there are less than a few 218 | billion objects, which is usually the case. `uint32` operations are a little faster than `uint64`, in addition to being 219 | more RAM-efficient. FrozenDex will automatically select `uint64` when there are too many objects for 32-bit addressing. 220 | 221 | Thanks to these optimizations, FrozenDex is a very efficient tool. 
222 | 223 | ----------------------- 224 | ConcurrentDex Internals 225 | ----------------------- 226 | 227 | ConcurrentDex contains: 228 | 229 | * an instance of Dex 230 | * a `readerwriterlock `_ 231 | 232 | It exposes each method of the Dex, wrapped in the appropriate lock type using `with read_lock()` or 233 | `with write_lock()`. 234 | 235 | Performance 236 | =========== 237 | 238 | Each lock operation adds about 5µs. Not huge, but it does add up when doing many operations in a row. 239 | 240 | For this reason, the ``read_lock()`` and ``write_lock()`` methods are exposed. 241 | 242 | This allows patterns like: 243 | 244 | .. code-block:: 245 | 246 | cdex = ConcurrentDex(...) 247 | with cdex.write_lock(): 248 | for item in a_million_items: 249 | cdex.box.add(item) # cdex.box is the underlying Dex. 250 | 251 | which are faster than calling ``cdex.add()`` many times. 252 | 253 | By default, ConcurrentDex favors readers, allowing multiple readers to share a lock. Writers wait for all 254 | readers to release the lock. This behavior is customizable on init via the ``priority`` kwarg. 255 | 256 | Reasons to trust it 257 | =================== 258 | 259 | Concurrency bugs are notoriously tricky to find. ConcurrentDex is unlikely to have them because: 260 | 261 | * It uses a very simple, coarse-grained concurrency that locks the whole object at once 262 | * It's built on a widely-used lock library 263 | * There are concurrent operation tests that succeed on ConcurrentDex and fail on Dex, proving the 264 | locks are working properly (see ``test/concurrent``).
265 | -------------------------------------------------------------------------------- /docs/img/ducks-main.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/docs/img/ducks-main.png -------------------------------------------------------------------------------- /docs/img/perf_bench.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/docs/img/perf_bench.png -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | .. ducks documentation master file, created by 2 | sphinx-quickstart on Fri Aug 5 07:18:13 2022. 3 | 4 | .. include:: ../README.rst 5 | 6 | 7 | Contents 8 | ======== 9 | 10 | .. toctree:: 11 | :maxdepth: 1 12 | 13 | Home 14 | quick_start 15 | demos 16 | how_it_works 17 | ducks 18 | 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 
23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/modules.rst: -------------------------------------------------------------------------------- 1 | ducks 2 | ===== 3 | 4 | .. toctree:: 5 | :maxdepth: 4 6 | 7 | ducks 8 | -------------------------------------------------------------------------------- /docs/quick_start.rst: -------------------------------------------------------------------------------- 1 | =========== 2 | Quick Start 3 | =========== 4 | 5 | ----------- 6 | Basic Usage 7 | ----------- 8 | 9 | The main container in ducks is called Dex. 10 | 11 | .. code-block:: 12 | 13 | from ducks import Dex 14 | 15 | # make some objects 16 | objects = [ 17 | {'x': 3, 'y': 'a'}, 18 | {'x': 6, 'y': 'b'}, 19 | {'x': 9, 'y': 'c'} 20 | ] 21 | 22 | # Create a Dex containing the objects. 23 | # Index on x and y. 24 | dex = Dex(objects, ['x', 'y']) 25 | 26 | # match objects 27 | dex[{ 28 | 'x': {'>': 5, '<': 10}, # where 5 < x < 10 29 | 'y': {'in': ['a', 'b']} # and y is 'a' or 'b' 30 | }] 31 | # result: [{'x': 6, 'y': 'b'}] 32 | 33 | This is a Dex of dicts, but the objects can be any type. 34 | 35 | Dex supports ==, !=, in, not in, <, <=, >, >=. 36 | 37 | The indexes can be dict keys, object attributes, or custom functions. 38 | 39 | Alternative forms: 40 | * ``{'a': 1}`` may be used in place of ``{'a': {'==': 1}}`` 41 | * ``{'a': [1, 2, 3]}`` may be used in place of ``{'a': {'in': [1, 2, 3]}}`` 42 | * ``eq``, ``ge``, ``gt``, and so on can be used in place of ``==``, ``>=``, ``>`` 43 | 44 | ------------------- 45 | Add, remove, update 46 | ------------------- 47 | 48 | Dex supports add, remove, and update of objects. 
49 | 50 | .. code-block:: 51 | 52 | from ducks import Dex 53 | 54 | class Thing: 55 | def __init__(self): 56 | self.x = 1 57 | self.y = 1 58 | 59 | def __repr__(self): 60 | return f"Thing(x: {self.x}, y: {self.y})" 61 | 62 | # make an empty Dex 63 | dex = Dex([], ['x', 'y']) 64 | 65 | # add an object 66 | obj = Thing() 67 | dex.add(obj) 68 | print(dex[{'x': 1}]) # find it 69 | 70 | # update it 71 | obj.x = 2 72 | dex.update(obj) 73 | print(dex[{'x': 2}]) # find updated obj 74 | 75 | # remove it 76 | dex.remove(obj) 77 | print(list(dex)) # dex now contains no objects 78 | 79 | Update notifies Dex that an object's attributes have changed, so the index can be updated accordingly. 80 | There's an example in :ref:`demos` of how to automatically update Dex when objects change. 81 | 82 | --------- 83 | FrozenDex 84 | --------- 85 | 86 | If you don't need add, remove, or update, use a FrozenDex instead. 87 | It is used just like a Dex, but it's faster and more memory-efficient. 88 | 89 | .. code-block:: 90 | 91 | from ducks import FrozenDex 92 | 93 | dex = FrozenDex([{'a': 1, 'b': 2}], ['a']) 94 | dex[{'a': 1}] # result: [{'a': 1, 'b': 2}] 95 | 96 | FrozenDex is thread-safe because it does not allow writes. 97 | 98 | ------------- 99 | ConcurrentDex 100 | ------------- 101 | 102 | For multithreaded cases where writes are needed, use ConcurrentDex. It is a thin wrapper around a Dex 103 | that uses a lock to provide thread-safety. 104 | 105 | .. code-block:: 106 | 107 | from ducks import ConcurrentDex, FAIR, READERS, WRITERS 108 | 109 | objects = [{'a': 1, 'b': 2}] 110 | dex = ConcurrentDex(objects, ['a'], priority=READERS) 111 | dex[{'a': 1}] # result: [{'a': 1, 'b': 2}] 112 | 113 | The ConcurrentDex API is the same as Dex. An optional kwarg 'priority' allows prioritization of readers, 114 | writers, or neither; the default is to prioritize reads. 
115 | 116 | ------------------- 117 | Function attributes 118 | ------------------- 119 | 120 | Ducks can also index using functions evaluated on the objects. This allows indexing of object types such as strings. 121 | 122 | Let's find strings that are palindromes of length 3: 123 | 124 | .. code-block:: 125 | 126 | from ducks import Dex 127 | strings = [ 128 | 'ooh', 'wow', 129 | 'kayak', 'bob' 130 | ] 131 | 132 | # define a function that 133 | # takes the object as input 134 | def is_palindrome(s): 135 | return s == s[::-1] 136 | 137 | # make a Dex 138 | dex = Dex(strings, [is_palindrome, len]) 139 | dex[{ 140 | is_palindrome: True, 141 | len: 3 142 | }] 143 | # result: ['wow', 'bob'] 144 | 145 | Functions are evaluated on the object when it is added to the Dex. 146 | 147 | ----------- 148 | Nested data 149 | ----------- 150 | 151 | Use functions to get values from nested data structures. 152 | 153 | .. code-block:: 154 | 155 | from ducks import Dex 156 | 157 | objs = [ 158 | {'a': {'b': [1, 2, 3]}}, 159 | {'a': {'b': [4, 5, 6]}} 160 | ] 161 | 162 | def get_nested(obj): 163 | return obj['a']['b'][0] 164 | 165 | dex = Dex(objs, [get_nested]) 166 | dex[{get_nested: 4}] 167 | # result: {'a': {'b': [4, 5, 6]}} 168 | 169 | ------------------ 170 | Missing attributes 171 | ------------------ 172 | 173 | Objects don't need to have every attribute. 174 | 175 | Indexes are sparse. Objects that are missing an attribute will not be stored 176 | under that attribute. This saves lots of memory. 177 | 178 | * To find all objects that have an attribute, match the special value ``ANY``. 179 | * To find objects missing the attribute, do ``{'!=': ANY}``. 180 | * In functions, raise ``MissingAttribute`` to tell ducks the attribute is missing. 181 | 182 | Example: 183 | 184 | .. 
code-block:: 185 | 186 | from ducks import Dex, ANY, MissingAttribute 187 | 188 | objs = [{'a': 1}, {'a': 2}, {}] 189 | 190 | def get_a(obj): 191 | try: 192 | return obj['a'] 193 | except KeyError: 194 | raise MissingAttribute # tell Dex this attribute is missing 195 | 196 | dex = Dex(objs, ['a', get_a]) 197 | 198 | print(dex[{'a': ANY}]) # [{'a': 1}, {'a': 2}] 199 | print(dex[{get_a: ANY}]) # [{'a': 1}, {'a': 2}] 200 | print(dex[{'a': {'!=': ANY}}]) # [{}] 201 | 202 | Note that ``None`` is treated as a normal attribute value and is stored. 203 | 204 | -------- 205 | Pickling 206 | -------- 207 | 208 | Dex, ConcurrentDex, and FrozenDex can be pickled using the special functions ``save`` and ``load``. 209 | 210 | .. code-block:: 211 | 212 | from ducks import Dex, save, load 213 | dex = Dex([1.2, 1.8, 2.7], [round]) 214 | save(dex, 'numbers.dex') 215 | loaded_dex = load('numbers.dex') 216 | loaded_dex[{round: 2}] 217 | # result: 1.8 218 | 219 | Objects inside the dex will be saved along with it. 220 | 221 | ---------- 222 | Class APIs 223 | ---------- 224 | 225 | There are three container classes: 226 | 227 | * **Dex**: Can add, remove, and update objects after creation. 228 | `[API] `_ 229 | * **ConcurrentDex**: Same as Dex, but thread-safe. 230 | `[API] `_ 231 | * **FrozenDex**: Cannot be changed after creation, it's read-only. But it's super fast. 
232 | `[API] `_ 233 | -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | furo==2022.12.7 2 | sphinx==5.3.0 3 | sphinx-rtd-theme==1.3.0 4 | -------------------------------------------------------------------------------- /ducks/__init__.py: -------------------------------------------------------------------------------- 1 | from ducks.concurrent.main import ConcurrentDex # noqa: F401 2 | from ducks.concurrent.main import FAIR # noqa: F401 3 | from ducks.concurrent.main import READERS # noqa: F401 4 | from ducks.concurrent.main import WRITERS # noqa: F401 5 | from ducks.constants import ANY # noqa: F401 6 | from ducks.exceptions import MissingAttribute # noqa: F401 7 | from ducks.frozen.main import FrozenDex # noqa: F401 8 | from ducks.mutable.main import Dex # noqa: F401 9 | from ducks.pickling import load # noqa: F401 10 | from ducks.pickling import save # noqa: F401 11 | -------------------------------------------------------------------------------- /ducks/btree.py: -------------------------------------------------------------------------------- 1 | from typing import Any 2 | from typing import Dict 3 | from typing import List 4 | from typing import Tuple 5 | 6 | from BTrees.OOBTree import OOBTree 7 | 8 | 9 | class BTree: 10 | """ 11 | Wraps an OOBTree instance. Tweaks it a bit: 12 | - BTrees len() does a full tree traversal, which is very slow. So we maintain a count instead. 13 | - BTrees stores None values as if they were just really really small. So "x < 1" will find the Nones. 14 | Here instead we disallow None entirely, make it throw TypeError. 15 | - Provide a nice interface for using >, >=, <, <= to get value ranges. 16 | """ 17 | 18 | def __init__(self, d: Dict[Any, Any] = None): 19 | if d: 20 | if None in d: 21 | raise TypeError( 22 | "None is not allowed in BTree because it breaks comparisons." 
23 | ) 24 | self.tree = OOBTree(d) 25 | self.length = len(d) 26 | else: 27 | self.tree = OOBTree() 28 | self.length = 0 29 | 30 | def get_range_expr(self, expr: Dict[str, Any]) -> List: 31 | """Get values matching a range expression like {'>': 3, '<=': 5}""" 32 | min_key, max_key, include_min, include_max = range_expr_to_args(expr) 33 | return self.get_range(min_key, max_key, include_min, include_max) 34 | 35 | def get_range( 36 | self, 37 | min_key=None, 38 | max_key=None, 39 | include_min: bool = True, 40 | include_max: bool = True, 41 | ) -> List: 42 | """ 43 | Get values in the range of [min_key, max_key]. include_min and include_max 44 | determine whether values for the start and end keys will be included. 45 | 46 | Examples: 47 | Get all values: None, None, True, True 48 | Get 1 < key < 10: 1, 10, False, False 49 | Get key >= 3: 3, None, True, True 50 | """ 51 | if len(self) == 0: 52 | return [] 53 | excludemin = not include_min 54 | excludemax = not include_max 55 | return self.tree.values( 56 | min_key, max_key, excludemin=excludemin, excludemax=excludemax 57 | ) 58 | 59 | def get(self, key, default=None): 60 | return self.tree.get(key, default) 61 | 62 | def keys(self): 63 | return self.tree.keys() 64 | 65 | def values(self): 66 | return self.tree.values() 67 | 68 | def items(self): 69 | return self.tree.items() 70 | 71 | def __len__(self): 72 | return self.length 73 | 74 | def __setitem__(self, key, value): 75 | if key is None: 76 | raise TypeError( 77 | "None is not allowed in BTree because it breaks comparisons." 78 | ) 79 | if len(self) == 0: 80 | # OOBTree oddity: it allows a non-comparable object on the first insert, but 81 | # if it gets one, all future inserts will fail. 82 | # So let's raise a TypeError if the very first insert is a non-comparable type. 
83 | key > key 84 | if key not in self.tree: 85 | self.length += 1 86 | self.tree[key] = value 87 | 88 | def __getitem__(self, key): 89 | return self.tree[key] 90 | 91 | def __delitem__(self, key): 92 | self.length -= 1 93 | del self.tree[key] 94 | 95 | def __contains__(self, item): 96 | return item in self.tree 97 | 98 | 99 | def range_expr_to_args(expr: Dict[str, Any]) -> Tuple[Any, Any, bool, bool]: 100 | """ 101 | Turn a range expr into (min_key, max_key, include_min, include_max), which are easier to use with BTrees. 102 | e.g., translates {'<': 3} into get_values(3, None, True, False). 103 | Will ignore keys in expr other than '<', '<=', '>', '>='. 104 | """ 105 | min_key = None 106 | max_key = None 107 | include_min = True 108 | include_max = True 109 | if ">" in expr: 110 | min_key = expr[">"] 111 | include_min = False 112 | if ">=" in expr: 113 | min_key = expr[">="] 114 | include_min = True 115 | if "<" in expr: 116 | max_key = expr["<"] 117 | include_max = False 118 | if "<=" in expr: 119 | max_key = expr["<="] 120 | include_max = True 121 | return min_key, max_key, include_min, include_max 122 | -------------------------------------------------------------------------------- /ducks/concurrent/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/ducks/concurrent/__init__.py -------------------------------------------------------------------------------- /ducks/concurrent/main.py: -------------------------------------------------------------------------------- 1 | import pickle # nosec 2 | from contextlib import contextmanager 3 | from typing import Any 4 | from typing import Callable 5 | from typing import Dict 6 | from typing import Iterable 7 | from typing import Iterator 8 | from typing import List 9 | from typing import Optional 10 | from typing import Union 11 | 12 | from ducks.mutable.main import Dex 13 | from 
readerwriterlock.rwlock import RWLockFair 14 | from readerwriterlock.rwlock import RWLockRead 15 | from readerwriterlock.rwlock import RWLockWrite 16 | 17 | 18 | """Lock priority options""" 19 | READERS = "readers" 20 | WRITERS = "writers" 21 | FAIR = "fair" 22 | 23 | 24 | class ConcurrentDex: 25 | def __init__( 26 | self, 27 | objs: Optional[Iterable[Any]] = None, 28 | on: Iterable[Union[str, Callable]] = None, 29 | priority: str = READERS, 30 | ): 31 | """Contains a Dex instance and a readerwriterlock. Wraps each Dex method in a read or write lock. 32 | 33 | Args: 34 | objs: see Dex API 35 | on: see Dex API 36 | priority: 'readers', 'writers', or 'fair'. Default 'readers'. Change this according to your usage pattern. 37 | """ 38 | self.priority = priority 39 | self.box = Dex(objs, on) 40 | if priority == READERS: 41 | self.lock = RWLockRead() 42 | elif priority == WRITERS: 43 | self.lock = RWLockWrite() 44 | elif priority == FAIR: 45 | self.lock = RWLockFair() 46 | else: 47 | raise ValueError(f"priority must be {READERS}, {WRITERS}, or {FAIR}.") 48 | self._indexes = self.box._indexes # only used during testing 49 | 50 | @contextmanager 51 | def read_lock(self): 52 | """Lock the ConcurrentDex for reading.""" 53 | with self.lock.gen_rlock(): 54 | yield 55 | 56 | @contextmanager 57 | def write_lock(self): 58 | """Lock the ConcurrentDex for writing. 59 | 60 | When doing many write operations at once, it is more efficient to do:: 61 | with cfb.write_lock(): 62 | for item in items: 63 | cfb.box.add(item) # calls add() on the underlying Dex. 64 | 65 | This performs locking only once, versus calling cfb.add() which locks for each item. 66 | The same pattern works for update() and remove().
67 | """ 68 | with self.lock.gen_wlock(): 69 | yield 70 | 71 | def get_values(self, attr: Union[str, Callable]): 72 | """Get a read lock and perform Dex get_values().""" 73 | with self.read_lock(): 74 | return self.box.get_values(attr) 75 | 76 | def remove(self, obj: Any): 77 | """Get a write lock and perform Dex.remove().""" 78 | with self.write_lock(): 79 | self.box.remove(obj) 80 | 81 | def add(self, obj: Any): 82 | """Get a write lock and perform Dex.add().""" 83 | with self.write_lock(): 84 | self.box.add(obj) 85 | 86 | def update(self, obj: Any): 87 | """Get a write lock and perform Dex.update().""" 88 | with self.write_lock(): 89 | self.box.update(obj) 90 | 91 | def __len__(self) -> int: 92 | """Get a read lock and get length of Dex.""" 93 | with self.read_lock(): 94 | return len(self.box) 95 | 96 | def __contains__(self, obj: Any) -> bool: 97 | """Get a read lock and check if the item is in the Dex.""" 98 | with self.read_lock(): 99 | return obj in self.box 100 | 101 | def __iter__(self) -> Iterator: 102 | """Get a read lock, make a list of the objects in the Dex, and return an iter to the list.""" 103 | with self.read_lock(): 104 | return iter(list(self.box)) 105 | 106 | def __getitem__(self, query: Dict) -> List[Any]: 107 | """Get a read lock and perform Dex __getitem__.""" 108 | with self.read_lock(): 109 | return self.box[query] 110 | 111 | 112 | def save(c_box: ConcurrentDex, filepath: str): 113 | """Saves a ConcurrentDex to a pickle file.""" 114 | saved = { 115 | "objs": list(c_box.box.obj_map.values()), 116 | "on": list(c_box.box._indexes.keys()), 117 | "priority": c_box.priority, 118 | } 119 | with open(filepath, "wb") as fh: 120 | pickle.dump(saved, fh) 121 | 122 | 123 | def load(saved: Dict) -> ConcurrentDex: 124 | """Creates a ConcurrentDex from the pickle file contents.""" 125 | return ConcurrentDex(saved["objs"], saved["on"], saved["priority"]) 126 | -------------------------------------------------------------------------------- 
/ducks/constants.py: -------------------------------------------------------------------------------- 1 | SIZE_THRESH = 100 2 | 3 | ARR_TYPE = "q" # python array type meaning "int64": https://docs.python.org/3/library/array.html 4 | SET_SIZE_MIN = 10 5 | ARRAY_SIZE_MAX = 20 6 | 7 | 8 | class MatchAnything(set): 9 | pass 10 | 11 | 12 | """ 13 | ANY allows lookups like find({'attr': ANY}), which gives all objects that have an 'attr' attribute. 14 | 15 | Why is this a set()? 16 | We need a value that we can do "is" comparisons on, that will only be True 17 | when it's literally this object. set() is a simple object that satisfies this property. 18 | "ANY is ANY" evaluates to True, but "set() is ANY" evaluates to False. 19 | """ 20 | ANY = MatchAnything() 21 | 22 | VALID_OPERATORS = [ 23 | "==", 24 | "eq", 25 | "!=", 26 | "ne", 27 | "in", 28 | "not in", 29 | "<", 30 | "lt", 31 | "<=", 32 | "lte", 33 | "le", 34 | ">", 35 | "gt", 36 | ">=", 37 | "gte", 38 | "ge", 39 | "is", 40 | "is not", 41 | ] 42 | 43 | OPERATOR_MAP = { 44 | "eq": "==", 45 | "lt": "<", 46 | "le": "<=", # Python style <= 47 | "lte": "<=", # ElasticSearch style <= 48 | "gt": ">", 49 | "ge": ">=", # Python style >= 50 | "gte": ">=", # ElasticSearch style >= 51 | } 52 | 53 | EXCLUDE_OPERATORS = {"not in": "in", "!=": "=="} 54 | -------------------------------------------------------------------------------- /ducks/exceptions.py: -------------------------------------------------------------------------------- 1 | class FrozenError(Exception): 2 | """Raised when attempting to modify a FrozenDex""" 3 | 4 | 5 | class AttributeNotFoundError(Exception): 6 | """Raised when querying an attribute we don't have""" 7 | 8 | 9 | class MissingAttribute(Exception): 10 | """Raise this in your attribute functions to denote that the object is missing this attribute. Finds that 11 | match the attribute will never return this object. 
"""
Performs object lookup for a single attribute in a FrozenDex.
"""
from bisect import bisect_left
from bisect import bisect_right
from typing import Callable
from typing import Set
from typing import Union

import numpy as np
from ducks.btree import BTree
from ducks.constants import ANY
from ducks.constants import SIZE_THRESH
from ducks.frozen.init_helpers import get_vals
from ducks.frozen.init_helpers import run_length_encode
from ducks.utils import make_empty_array


class FrozenAttrIndex:
    """
    Stores data and handles requests that are relevant to a single attribute of a FrozenDex.

    There are three places where object indexes are stored.
    - none_ids stores all indexes for with the attribute value None
    - val_to_obj_ids stores object ids for attribute values that have many objects
    - val_arr + obj_id_arr store all the rest.
    """

    def __init__(self, attr: Union[str, Callable], objs: np.ndarray, dtype: str):
        """Build the index for ``attr`` over the object array.

        Args:
            attr: Attribute name or callable evaluated on each object.
            objs: Array of the objects, in their final (frozen) order.
            dtype: Numpy integer dtype used for object-index arrays.

        Raises:
            TypeError: if the attribute values are not mutually sortable.
        """
        # sort the objects by attribute value, using their hashes and handling collisions
        self.dtype = dtype
        self.attr = attr

        # Nones get stored in their own special spot so they don't break sortability.
        # A little convent for the Nones.
        self.none_ids = make_empty_array(self.dtype)

        # We will pull repeated attributes out into a BTree and pre-sort their indexes.
        # Saves memory, and makes object lookups *way* faster.
        self.val_to_obj_ids = BTree()

        # np.arange already fills 0..n-1; the original explicit fill loop was redundant.
        obj_id_arr = np.arange(len(objs), dtype=self.dtype)
        obj_id_arr, val_arr = get_vals(objs, obj_id_arr, self.attr)

        # extract Nones. These will make the array unsortable if left in.
        none_idx = np.array(
            [i for i in range(len(val_arr)) if val_arr[i] is None], dtype=self.dtype
        )
        if len(none_idx):
            none_flag = np.zeros_like(val_arr, dtype="bool")
            none_flag[none_idx] = True
            self.none_ids = np.sort(obj_id_arr[none_flag])
            obj_id_arr = obj_id_arr[~none_flag]
            val_arr = val_arr[~none_flag]

        # Attempt to sort the values.
        sort_order = np.argsort(val_arr)  # Throws TypeError if unsortable.
        val_arr = val_arr[sort_order]
        obj_id_arr = obj_id_arr[sort_order]

        # Values shared by many objects get a pre-sorted BTree entry;
        # the rest stay in the parallel arrays.
        val_starts, val_run_lengths, unique_vals = run_length_encode(val_arr)
        unused = np.ones_like(obj_id_arr, dtype="bool")
        for i, val in enumerate(unique_vals):
            if val_run_lengths[i] > SIZE_THRESH:
                # extract these
                start = val_starts[i]
                end = start + val_run_lengths[i]
                unused[start:end] = False
                self.val_to_obj_ids[val] = np.sort(obj_id_arr[start:end])
        self.val_arr = val_arr[unused]
        self.obj_id_arr = obj_id_arr[unused]

    def get(self, val) -> np.ndarray:
        """Get indexes of objects whose attribute is val."""
        if val is ANY:
            return self.get_all()
        if val is None:
            return self.none_ids
        if val in self.val_to_obj_ids:
            return self.val_to_obj_ids[val]
        # find by bisection
        left = bisect_left(self.val_arr, val)
        if left == len(self.val_arr) or self.val_arr[left] != val:
            return make_empty_array(self.dtype)
        right = bisect_right(self.val_arr, val)
        return np.sort(self.obj_id_arr[left:right])

    def get_all(self) -> np.ndarray:
        """Get indexes of every object with this attribute. Used when matching ANY."""
        arrs = [self.obj_id_arr]
        for v in self.val_to_obj_ids.values():
            arrs.append(v)
        arrs.append(self.none_ids)
        return np.sort(np.concatenate(arrs))

    def get_values(self) -> Set:
        """Get each value we have objects for."""
        vals = set(self.val_to_obj_ids.keys())
        vals = vals.union(self.val_arr)
        if len(self.none_ids):
            vals.add(None)
        return vals

    def _get_val_arr_matches(self, lo, hi, include_lo=False, include_hi=False):
        """Get the matches for this range query from the parallel arrays"""
        if len(self.val_arr) == 0:
            return make_empty_array(self.dtype)

        if lo is None:
            left = 0
            lo = self.val_arr[0]
            include_lo = True
        else:
            left = bisect_left(self.val_arr, lo)

        if hi is None:
            right = len(self.val_arr)
            hi = self.val_arr[right - 1]
            include_hi = True
        else:
            right = bisect_right(self.val_arr, hi)

        # move left pointer up to fit > constraint
        if not include_lo:
            while left < len(self.val_arr) and self.val_arr[left] == lo:
                left += 1
            if left == len(self.val_arr):
                return make_empty_array(self.dtype)

        # move right pointer down to fit < constraint
        if not include_hi:
            while right > left and self.val_arr[right - 1] == hi:
                right -= 1

        small_matches = self.obj_id_arr[left:right]
        return small_matches

    def get_ids_by_range(
        self, lo, hi, include_lo=False, include_hi=False
    ) -> np.ndarray:
        """Get the object IDs associated with this value range as a sorted array."""
        if len(self) == 0:
            return make_empty_array(self.dtype)

        # Get matches from the val_to_obj_ids BTree
        big_matches_list = list(
            self.val_to_obj_ids.get_range(lo, hi, include_lo, include_hi)
        )

        # Get matches from the parallel arrays
        small_matches = self._get_val_arr_matches(lo, hi, include_lo, include_hi)

        # do return
        if len(big_matches_list) == 1 and len(small_matches) == 0:
            # each big_matches is stored pre-sorted, no need to sort
            return big_matches_list[0]

        # concat all arrays and sort
        matches = np.sort(np.concatenate([small_matches] + big_matches_list))
        return matches

    def __len__(self):
        # Total object IDs stored across all three storage locations.
        return len(self.val_arr) + len(self.val_to_obj_ids) + len(self.none_ids)
from typing import Callable
from typing import Union

import numpy as np
from ducks.utils import get_attribute
from ducks.utils import make_empty_array


def get_vals(objs: np.ndarray, obj_id_arr: np.ndarray, attr: Union[Callable, str]):
    """Gets vals by attribute. Returned arrays will be shorter than input if objects are missing attributes."""
    n = len(objs)
    vals = np.empty(n, dtype="O")
    found = np.empty(n, dtype=bool)
    for i, obj in enumerate(objs):
        vals[i], found[i] = get_attribute(obj, attr)
    # Keep only the entries where the attribute was actually present.
    return obj_id_arr[found], vals[found]


def run_length_encode(arr: np.ndarray):
    """
    Find counts of each element in the arr (sorted) via run-length encoding.

    Takes 10ms for 1M objs.
    """
    if len(arr) == 0:
        return (
            make_empty_array("int64"),
            make_empty_array("int64"),
            make_empty_array("int64"),
        )
    # True wherever a new run of equal values begins (comparing neighbors).
    is_boundary = arr[1:] != arr[:-1]
    run_ends = np.append(np.where(is_boundary), len(arr) - 1)
    run_counts = np.diff(np.append(-1, run_ends))
    run_starts = np.cumsum(np.append(0, run_counts))[:-1]
    return run_starts, run_counts, arr[run_ends]
import pickle  # nosec
from bisect import bisect_left
from typing import Any
from typing import Callable
from typing import Dict
from typing import Iterable
from typing import Optional
from typing import Set
from typing import Union

import numpy as np
import sortednp as snp
from ducks.btree import range_expr_to_args
from ducks.frozen.frozen_attr import FrozenAttrIndex
from ducks.frozen.utils import snp_difference
from ducks.utils import make_empty_array
from ducks.utils import split_query
from ducks.utils import standardize_expr
from ducks.utils import validate_and_standardize_operators
from ducks.utils import validate_query


class FrozenDex:
    def __init__(self, objs: Iterable[Any], on: Iterable[Union[str, Callable]]):
        """Create a FrozenDex containing the ``objs``, queryable by the ``on`` attributes.

        Args:
            objs: The objects that FrozenDex will contain.
                Must support ``len()`` (a list, tuple, or array — not a generator).

            on: The attributes that will be used for finding objects.
                Must contain at least one.

        It's OK if the objects in ``objs`` are missing some or all of the attributes in ``on``.

        For the objects that do contain the attributes on ``on``, those attribute values must be hashable and sortable.
        Most Python objects are hashable. Implement the function ``__lt__(self, other)`` to make a class sortable.
        An attribute value of ``None`` is acceptable as well, even though None is not sortable.
        """
        if not on:
            raise ValueError("Need at least one attribute.")
        if isinstance(on, str):
            on = [on]

        self.obj_arr = np.empty(len(objs), dtype="O")
        # uint32 indexes are enough unless there are 2**32 or more objects.
        self.dtype = "uint32" if len(objs) < 2**32 else "uint64"
        for i, obj in enumerate(objs):
            self.obj_arr[i] = obj

        self._indexes = {}
        for attr in on:
            self._indexes[attr] = FrozenAttrIndex(attr, self.obj_arr, self.dtype)

        # only used during contains() checks
        self.sorted_obj_ids = np.sort([id(obj) for obj in self.obj_arr])

    def _find(  # noqa: C901
        self,
        match: Optional[Dict[Union[str, Callable], Any]] = None,
        exclude: Optional[Dict[Union[str, Callable], Any]] = None,
    ) -> np.ndarray:
        """Find objects in the FrozenDex that satisfy the match and exclude constraints.

        Args:
            match: Dict of ``{attribute: expression}`` defining the subset of objects that match.
                If ``None``, all objects will match.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.

                The expression can be any of the following:
                 - A dict of ``{operator: value}``, such as ``{'==': 1}`` ``{'>': 5}``, or ``{'in': [1, 2, 3]}``.
                 - A single value, which is a shorthand for `{'==': value}`.
                 - A list of values, which is a shorthand for ``{'in': [list_of_values]}``.
                 - ``ducks.ANY``, which matches all objects having the attribute.

                Valid operators are '==' 'in', '<', '<=', '>', '>='.
                The aliases 'eq' 'lt', 'le', 'lte', 'gt', 'ge', and 'gte' work too.
                To match a None value, use ``{'==': None}``. There is no separate operator for None values.

            exclude: Dict of ``{attribute: expression}`` defining the subset of objects that do not match.
                If ``None``, no objects will be excluded.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.
                Valid expressions are the same as in ``match``.

        Returns:
            Numpy array of objects matching the constraints. Array will be in the same order as the original objects.
        """
        # validate input and convert expressions to dict
        validate_query(self._indexes, match, exclude)
        for arg in [match, exclude]:
            if arg:
                for key in arg:
                    arg[key] = standardize_expr(arg[key])

        # perform 'match' query
        if match:
            hit_arrays = []
            for attr, expr in match.items():
                hit_array = self._match_attr_expr(attr, expr)
                if len(hit_array) == 0:
                    # this attr had no matches, therefore the intersection will be empty. We can stop here.
                    return make_empty_array("O")
                hit_arrays.append(hit_array)

            # intersect all the hit_arrays, starting with the smallest
            for i, hit_array in enumerate(sorted(hit_arrays, key=len)):
                if i == 0:
                    hits = hit_array
                else:
                    hits = snp.intersect(hits, hit_array)
        else:
            hits = np.arange(len(self.obj_arr), dtype=self.dtype)

        # perform 'exclude' query
        if exclude:
            exc_arrays = []
            for attr, expr in exclude.items():
                exc_arrays.append(self._match_attr_expr(attr, expr))

            # subtract each of the exc_arrays, starting with the largest
            for exc_array in sorted(exc_arrays, key=len, reverse=True):
                hits = snp_difference(hits, exc_array)
                if len(hits) == 0:
                    break

        return self.obj_arr[hits]

    def _match_attr_expr(self, attr: Union[str, Callable], expr: dict) -> np.ndarray:
        """Look at an attr, handle its expr appropriately"""
        validate_and_standardize_operators(expr)
        matches = None
        # handle 'in' and '=='
        eq_expr = {op: val for op, val in expr.items() if op in ["==", "in"]}
        for op, val in eq_expr.items():
            if op == "==":
                op_matches = self._indexes[attr].get(val)
            elif op == "in":
                op_matches = self._match_any_value_in(attr, expr["in"])
            matches = (
                op_matches if matches is None else snp.intersect(op_matches, matches)
            )

        # handle range query
        range_expr = {
            op: val for op, val in expr.items() if op in ["<", ">", "<=", ">="]
        }
        if range_expr:
            min_key, max_key, include_min, include_max = range_expr_to_args(range_expr)
            range_matches = self._indexes[attr].get_ids_by_range(
                min_key, max_key, include_min, include_max
            )
            matches = (
                range_matches
                if matches is None
                else snp.intersect(range_matches, matches)
            )
        return matches

    def get_values(self, attr: Union[str, Callable]) -> Set:
        """Get the set of unique values we have for the given attribute.

        Args:
            attr: The attribute to get values for.

        Returns:
            Set of all unique values for this attribute.
        """
        return self._indexes[attr].get_values()

    def _match_any_value_in(
        self, attr: Union[str, Callable], values: Iterable[Any]
    ) -> np.ndarray:
        """Get the union of object ID matches for the values."""
        matches = [self._indexes[attr].get(v) for v in values]
        if matches:
            return np.sort(np.concatenate(matches))
        else:
            return make_empty_array(self.dtype)

    def __contains__(self, obj):
        """True if ``obj`` (by identity) is one of the stored objects."""
        obj_id = id(obj)
        idx = bisect_left(self.sorted_obj_ids, obj_id)
        # bisect_left never returns a negative index, so only the upper bound
        # and equality need checking.
        if idx >= len(self.sorted_obj_ids) or self.sorted_obj_ids[idx] != obj_id:
            return False
        return True

    def __iter__(self):
        return iter(self.obj_arr)

    def __len__(self):
        return len(self.obj_arr)

    def __getitem__(self, query: Dict) -> np.ndarray:
        """Find objects in the FrozenDex that satisfy the constraints.

        Args:
            query: Dict of ``{attribute: expression}`` defining the subset of objects that match.
                If ``{}``, all objects will match.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.

                The expression can be any of the following:
                 - A dict of ``{operator: value}``, such as ``{'==': 1}`` ``{'>': 5}``, or ``{'in': [1, 2, 3]}``.
                 - A single value, which is a shorthand for `{'==': value}`.
                 - A list of values, which is a shorthand for ``{'in': [list_of_values]}``.

                The expression ``{'==': ducks.ANY}`` will match all objects having the attribute.
                The expression ``{'!=': ducks.ANY}`` will match all objects without the attribute.

                Valid operators are '==', '!=', 'in', 'not in', '<', '<=', '>', '>='.
                The aliases 'eq', 'ne', 'lt', 'le', 'lte', 'gt', 'ge', and 'gte' work too.
                To match a None value, use ``{'==': None}``. There is no separate operator for None values.

        Returns:
            Numpy array of objects matching the constraints. Array will be in the same order as the original objects.
        """
        if not isinstance(query, dict):
            raise TypeError(f"Got {type(query)}; expected a dict.")
        std_query = dict()
        for attr, expr in query.items():
            std_query[attr] = standardize_expr(expr)
        match_query, exclude_query = split_query(std_query)
        return self._find(match_query, exclude_query)


def save(box: FrozenDex, filepath: str):
    """Saves this object to a pickle file."""
    with open(filepath, "wb") as fh:
        pickle.dump(box, fh)


def load(box: FrozenDex):
    """Restores a just-unpickled FrozenDex in place by rebuilding its object-ID array."""
    # If this was created by one Python process and loaded by another, the object IDs will no longer
    # correspond to the objects. Re-create the object ID array with the correct IDs.
    box.sorted_obj_ids = np.sort([id(obj) for obj in box.obj_arr])


# --------------------------- ducks/frozen/utils.py ---------------------------
def snp_difference(left: np.ndarray, right: np.ndarray):
    """Return the elements of sorted ``left`` that do not appear in sorted ``right``."""
    # difference = left - indexes_in_intersection(left, right)
    _, indexes = snp.intersect(left, right, indices=True)
    indexes_to_discard = indexes[0]
    keep_these = np.ones_like(left, dtype=bool)
    keep_these[indexes_to_discard] = False
    return left[keep_these]
import pickle  # nosec
from operator import itemgetter
from typing import Any
from typing import Callable
from typing import Dict
from typing import Iterable
from typing import List
from typing import Optional
from typing import Set
from typing import Union

from cykhash import Int64Set
from ducks.mutable.mutable_attr import MutableAttrIndex
from ducks.utils import cyk_intersect
from ducks.utils import cyk_union
from ducks.utils import split_query
from ducks.utils import standardize_expr
from ducks.utils import validate_query


class Dex:
    def __init__(
        self,
        objs: Optional[Iterable[Any]] = None,
        on: Optional[Iterable[Union[str, Callable]]] = None,
    ):
        """
        Create a Dex containing the ``objs``, queryable by the ``on`` attributes.

        Args:
            objs: The objects that Dex will contain initially. Optional.

            on: The attributes that will be used for finding objects.
                Must contain at least one.

        It's OK if the objects in ``objs`` are missing some or all of the attributes in ``on``.

        For the objects that do contain the attributes in ``on``, those attribute values must be hashable and sortable.
        Most Python objects are hashable. Implement the function ``__lt__(self, other)`` to make a class sortable.
        An attribute value of ``None`` is acceptable as well, even though None is not sortable.
        """
        if not on:
            raise ValueError("Need at least one attribute.")
        if isinstance(on, str):
            on = [on]

        if objs:
            self.obj_map = {id(obj): obj for obj in objs}
        else:
            self.obj_map = dict()

        # Build an index for each attribute.
        # Index from obj_map.values(), not objs: a generator passed as objs is
        # exhausted by the dict comprehension above, and re-iterating it here
        # would silently build empty indexes. This also avoids double-counting
        # duplicate objects, since obj_map dedupes by id().
        self._indexes = {}
        for attr in on:
            self._indexes[attr] = MutableAttrIndex(attr, self.obj_map.values())

    def _find(
        self,
        match: Dict[Union[str, Callable], Dict[str, Any]],
        exclude: Dict[Union[str, Callable], Dict[str, Any]],
    ) -> List:
        """Find objects in the Dex that satisfy the match and exclude constraints.

        Args:
            match: Dict of ``{attribute: expression}`` defining the subset of objects that match.
                If ``None``, all objects will match.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.

                The expression can be any of the following:
                 - A dict of ``{operator: value}``, such as ``{'==': 1}`` ``{'>': 5}``, or ``{'in': [1, 2, 3]}``.
                 - A single value, which is a shorthand for `{'==': value}`.
                 - A list of values, which is a shorthand for ``{'in': [list_of_values]}``.

                The special value ``ducks.ANY`` will match all objects having the attribute.

                Valid operators are '==' 'in', '<', '<=', '>', '>='.
                The aliases 'eq' 'lt', 'le', 'lte', 'gt', 'ge', and 'gte' work too.
                To match a None value, use ``{'==': None}``. There is no separate operator for None values.

            exclude: Dict of ``{attribute: expression}`` defining the subset of objects that do not match.
                If ``None``, no objects will be excluded.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.
                Valid expressions are the same as in ``match``.

        Returns:
            List of objects matching the constraints. List will be unordered.
        """
        # validate input and convert expressions to dict
        validate_query(self._indexes, match, exclude)
        obj_ids = self._find_ids(match, exclude)
        return self._obj_ids_to_objs(obj_ids)

    def add(self, obj: Any):
        """Add the object, evaluating any attributes and storing the results.
        If the object is already present, it will not be updated."""
        ptr = id(obj)
        if ptr in self.obj_map:
            return
        self.obj_map[ptr] = obj
        for attr in self._indexes:
            self._indexes[attr].add(ptr, obj)

    def remove(self, obj: Any):
        """Remove the object. Raises KeyError if not present."""
        ptr = id(obj)
        if ptr not in self.obj_map:
            raise KeyError

        for attr in self._indexes:
            self._indexes[attr].remove(ptr, obj)
        del self.obj_map[ptr]

    def update(self, obj: Any):
        """Remove and re-add the object, updating all stored attributes. Raises KeyError if object not present."""
        self.remove(obj)
        self.add(obj)

    def get_values(self, attr: Union[str, Callable]) -> Set:
        """Get the unique values we have for the given attribute.

        Args:
            attr: The attribute to get values for.

        Returns:
            Set of all unique values for this attribute.
        """
        return self._indexes[attr].get_values()

    def _find_ids(
        self,
        match: Optional[Dict[Union[str, Callable], Dict]] = None,
        exclude: Optional[Dict[Union[str, Callable], Dict]] = None,
    ) -> Int64Set:
        """Perform lookup based on given constraints. Return a set of object IDs."""
        # perform 'match' query
        if match:
            # find intersection of each attr
            hit_sets = []
            for attr, expr in match.items():
                hit_set = self._match_attr_expr(attr, expr)
                if len(hit_set) == 0:
                    # this attr had no matches, therefore the intersection will be empty. We can stop here.
                    return Int64Set()
                hit_sets.append(hit_set)

            for i, hit_set in enumerate(sorted(hit_sets, key=len)):
                # intersect this attr's hits with our hits so far
                if i == 0:
                    hits = hit_set
                else:
                    hits = cyk_intersect(hits, hit_set)
        else:
            # 'match' is unspecified, so match all objects
            hits = Int64Set(self.obj_map.keys())

        # perform 'exclude' query
        if exclude:
            exc_sets = []
            for attr, expr in exclude.items():
                exc_sets.append(self._match_attr_expr(attr, expr))

            for exc_set in sorted(exc_sets, key=len, reverse=True):
                hits = Int64Set.difference(hits, exc_set)
                if len(hits) == 0:
                    break

        return hits

    def _match_attr_expr(
        self, attr: Union[str, Callable], expr: Dict[str, Any]
    ) -> Int64Set:
        """Look at an attr, handle its expr appropriately"""
        matches = None
        # handle 'in' and '=='
        eq_expr = {op: val for op, val in expr.items() if op in ["==", "in"]}
        for op, val in eq_expr.items():
            if op == "==":
                op_matches = self._indexes[attr].get_obj_ids(val)
            elif op == "in":
                op_matches = self._match_any_value_in(attr, expr["in"])
            matches = (
                op_matches if matches is None else cyk_intersect(op_matches, matches)
            )

        # handle range query
        range_expr = {
            op: val for op, val in expr.items() if op in ["<", ">", "<=", ">="]
        }
        if range_expr:
            range_matches = self._indexes[attr].get_ids_by_range(range_expr)
            matches = (
                range_matches
                if matches is None
                else cyk_intersect(range_matches, matches)
            )
        return matches

    def _match_any_value_in(
        self, attr: Union[str, Callable], values: Iterable[Any]
    ) -> Int64Set:
        """Handle 'in' queries. Return the union of object ID matches for the values."""
        matches = Int64Set()
        for v in values:
            v_matches = self._indexes[attr].get_obj_ids(v)
            matches = cyk_union(matches, v_matches)
        return Int64Set(matches)

    def _obj_ids_to_objs(self, obj_ids: Int64Set) -> List[Any]:
        """Look up each obj_id in self.obj_map, and return the list of objs."""
        # Using itemgetter is about 10% faster than doing a comprehension like [self.objs[ptr] for ptr in hits]
        if len(obj_ids) == 0:
            return []
        elif len(obj_ids) == 1:
            return [
                itemgetter(*obj_ids)(self.obj_map)
            ]  # itemgetter returns a single item here, not in a collection
        else:
            return list(
                itemgetter(*obj_ids)(self.obj_map)
            )  # itemgetter returns a tuple of items here, so make it a list

    def __contains__(self, obj: Any):
        return id(obj) in self.obj_map

    def __iter__(self):
        return iter(self.obj_map.values())

    def __len__(self):
        return len(self.obj_map)

    def __getitem__(self, query: Dict) -> List[Any]:
        """Find objects in the Dex that satisfy the constraints.

        Args:
            query: Dict of ``{attribute: expression}`` defining the subset of objects that match.
                If ``{}``, all objects will match.

                Each attribute is a string or Callable. Must be one of the attributes specified in the constructor.

                The expression can be any of the following:
                 - A dict of ``{operator: value}``, such as ``{'==': 1}`` ``{'>': 5}``, or ``{'in': [1, 2, 3]}``.
                 - A single value, which is a shorthand for `{'==': value}`.
                 - A list of values, which is a shorthand for ``{'in': [list_of_values]}``.

                The expression ``{'==': ducks.ANY}`` will match all objects having the attribute.
                The expression ``{'!=': ducks.ANY}`` will match all objects without the attribute.

                Valid operators are '==', '!=', 'in', 'not in', '<', '<=', '>', '>='.
                The aliases 'eq', 'ne', 'lt', 'le', 'lte', 'gt', 'ge', and 'gte' work too.
                To match a None value, use ``{'==': None}``. There is no separate operator for None values.

        Returns:
            List of objects matching the constraints. List will be unordered.
        """
        if not isinstance(query, dict):
            raise TypeError(f"Got {type(query)}; expected a dict.")
        std_query = dict()
        for attr, expr in query.items():
            std_query[attr] = standardize_expr(expr)
        match_query, exclude_query = split_query(std_query)
        return self._find(match_query, exclude_query)


def save(box: Dex, filepath: str):
    """Saves this object to a pickle file."""
    # We can't pickle this easily, because:
    # - Int64Sets cannot be pickled, so the MutableAttrIndex is hard to save.
    # - Object IDs are specific to the process that created them, so the object map will be invalid if saved.
    # Therefore, this just pickles the objects and the list of what to build indexes on.
    # The Dex container will be built anew with __init__ on load.
    # A bit slow, but it's simple, guaranteed to work, and is very robust against changes in the container code.
    saved = {"objs": list(box.obj_map.values()), "on": list(box._indexes.keys())}
    with open(filepath, "wb") as fh:
        pickle.dump(saved, fh)


def load(saved: Dict) -> Dex:
    """Creates a Dex from the pickle file."""
    return Dex(saved["objs"], saved["on"])
from array import array
from typing import Any
from typing import Callable
from typing import Dict
from typing import Hashable
from typing import Iterable
from typing import Optional
from typing import Set
from typing import Union

from cykhash import Int64Set
from ducks.btree import BTree
from ducks.constants import ANY
from ducks.constants import ARR_TYPE
from ducks.constants import ARRAY_SIZE_MAX
from ducks.constants import SET_SIZE_MIN
from ducks.utils import get_attribute


class MutableAttrIndex:
    """Stores data and handles requests that are relevant to a single attribute of a Dex."""

    def __init__(
        self,
        attr: Union[Callable, str],
        objs: Optional[Iterable[Any]] = None,
    ):
        self.attr = attr
        self.none_ids = Int64Set()  # Stores object IDs for the attribute value None
        self.tree = BTree()  # Stores object IDs for all other values
        self.n_obj_ids = 0
        if objs:
            for obj in objs:
                self.add(id(obj), obj)

    def add(self, ptr: int, obj: Any):
        """Add an object if it has this attribute."""
        val, success = get_attribute(obj, self.attr)
        if not success:
            return
        self._add_val(ptr, val)
        self.n_obj_ids += 1

    def get_obj_ids(self, val: Any) -> Int64Set:
        """Get the object IDs associated with this value as an Int64Set."""
        if val is ANY:
            return self.get_all_ids()
        if val is None:
            # NOTE: returns the internal set itself, not a copy; callers must not mutate it.
            return self.none_ids
        ids = self.tree.get(val, Int64Set())
        if type(ids) is array:
            return Int64Set(ids)
        elif type(ids) is Int64Set:
            return ids
        else:
            return Int64Set([ids])

    def remove(self, ptr: int, obj: Any):
        """Remove a single object from the index. ptr is already known to be in the Dex.
        Runs in O(1) if obj has this attr and the value of the attr hasn't changed. O(n_keys) otherwise."""
        removed = False
        val, success = get_attribute(obj, self.attr)
        if success:
            removed = self._try_remove(ptr, val)
        if not removed and ptr in self.none_ids:
            # The attribute may have been None at add() time and changed since.
            # The key scan below only covers tree keys, so check the None bucket
            # here to avoid leaving a stale ID behind.
            self.none_ids.remove(ptr)
            self.n_obj_ids -= 1
            removed = True
        if not removed:
            # do O(n) search
            for val in list(self.tree.keys()):
                removed = self._try_remove(ptr, val)
                if removed:
                    break

    def get_all_ids(self) -> Int64Set:
        """Get the ID of every object that has this attribute.
        Called when matching or excluding ``{attr: hashindex.ANY}``."""
        obj_ids = Int64Set(self.none_ids)
        for val in self.tree.values():
            self._add_val_to_set(val, obj_ids)
        return obj_ids

    def get_values(self) -> Set:
        """Get unique values we have objects for."""
        vals = set(self.tree.keys())
        if len(self.none_ids):
            vals.add(None)
        return vals

    def get_ids_by_range(self, expr: Dict[str, Any]):
        """Get object IDs based on less than / greater than some value"""
        obj_ids = Int64Set()
        vals = self.tree.get_range_expr(expr)
        for val in vals:
            self._add_val_to_set(val, obj_ids)
        return obj_ids

    def _add_val(self, ptr, val):
        # Storage adapts to cardinality: a bare int for one ID,
        # an array for a few, an Int64Set for many.
        if val is None:
            self.none_ids.add(ptr)
        elif val in self.tree:
            obj_ids = self.tree[val]
            if type(obj_ids) is Int64Set:
                self.tree[val].add(ptr)
            elif type(obj_ids) is array:
                if len(obj_ids) == ARRAY_SIZE_MAX:
                    # upgrade array -> set
                    obj_ids = Int64Set(obj_ids)
                    obj_ids.add(ptr)
                    self.tree[val] = obj_ids
                else:
                    obj_ids.append(ptr)
            else:
                # obj_ids was an int, now we have two. upgrade int -> array
                self.tree[val] = array(ARR_TYPE, [obj_ids, ptr])
        else:
            # new val, add the int
            self.tree[val] = ptr

    @staticmethod
    def _add_val_to_set(val: Any, obj_ids: Int64Set):
        """We need to do this a lot"""
        if type(val) in [array, Int64Set]:
            for v in val:
                obj_ids.add(v)
        else:
            obj_ids.add(val)

    def _try_remove(self, ptr: int, val: Hashable) -> bool:
        """Try to remove the object from self.tree[val]. Return True on success, False otherwise."""
        # handle None
        if val is None and ptr in self.none_ids:
            self.none_ids.remove(ptr)
            self.n_obj_ids -= 1
            return True

        # first, check that the ptr is in here
        if val not in self.tree:
            return False
        if type(self.tree[val]) in [array, Int64Set]:
            if ptr not in self.tree[val]:
                return False
        else:
            if self.tree[val] != ptr:
                return False

        # must be in the tree
        obj_ids = self.tree[val]
        if type(self.tree[val]) in [array, Int64Set]:
            self.tree[val].remove(ptr)
            if type(obj_ids) is array:
                if len(self.tree[val]) == 1:
                    # downgrade array -> int
                    self.tree[val] = self.tree[val][0]
            else:
                if len(self.tree[val]) < SET_SIZE_MIN:
                    # downgrade set -> array
                    self.tree[val] = array(ARR_TYPE, list(self.tree[val]))
        else:
            # downgrade int -> nothing
            del self.tree[val]
        self.n_obj_ids -= 1
        return True

    def __len__(self):
        return self.n_obj_ids
def save(box: Union[Dex, FrozenDex, ConcurrentDex], filepath: str):
    """Save a Dex, FrozenDex, or ConcurrentDex to a file.

    Args:
        box: the container to persist.
        filepath: destination path for the pickle file.

    Raises:
        TypeError: if box is not one of the three supported container types.
            (Previously this case silently wrote nothing, which made a
            misspelled call impossible to notice until load time.)
    """
    # type() rather than isinstance(): each concrete class has its own
    # serialization routine, and the three checks are mutually exclusive.
    if type(box) is Dex:
        m_save(box, filepath)
    elif type(box) is FrozenDex:
        f_save(box, filepath)
    elif type(box) is ConcurrentDex:
        c_save(box, filepath)
    else:
        raise TypeError(
            f"Cannot save object of type {type(box)}; expected Dex, FrozenDex, or ConcurrentDex."
        )


def load(filepath: str) -> Union[Dex, FrozenDex, ConcurrentDex]:
    """Load a Dex, FrozenDex, or ConcurrentDex from a pickle file.

    Warning: ``pickle.load`` can execute arbitrary code; only load trusted files.
    """
    with open(filepath, "rb") as fh:
        saved = pickle.load(fh)  # nosec
    if isinstance(saved, FrozenDex):
        f_load(saved)  # mutates saved in place
        return saved
    elif "priority" in saved:
        # saved is a dict snapshot; a 'priority' key marks a ConcurrentDex.
        return c_load(saved)
    else:
        return m_load(saved)
def get_attributes(cls) -> List[str]:
    """Helper function to grab the attributes of a class"""
    return [attr for attr in cls.__annotations__]


def split_query(query: Dict) -> Tuple[Dict, Dict]:
    """Split a query into its match terms and its exclude terms.

    Exclude operators are inverted to their positive counterparts (e.g.
    "not in" -> "in") so downstream code only sees positive operators.
    """
    match_query: Dict = {}
    exclude_query: Dict = {}
    for attr, expr in query.items():
        matches = {}
        excludes = {}
        for op, val in expr.items():
            if op in EXCLUDE_OPERATORS:
                excludes[EXCLUDE_OPERATORS[op]] = val  # invert "not in" -> "in", etc.
            else:
                matches[op] = val
        if matches:
            match_query[attr] = matches
        if excludes:
            exclude_query[attr] = excludes
    return match_query, exclude_query


def standardize_expr(expr: Any) -> Dict:
    """Turn a find() expr into a dict of {operator: value}."""
    if isinstance(expr, dict):
        return validate_and_standardize_operators(expr)
    elif isinstance(expr, list):
        # a bare list is shorthand for membership
        return {"in": expr}
    elif isinstance(expr, set) and expr is not ANY:
        raise ValueError(f"Expression {expr} is a set. Did you mean to make a dict?")
    else:
        # a bare value is shorthand for equality
        return {"==": expr}
87 | ) 88 | if "<" in std_expr and "<=" in std_expr: 89 | raise ValueError(f"Either '<' or '<=' may be used in {expr}, not both.") 90 | if ">" in std_expr and ">=" in std_expr: 91 | raise ValueError(f"Either '>' or '>=' may be used in {expr}, not both.") 92 | return std_expr 93 | 94 | 95 | def validate_query( 96 | indexes: Dict, 97 | match: Optional[Dict[Union[str, Callable], Any]] = None, 98 | exclude: Optional[Dict[Union[str, Callable], Any]] = None, 99 | ): 100 | # input validation -- check that we have an index for all desired lookups 101 | required_indexes = set() 102 | if match: 103 | required_indexes.update(match.keys()) 104 | if exclude: 105 | required_indexes.update(exclude.keys()) 106 | missing_indexes = required_indexes.difference(indexes) 107 | if missing_indexes: 108 | raise AttributeNotFoundError( 109 | f"Cannot find on: {list(missing_indexes)}. Attributes must be specified on creation." 110 | ) 111 | 112 | 113 | def make_empty_array(dtype: str): 114 | """Shorthand for making a length-0 numpy array.""" 115 | return np.empty(0, dtype=dtype) 116 | 117 | 118 | def cyk_intersect(s1: Int64Set, s2: Int64Set) -> Int64Set: 119 | """Cykhash intersections are faster on small.intersect(big); handle that appropriately. 120 | https://github.com/realead/cykhash/issues/7""" 121 | return s1.intersection(s2) if len(s1) < len(s2) else s2.intersection(s1) 122 | 123 | 124 | def cyk_union(s1: Int64Set, s2: Int64Set) -> Int64Set: 125 | """Cykhash unions are faster on big.union(small); handle that appropriately. 
class Cat:
    """A named cat at position (x, y)."""

    def __init__(self, name, x, y):
        self.name = name
        self.x = x
        self.y = y


class Mouse:
    """A named mouse at position (x, y)."""

    def __init__(self, name, x, y):
        self.name = name
        self.x = x
        self.y = y


def in_range(mouse: Mouse, cat: Cat, radius: float = 1.0):
    """True when the cat is strictly within `radius` (Euclidean) of the mouse."""
    dist = ((mouse.x - cat.x) ** 2 + (mouse.y - cat.y) ** 2) ** 0.5
    return dist < radius


def main():
    # Imported lazily so the classes and in_range() are importable on their own.
    from ducks import Dex

    mice = [
        Mouse("Mickey", 0.3, 0.5),
        Mouse("Minnie", 0.3, 0.6),
        Mouse("Hannah", 5.3, 5.5),
        Mouse("Jerry", 5.1, 1.5),
    ]
    cats = [
        Cat("Tab", 4.0, 3.6),
        Cat("Tom", 4.9, 1.1),
        Cat("Hobbes", 2.2, 2.2),
        Cat("Garfield", 3.6, 1.9),
    ]

    def grid_x(obj):
        return int(obj.x)

    def grid_y(obj):
        return int(obj.y)

    def get_type(obj):
        return type(obj).__name__

    fb = Dex(mice + cats, [grid_x, grid_y, get_type])
    for mouse in mice:
        # Only examine Cats in this mouse's grid square or the adjacent ones.
        gx, gy = grid_x(mouse), grid_y(mouse)
        nearby_cats = fb[
            {
                grid_x: [gx, gx - 1, gx + 1],
                grid_y: [gy, gy - 1, gy + 1],
                get_type: "Cat",
            }
        ]
        for cat in nearby_cats:
            if in_range(mouse, cat):
                print(f"Mouse {mouse.name} is in range of cat {cat.name}!")


if __name__ == "__main__":
    main()
4, 61 | "id": "200558db", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "fb = Dex(None, on='x')\n", 66 | "cfb_read = ConcurrentDex(None, on='x', priority=READERS)\n", 67 | "cfb_write = ConcurrentDex(None, on='x', priority=WRITERS)\n", 68 | "cfb_fair = ConcurrentDex(None, on='x', priority=FAIR)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": 5, 74 | "id": "6dfedc2d", 75 | "metadata": {}, 76 | "outputs": [], 77 | "source": [ 78 | "def adds(box):\n", 79 | " t0 = time.time()\n", 80 | " for d in data:\n", 81 | " box.add(d)\n", 82 | " return time.time() - t0" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": 6, 88 | "id": "9dfcc06d", 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "def finds(box):\n", 93 | " t0 = time.time()\n", 94 | " for d in data:\n", 95 | " box.find({'x' :d['x']})\n", 96 | " return time.time() - t0\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 7, 102 | "id": "c998ba85", 103 | "metadata": {}, 104 | "outputs": [ 105 | { 106 | "name": "stdout", 107 | "output_type": "stream", 108 | "text": [ 109 | "box_type, add, find\n", 110 | "Dex, 0.9054300785064697, 3.01261830329895\n", 111 | "readers, 3.5315771102905273, 8.01264214515686\n", 112 | "writers, 5.2841057777404785, 9.07332968711853\n", 113 | "fair, 4.2892725467681885, 8.714087963104248\n" 114 | ] 115 | } 116 | ], 117 | "source": [ 118 | "print('box_type, add, find')\n", 119 | "for box in [fb, cfb_read, cfb_write, cfb_fair]:\n", 120 | " if type(box) is Dex:\n", 121 | " box_s = 'Dex'\n", 122 | " else:\n", 123 | " box_s = box.priority\n", 124 | " t_add = adds(box)\n", 125 | " t_read = finds(box)\n", 126 | " print(f'{box_s}, {t_add}, {t_read}')" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 8, 132 | "id": "84c5e70e", 133 | "metadata": { 134 | "scrolled": true 135 | }, 136 | "outputs": [ 137 | { 138 | "name": "stdout", 139 | "output_type": "stream", 140 | "text": [ 141 | 
"2.78 µs ± 23.8 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" 142 | ] 143 | } 144 | ], 145 | "source": [ 146 | "%%timeit \n", 147 | "fb.find({'x': 1})" 148 | ] 149 | }, 150 | { 151 | "cell_type": "code", 152 | "execution_count": 9, 153 | "id": "9b891e11", 154 | "metadata": {}, 155 | "outputs": [ 156 | { 157 | "name": "stdout", 158 | "output_type": "stream", 159 | "text": [ 160 | "8.09 µs ± 110 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" 161 | ] 162 | } 163 | ], 164 | "source": [ 165 | "%%timeit \n", 166 | "cfb_read.find({'x': 1})" 167 | ] 168 | }, 169 | { 170 | "cell_type": "code", 171 | "execution_count": 10, 172 | "id": "68be8c54", 173 | "metadata": {}, 174 | "outputs": [ 175 | { 176 | "name": "stdout", 177 | "output_type": "stream", 178 | "text": [ 179 | "8.84 µs ± 84.1 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "%%timeit \n", 185 | "cfb_write.find({'x': 1})" 186 | ] 187 | }, 188 | { 189 | "cell_type": "code", 190 | "execution_count": 11, 191 | "id": "10781c93", 192 | "metadata": {}, 193 | "outputs": [ 194 | { 195 | "name": "stdout", 196 | "output_type": "stream", 197 | "text": [ 198 | "8.46 µs ± 86.8 ns per loop (mean ± std. dev. 
of 7 runs, 100000 loops each)\n" 199 | ] 200 | } 201 | ], 202 | "source": [ 203 | "%%timeit \n", 204 | "cfb_fair.find({'x': 1})" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": null, 210 | "id": "3883fec9", 211 | "metadata": {}, 212 | "outputs": [], 213 | "source": [] 214 | } 215 | ], 216 | "metadata": { 217 | "kernelspec": { 218 | "display_name": "Python 3 (ipykernel)", 219 | "language": "python", 220 | "name": "python3" 221 | }, 222 | "language_info": { 223 | "codemirror_mode": { 224 | "name": "ipython", 225 | "version": 3 226 | }, 227 | "file_extension": ".py", 228 | "mimetype": "text/x-python", 229 | "name": "python", 230 | "nbconvert_exporter": "python", 231 | "pygments_lexer": "ipython3", 232 | "version": "3.9.7" 233 | } 234 | }, 235 | "nbformat": 4, 236 | "nbformat_minor": 5 237 | } 238 | -------------------------------------------------------------------------------- /examples/data/crossword_words.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/data/crossword_words.txt -------------------------------------------------------------------------------- /examples/img/word0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word0.png -------------------------------------------------------------------------------- /examples/img/word1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word1.png -------------------------------------------------------------------------------- /examples/img/word2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word2.png -------------------------------------------------------------------------------- /examples/img/word3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word3.png -------------------------------------------------------------------------------- /examples/img/word4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word4.png -------------------------------------------------------------------------------- /examples/img/word5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/examples/img/word5.png -------------------------------------------------------------------------------- /examples/pandas_index.py: -------------------------------------------------------------------------------- 1 | """ 2 | Demo - Using Ducks as an indexer for Pandas 3 | 4 | Pandas allows index columns and even supports multi-column indexing. 5 | https://pandas.pydata.org/pandas-docs/stable/user_guide/advanced.html 6 | However, its use is not very intuitive. If you'd rather use a Dex, here's how. 
7 | """ 8 | import random 9 | 10 | import pandas as pd 11 | from ducks import FrozenDex 12 | 13 | # make some objects 14 | objs = [ 15 | { 16 | "fruit": random.choice( 17 | ["apple", "banana", "cherry", "kiwi", "lime", "watermelon"] 18 | ), 19 | "size": i % 10, 20 | } 21 | for i in range(1000) 22 | ] 23 | 24 | # put them in a dataframe 25 | df = pd.DataFrame(objs) 26 | 27 | 28 | # make lookup functions that match attributes to dataframe rows 29 | def get_fruit(i): 30 | """Get the fruit for this position in the df""" 31 | return df.iloc[i]["fruit"] 32 | 33 | 34 | def get_size(i): 35 | return df.iloc[i]["size"] 36 | 37 | 38 | # Build index 39 | dex = FrozenDex(list(range(len(df))), [get_fruit, get_size]) 40 | 41 | 42 | # Perform index lookups 43 | rows = df.iloc[dex[{get_fruit: "apple", get_size: {">=": 8}}]] 44 | print(rows) 45 | -------------------------------------------------------------------------------- /examples/percentile.py: -------------------------------------------------------------------------------- 1 | """ 2 | Look up objects by the percentile rank of an attribute. 3 | 4 | In this example, we find requests with latency > p99 (99th percentile) 5 | and requests with median latency (50th percentile). 6 | """ 7 | import functools 8 | from bisect import bisect_left 9 | from typing import Any 10 | 11 | import numpy as np 12 | from ducks import Dex 13 | 14 | 15 | def percentile(cutoffs: np.ndarray, attr: str, obj: Any) -> int: 16 | """Compute percentile on obj[attr] according to the cutoffs.""" 17 | p = bisect_left(cutoffs, obj[attr]) 18 | # handle values that are outside the min and max of cutoffs. 19 | # can happen due to float precision errors, or when new data is added. 
20 | if p < 0: 21 | return 0 22 | if p > 99: 23 | return 99 24 | return p 25 | 26 | 27 | def main(): 28 | objs = [{"num": i, "latency": 1 + (i / 100) ** 3} for i in range(1000)] 29 | # make an array of size 100 containing the min cutoff values for each percentile 30 | latencies = np.array([obj["latency"] for obj in objs]) 31 | cutoffs = np.quantile(latencies, np.linspace(0, 1, 100)) 32 | p_latency = functools.partial(percentile, cutoffs, "latency") 33 | fb = Dex(objs, [p_latency]) 34 | print("requests with first-percentile latency:") 35 | for obj in fb[{p_latency: [0, 1]}]: 36 | print(obj) 37 | print("\nrequests with median (50th percentile) latency:") 38 | for obj in fb[{p_latency: 50}]: 39 | print(obj) 40 | print("\nrequests with 99th percentile latency:") 41 | for obj in fb[{p_latency: 99}]: 42 | print(obj) 43 | 44 | 45 | if __name__ == "__main__": 46 | main() 47 | -------------------------------------------------------------------------------- /examples/update.py: -------------------------------------------------------------------------------- 1 | from ducks import Dex 2 | 3 | 4 | class Changey: 5 | """A class containing a variable _n that changes. On change, it will update each Dex in its listeners.""" 6 | 7 | def __init__(self, n): 8 | self._n = n 9 | self.listeners = [] 10 | 11 | def add_listener(self, f: Dex): 12 | self.listeners.append(f) 13 | 14 | @property 15 | def n(self): 16 | return self._n 17 | 18 | @n.setter 19 | def n(self, new_n): 20 | for f in self.listeners: 21 | f.remove(self) 22 | self._n = new_n 23 | for f in self.listeners: 24 | f.add(self) 25 | 26 | 27 | def main(): 28 | objs = [Changey(1) for _ in range(10)] 29 | f = Dex(objs, ["n"]) 30 | for obj in objs: 31 | obj.add_listener(f) 32 | assert len(f[{"n": 1}]) == 10 33 | 34 | # change an object 35 | objs[0].n = 2 36 | 37 | # see that changes are propagated to Dex 38 | assert len(f[{"n": 1}]) == 9 39 | assert len(f[{"n": 2}]) == 1 40 | print("Completed. 
def activate_virtualenv_in_precommit_hooks(session: Session) -> None:
    """Activate virtualenv in hooks installed by pre-commit.

    This function patches git hooks installed by pre-commit to activate the
    session's virtual environment. This allows pre-commit to locate hooks in
    that environment when invoked from git.

    Args:
        session: The Session object.
    """
    assert session.bin is not None  # noqa: S101

    # The bindir may appear in hook scripts either Python-repr-quoted or
    # shell-quoted; strip the outermost quotes so substring checks on paths
    # inside the bindir (like /python) still match.
    quoted_forms = (repr(session.bin), shlex.quote(session.bin))
    bindirs = [form[1:-1] if form[0] in "'\"" else form for form in quoted_forms]

    virtualenv = session.env.get("VIRTUAL_ENV")
    if virtualenv is None:
        return

    headers = {
        # pre-commit < 2.16.0 writes Python hook scripts
        "python": f"""\
import os
os.environ["VIRTUAL_ENV"] = {virtualenv!r}
os.environ["PATH"] = os.pathsep.join((
    {session.bin!r},
    os.environ.get("PATH", ""),
))
""",
        # pre-commit >= 2.16.0 writes bash hook scripts
        "bash": f"""\
VIRTUAL_ENV={shlex.quote(virtualenv)}
PATH={shlex.quote(session.bin)}"{os.pathsep}$PATH"
""",
    }

    hookdir = Path(".git") / "hooks"
    if not hookdir.is_dir():
        return

    for hook in hookdir.iterdir():
        if hook.name.endswith(".sample") or not hook.is_file():
            continue
        if not hook.read_bytes().startswith(b"#!"):
            continue  # not a script

        text = hook.read_text()
        # Case-insensitive comparison on case-insensitive filesystems
        # (detected via Path("A") == Path("a")), exact match otherwise.
        mentions_bindir = any(
            Path("A") == Path("a") and bindir.lower() in text.lower() or bindir in text
            for bindir in bindirs
        )
        if not mentions_bindir:
            continue

        lines = text.splitlines()
        for executable, header in headers.items():
            if executable in lines[0].lower():
                # Splice the activation header right after the shebang line.
                lines.insert(1, dedent(header))
                hook.write_text("\n".join(lines))
                break
@session(python=python_versions[0])
def safety(session: Session) -> None:
    """Scan dependencies for insecure packages."""
    requirements = session.poetry.export_requirements()
    session.install("safety")
    # safety errors on some numpy vulnerabilities; fixing them needs python >=3.8.
    # Once numpy is upgraded to at least 1.2.2, remove --continue-on-error.
    session.run(
        "safety",
        "check",
        "--full-report",
        f"--file={requirements}",
        "--continue-on-error",
    )


@session(python=python_versions)
def mypy(session: Session) -> None:
    """Type-check using mypy."""
    cli_args = session.posargs or ["ducks"]
    session.install(".")
    session.install("mypy", "pytest")
    if mypy_type_packages:
        session.install(*mypy_type_packages)
    session.run("mypy", *cli_args)


@session(python=python_versions[0])
def bandit(session: Session) -> None:
    """Run bandit security tests"""
    cli_args = session.posargs or ["-r", "./ducks"]
    session.run("bandit", *cli_args)


@session(python=python_versions)
def tests(session: Session) -> None:
    """Run the test suite."""
    session.install(".")
    session.install(*test_requirements)
    session.run("poetry", "run", "pytest", *session.posargs)


@session(name="docs-build", python=python_versions[0])
def docs_build(session: Session) -> None:
    """Build the documentation."""
    cli_args = session.posargs or ["docs", "docs/_build"]
    if not session.posargs and "FORCE_COLOR" in os.environ:
        cli_args.insert(0, "--color")

    session.install(".")
    session.install("sphinx", "sphinx-rtd-theme")

    # Remove any stale build output before rebuilding.
    out_dir = Path("docs", "_build")
    if out_dir.exists():
        shutil.rmtree(out_dir)

    session.run("sphinx-build", *cli_args)
@session(python=python_versions[0])
def docs(session: Session) -> None:
    """Build and serve the documentation with live reloading on file changes."""
    cli_args = session.posargs or ["--open-browser", "docs", "docs/_build"]
    session.install(".")
    session.install("sphinx", "sphinx-autobuild", "sphinx-click", "furo")

    # Stale artifacts confuse sphinx-autobuild; start from a clean build dir.
    out_dir = Path("docs", "_build")
    if out_dir.exists():
        shutil.rmtree(out_dir)

    session.run("sphinx-autobuild", *cli_args)
def slow_wrapper(method):
    """Wrap `method` with a ~1 ms delay before each call.

    Useful in concurrency tests: the delay widens race windows, so race
    conditions that would otherwise be very rare trigger reliably.

    Args:
        method: any callable; its return value is passed through unchanged.

    Returns:
        A delayed version of `method`. Keyword arguments are now forwarded
        too (the previous version accepted only positional args, so wrapping
        a method called with kwargs raised TypeError).
    """

    def wrapped_method(*args, **kwargs):
        time.sleep(0.001)
        return method(*args, **kwargs)

    return wrapped_method
@pytest.mark.parametrize(
    "end_full, expected_len",
    [
        (True, 10),
        (False, 0),
    ],
)
def test_add_remove(priority, end_full, expected_len):
    """Hammer one ConcurrentDex from two writer threads; counts must stay consistent."""
    objs = [{"x": i % 2} for i in range(10)]

    box = ConcurrentDex(objs, ["x"], priority=priority)
    # box = Dex(objs, ['x'])  # <--- use this instead, and you will observe frequent failures on this test.

    # Patch the index's 'add' with a small delay, forcing race conditions to occur more often.
    box._indexes["x"].add = slow_wrapper(box._indexes["x"].add)

    deadline = time.time() + 0.2
    while time.time() < deadline:
        workers = [
            threading.Thread(
                target=worker_add_remove, args=[objs, box], kwargs={"end_full": end_full}
            )
            for _ in range(2)
        ]
        for w in workers:
            w.start()
        for w in workers:
            w.join()
    assert len(box) == expected_len
    assert len(box._indexes["x"]) == expected_len  # fails on Dex
def test_read_update(priority):
    """Five threads each bump every object's x once; all must end at 5."""
    objs = [{"x": 0} for _ in range(10)]
    cfb = ConcurrentDex(objs, ["x"], priority=priority)
    workers = [
        threading.Thread(target=worker_read_update, args=(cfb,)) for _ in range(5)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    for obj in cfb:
        assert obj["x"] == 5


def worker_update(cfb, obj_write_lock):
    # Hold the objects' own lock for the whole pass; cfb guards itself.
    with obj_write_lock:
        for obj in cfb:
            obj["x"] += 1
            cfb.update(obj)


def test_two_lock_updating(priority):
    """A more sensible locking strategy: objs has its own lock and cfb just
    worries about itself. Which one is correct kinda depends on how sensitive
    the user is to stale results: this one allows stale reads to occur, but it
    also allows reads to happen between the writes, which is nice."""
    objs = [{"x": 0} for _ in range(10)]
    cfb = ConcurrentDex(objs, ["x"], priority=priority)
    obj_lock = threading.Lock()
    workers = [
        threading.Thread(target=worker_update, args=(cfb, obj_lock)) for _ in range(5)
    ]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    for obj in cfb:
        assert obj["x"] == 5
15 | Cleaner to just implement an AssertRaises here. 16 | """ 17 | 18 | def __init__(self, exc_type): 19 | self.exc_type = exc_type 20 | 21 | def __enter__(self): 22 | pass 23 | 24 | def __exit__(self, exception_type, exception_value, exception_traceback): 25 | assert exception_type == self.exc_type 26 | return True # suppress the exception 27 | 28 | 29 | class Attr: 30 | def __init__(self, n: int): 31 | self.n = n 32 | 33 | def __hash__(self): 34 | return self.n 35 | 36 | def __eq__(self, other): 37 | return self.n == other.n 38 | 39 | def __repr__(self): 40 | return str(self.n) 41 | 42 | def __lt__(self, other): 43 | return self.n < other.n 44 | -------------------------------------------------------------------------------- /test/mutable/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manimino/ducks/0217a0e9673fde155a81ac9ab23dfd3538fcd235/test/mutable/__init__.py -------------------------------------------------------------------------------- /test/mutable/test_soak.py: -------------------------------------------------------------------------------- 1 | """ 2 | Dex (mutable form) is pretty complex. 3 | Let's run a lengthy test to make sure all the pieces work as expected across many add / remove operations. 
4 | """ 5 | import random 6 | import time 7 | from datetime import datetime 8 | 9 | from ducks import Dex 10 | from ducks.utils import get_attribute 11 | 12 | 13 | PLANETS = ( 14 | ["mercury"] * 1 15 | + ["venus"] * 2 16 | + ["earth"] * 4 17 | + ["mars"] * 8 18 | + ["jupiter"] * 16 19 | + ["saturn"] * 32 20 | + ["uranus"] * 64 21 | + ["neptune"] * 128 22 | ) 23 | 24 | 25 | class Collider: 26 | 27 | VALS = list(range(10)) 28 | 29 | def __init__(self): 30 | self.n = random.choice(self.VALS) 31 | 32 | def __hash__(self): 33 | return self.n % 2 34 | 35 | def __eq__(self, other): 36 | return self.n == other.n 37 | 38 | def __lt__(self, other): 39 | return self.n < other.n 40 | 41 | 42 | class Thing: 43 | def __init__(self, id_num): 44 | self.id_num = id_num 45 | self.ts_sec = datetime.now().replace(microsecond=0) 46 | self.ts = datetime.now() 47 | self.planet = random.choice(PLANETS) 48 | self.collider = Collider() 49 | if random.random() > 0.5: 50 | self.sometimes = True 51 | 52 | 53 | def planet_len(obj): 54 | if isinstance(obj, dict): 55 | return len(obj["planet"]) 56 | else: 57 | return len(obj.planet) 58 | 59 | 60 | def make_dict_thing(id_num): 61 | t = Thing(id_num) 62 | return { 63 | "id_num": t.id_num, 64 | "ts_sec": t.ts_sec, 65 | "ts": t.ts, 66 | "planet": t.planet, 67 | "collider": t.collider, 68 | planet_len: planet_len(t), 69 | } 70 | 71 | 72 | class SoakTest: 73 | """ 74 | Keep running insert / update / remove operations at random for a long time. 75 | Check periodically to make sure[] results are correct. 
76 | """ 77 | 78 | def __init__(self): 79 | self.t0 = time.time() 80 | self.t_report = {5 * i for i in range(1000)} 81 | random.seed(time.time()) 82 | self.seed = random.choice(range(10**6)) 83 | print("running soak test with seed:", self.seed) 84 | random.seed(self.seed) 85 | self.f = Dex(on=["ts_sec", "ts", "planet", "collider", "sometimes", planet_len]) 86 | # self.f = Dex(on=[planet_len]) 87 | self.objs = dict() 88 | self.max_id_num = 0 89 | 90 | def run(self, duration): 91 | while time.time() - self.t0 < duration: 92 | op = random.choice( 93 | [ 94 | self.add, 95 | self.add_many, 96 | self.remove, 97 | self.remove_all, 98 | self.check_equal, 99 | ] 100 | ) 101 | op() 102 | 103 | def add(self): 104 | self.max_id_num += 1 105 | # randomly pick between a dict and a class instance 106 | if random.random() < 0.5: 107 | t = Thing(self.max_id_num) 108 | else: 109 | t = make_dict_thing(self.max_id_num) 110 | self.objs[self.max_id_num] = t 111 | self.f.add(t) 112 | 113 | def add_many(self): 114 | for _ in range(random.choice([10, 100, 1000])): 115 | self.add() 116 | 117 | def remove(self): 118 | if self.objs: 119 | key = random.choice(list(self.objs.keys())) 120 | obj = self.objs[key] 121 | self.f.remove(obj) 122 | del self.objs[key] 123 | 124 | def remove_all(self): 125 | for t in self.objs.values(): 126 | self.f.remove(t) 127 | self.objs = dict() 128 | 129 | def remove_all_but_one(self): 130 | key = random.choice(list(self.objs.keys())) 131 | for k in self.objs: 132 | if k != key: 133 | self.f.remove(self.objs[k]) 134 | del self.objs[k] 135 | 136 | def random_obj(self): 137 | if not len(self.objs): 138 | return None 139 | return random.choice(list(self.objs.values())) 140 | 141 | def check_equal(self): 142 | # check a string key 143 | ls = [ 144 | o for o in self.objs.values() if get_attribute(o, "planet")[0] == "saturn" 145 | ] 146 | f_ls = self.f[{"planet": "saturn"}] 147 | assert len(ls) == len(f_ls) 148 | assert len(self.objs) == len(self.f._indexes["planet"]) 
149 | # check a functional key 150 | ls = [o for o in self.objs.values() if get_attribute(o, planet_len)[0] == 6] 151 | f_ls = self.f[{planet_len: 6}] 152 | assert len(ls) == len(f_ls) 153 | assert len(self.objs) == len(self.f._indexes[planet_len]) 154 | # check a null-ish key 155 | ls = [ 156 | o for o in self.objs.values() if get_attribute(o, "sometimes")[1] is False 157 | ] 158 | f_ls = self.f[{"sometimes": {"!=": True}}] 159 | assert len(ls) == len(f_ls) 160 | # check a colliding key 161 | c = Collider() 162 | ls = [o for o in self.objs.values() if get_attribute(o, "collider")[0] == c] 163 | f_ls = self.f[{"collider": c}] 164 | assert len(ls) == len(f_ls) 165 | assert len(self.objs) == len(self.f._indexes["collider"]) 166 | # check an object-ish key 167 | t = self.random_obj() 168 | if t is not None: 169 | target_ts = get_attribute(t, "ts_sec") 170 | ls = [ 171 | o 172 | for o in self.objs.values() 173 | if get_attribute(o, "ts_sec")[0] == target_ts[0] 174 | ] 175 | f_ls = self.f[{"ts_sec": target_ts[0]}] 176 | assert len(ls) == len(f_ls) 177 | 178 | 179 | def test_soak(): 180 | st = SoakTest() 181 | st.run(3) 182 | -------------------------------------------------------------------------------- /test/test_basic_operations.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | from typing import Optional 3 | from typing import Union 4 | 5 | import pytest 6 | from ducks import Dex 7 | from ducks import FrozenDex 8 | from ducks.utils import get_attributes 9 | 10 | from .conftest import AssertRaises 11 | 12 | 13 | @dataclass 14 | class Pokemon: 15 | name: str 16 | type1: str 17 | type2: Optional[str] 18 | 19 | def __repr__(self): 20 | if self.type2 is None: 21 | return f"{self.name}: {self.type1}" 22 | return f"{self.name}: {self.type1}/{self.type2}" 23 | 24 | def __hash__(self): 25 | t = (self.name, self.type1, self.type2) 26 | return hash(t) 27 | 28 | def __lt__(self, other): 29 | return 
self.name < other.name 30 | 31 | 32 | def make_test_ducks(box_class) -> Union[Dex, FrozenDex]: 33 | zapdos = Pokemon("Zapdos", "Electric", "Flying") 34 | pikachu_1 = Pokemon("Pikachu", "Electric", None) 35 | pikachu_2 = Pokemon("Pikachu", "Electric", None) 36 | eevee = Pokemon("Eevee", "Normal", None) 37 | f = box_class([zapdos, pikachu_1, pikachu_2, eevee], on=get_attributes(Pokemon)) 38 | return f 39 | 40 | 41 | def test_find_one(box_class): 42 | f = make_test_ducks(box_class) 43 | result = f[{"name": "Zapdos"}] 44 | assert len(result) == 1 45 | 46 | 47 | def test_find_union(box_class): 48 | f = make_test_ducks(box_class) 49 | result = f[{"name": ["Pikachu", "Eevee"]}] 50 | assert len(result) == 3 51 | 52 | 53 | def test_find_union_with_mismatch(box_class): 54 | f = make_test_ducks(box_class) 55 | result = f[{"name": ["Pikachu", "Shykadu"]}] 56 | assert len(result) == 2 57 | 58 | 59 | def test_find_in_iterable_of_one(box_class): 60 | f = make_test_ducks(box_class) 61 | result = f[{"name": {"in": {"Pikachu"}}}] 62 | assert len(result) == 2 63 | 64 | 65 | @pytest.mark.parametrize( 66 | "expr, expected_len", 67 | [ 68 | ({">": "Yapdos"}, 1), 69 | ({">=": "Yapdos"}, 1), 70 | ({">": "AAA", "<": "zzz"}, 4), 71 | ({">=": "Eevee", "<": "Pikachu"}, 1), 72 | ({">=": "Eevee", "<=": "Pikachu"}, 3), 73 | ({"ge": "Eevee", "le": "Pikachu"}, 3), 74 | ({">": "Eevee", "<": "Zapdos"}, 2), 75 | ({"gt": "Eevee", "lt": "Zapdos"}, 2), 76 | ({"<": "Eevee"}, 0), 77 | ({"<": "Eevee", ">": "Zapdos"}, 0), 78 | ], 79 | ) 80 | def test_find_greater_less(box_class, expr, expected_len): 81 | f = make_test_ducks(box_class) 82 | result = f[{"name": expr}] 83 | assert len(result) == expected_len 84 | 85 | 86 | def test_find_sub_obj(box_class): 87 | objs = [ 88 | {"p": Pokemon("Zapdos", "Electric", "Flying")}, 89 | {"p": Pokemon("Pikachu", "Electric", None)}, 90 | ] 91 | f = box_class(objs, on=["p"]) 92 | found_empty = f[{}] 93 | assert len(found_empty) == 2 94 | for obj in objs: 95 | assert obj in 
found_empty 96 | 97 | 98 | def test_find_exclude_only(box_class): 99 | f = make_test_ducks(box_class) 100 | result = f[{"type2": {"!=": None}}] # Zapdos is the only one with a type2 101 | assert len(result) == 1 102 | assert result[0].name == "Zapdos" 103 | 104 | 105 | def test_two_attrs(box_class): 106 | f = make_test_ducks(box_class) 107 | result = f[ 108 | { 109 | "name": {"in": ["Pikachu", "Zapdos"]}, 110 | "type1": "Electric", 111 | "type2": {"!=": "Flying"}, 112 | } 113 | ] 114 | assert len(result) == 2 115 | assert result[0].name == "Pikachu" 116 | assert result[1].name == "Pikachu" 117 | 118 | 119 | def test_three_attrs(box_class): 120 | f = make_test_ducks(box_class) 121 | result = f[ 122 | { 123 | "name": {"in": ["Pikachu", "Zapdos"]}, 124 | "type1": "Electric", 125 | "type2": "Flying", 126 | } 127 | ] 128 | assert len(result) == 1 129 | assert result[0].name == "Zapdos" 130 | 131 | 132 | def test_exclude_all(box_class): 133 | f = make_test_ducks(box_class) 134 | result = f[{"type1": {"not in": ["Electric", "Normal"]}}] 135 | assert len(result) == 0 136 | 137 | 138 | def test_remove(box_class): 139 | f = make_test_ducks(box_class) 140 | two_chus = f[{"name": "Pikachu"}] 141 | assert len(two_chus) == 2 142 | if box_class == FrozenDex: 143 | with AssertRaises(AttributeError): 144 | f.remove(two_chus[1]) 145 | else: 146 | f.remove(two_chus[1]) 147 | one_chu = f[{"name": "Pikachu"}] 148 | assert len(one_chu) == 1 149 | 150 | 151 | def test_add(box_class): 152 | f = make_test_ducks(box_class) 153 | glaceon = Pokemon("Glaceon", "Ice", None) 154 | if box_class == FrozenDex: 155 | with AssertRaises(AttributeError): 156 | f.add(glaceon) 157 | else: 158 | f.add(glaceon) 159 | res = f[{"name": "Glaceon"}] 160 | assert res == [glaceon] 161 | 162 | 163 | def test_multi_exclude(box_class): 164 | fb = make_test_ducks(box_class) 165 | res = fb[{"name": {"!=": "Pikachu"}, "type1": {"not in": ["Normal"]}}] 166 | zapdos_ls = [p for p in fb if p.name == "Zapdos"] 167 | 
assert res == zapdos_ls 168 | 169 | 170 | def test_get_values(box_class): 171 | fb = make_test_ducks(box_class) 172 | assert fb.get_values("name") == {"Zapdos", "Pikachu", "Eevee"} 173 | assert fb.get_values("type1") == {"Electric", "Normal"} 174 | assert fb.get_values("type2") == {"Flying", None} 175 | -------------------------------------------------------------------------------- /test/test_btree.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks.btree import BTree 3 | 4 | from .conftest import AssertRaises 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "expr, result", 9 | [ 10 | ({">": 8}, [9]), 11 | ({">": 6}, [7, 8, 9]), 12 | ({"<": 1}, [0]), 13 | ({"<": 3}, [0, 1, 2]), 14 | ({">=": 9}, [9]), 15 | ({">=": 9, "<": 1}, []), 16 | ({">=": 5, "<": 6}, [5]), 17 | ({">=": 5, "<=": 5}, [5]), 18 | ({">": 6, "<=": 7}, [7]), 19 | ({">": 6, "<=": 8}, [7, 8]), 20 | ({">": 6, "<=": 6}, []), 21 | ({">=": 6, "<": 6}, []), 22 | ({">=": 999}, []), 23 | ({">": 999}, []), 24 | ({"<=": -1}, []), 25 | ({"<": -1}, []), 26 | ({">": 9.5}, []), 27 | ({">": 8.5, "<": 9.5}, [9]), 28 | ({">=": 7.5, "<=": 9.5}, [8, 9]), 29 | ({">": 4, "<": 3}, []), 30 | ({">=": 4, "<=": 3}, []), 31 | ({">=": 999, "<=": -1}, []), 32 | ({">": 999, "<": -1}, []), 33 | ({">": -100, "<": 100}, list(range(10))), 34 | ({">=": -100, "<=": 100}, list(range(10))), 35 | ], 36 | ) 37 | def test_get_range_expr(expr, result): 38 | bt = BTree({i: i for i in range(10)}) 39 | assert list(bt.get_range_expr(expr)) == result 40 | 41 | 42 | def test_init_with_none(): 43 | objs = {i: i for i in range(10)} 44 | objs[None] = 13 45 | with AssertRaises(TypeError): 46 | _ = BTree(objs) 47 | 48 | 49 | def test_add_none(): 50 | objs = {i: i for i in range(10)} 51 | bt = BTree(objs) 52 | with AssertRaises(TypeError): 53 | bt[None] = 13 54 | 55 | 56 | def test_get(): 57 | bt = BTree({1: "a"}) 58 | assert bt.get(1) == "a" 59 | assert bt[1] == "a" 60 | assert bt.get(2) is None 
61 | assert bt.get(3, 4) == 4 62 | 63 | 64 | def test_get_empty(): 65 | bt = BTree() 66 | assert len(bt.get_range_expr({">": 5})) == 0 67 | assert bt.get(3) is None 68 | assert bt.get(3, 45) == 45 69 | with AssertRaises(KeyError): 70 | _ = bt[3] 71 | 72 | 73 | def test_len_full_init(): 74 | bt = BTree({i: i for i in range(10)}) 75 | assert len(bt) == 10 76 | del bt[0] 77 | assert len(bt) == 9 78 | bt[0] = 0 79 | assert len(bt) == 10 80 | bt[1] = 99 # key already present 81 | assert len(bt) == 10 82 | 83 | 84 | def test_len_empty_init(): 85 | bt = BTree() 86 | assert len(bt) == 0 87 | bt[0] = 0 88 | assert len(bt) == 1 89 | bt[0] = 99 # key already present 90 | assert len(bt) == 1 91 | del bt[0] 92 | assert len(bt) == 0 93 | 94 | 95 | def test_keys_values(): 96 | bt = BTree({"a": 1, "b": 2}) 97 | assert list(bt.keys()) == ["a", "b"] 98 | assert list(bt.values()) == [1, 2] 99 | assert list(bt.items()) == [("a", 1), ("b", 2)] 100 | 101 | 102 | def test_bad_expr(): 103 | bt = BTree({"a": 1, "b": 2}) 104 | with AssertRaises(TypeError): 105 | bt.get_range_expr({"<=": 99}) 106 | 107 | 108 | def test_bad_first_insert(): 109 | bt = BTree() 110 | with AssertRaises(TypeError): 111 | bt[{"x": 1}] = 5 112 | bt = BTree() 113 | -------------------------------------------------------------------------------- /test/test_container_ops.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks import Dex 3 | from ducks import FrozenDex 4 | from ducks.constants import SIZE_THRESH 5 | 6 | 7 | def test_iter_small(box_class): 8 | ls = [{"i": i} for i in range(5)] 9 | f = box_class(ls, ["i"]) 10 | assert len(f) == len(ls) 11 | f_ls = list(f) 12 | assert len(f_ls) == len(ls) 13 | for item in ls: 14 | assert item in f_ls 15 | assert len(f_ls) == len(ls) 16 | 17 | 18 | @pytest.mark.parametrize( 19 | "idx_order", 20 | [ 21 | ["i", "j"], 22 | ["j", "i"], 23 | ], 24 | ) 25 | def test_iter_large(box_class, idx_order): 26 | ls = [{"i": i, 
"j": -(i % 3)} for i in range(SIZE_THRESH * 3 + 3)] 27 | ls += [{"j": 16}] # make sure there's at least one hasfbucket 28 | f = box_class(ls, idx_order) 29 | assert len(f) == len(ls) 30 | f_ls = list(f) 31 | assert len(f_ls) == len(ls) 32 | for item in ls: 33 | assert item in f_ls 34 | assert len(f_ls) == len(ls) 35 | 36 | 37 | @pytest.mark.parametrize( 38 | "idx_order", 39 | [ 40 | ["i", "j"], 41 | ["j", "i"], 42 | ], 43 | ) 44 | def test_make_from(box_class, idx_order): 45 | """See if we can make one index type from the other type.""" 46 | make_type = Dex if box_class == FrozenDex else FrozenDex 47 | ls = [{"i": i, "j": -(i % 3)} for i in range(SIZE_THRESH * 3 + 3)] 48 | f = box_class(ls, on=idx_order) 49 | other_f = make_type(f, on=idx_order) 50 | assert len(other_f) == len(f) 51 | 52 | 53 | def test_box_contains(box_class): 54 | ls = [{"i": i} for i in range(5)] 55 | f = box_class(ls, ["i"]) 56 | for item in ls: 57 | assert item in f 58 | 59 | 60 | def test_box_not_contains(box_class): 61 | yes = {"i": 1} 62 | f = box_class([yes], "i") 63 | # test a ton of these because coverage can drop otherwise 64 | for i in [None, -1000, "apples", 1000, (1, 2, 3), 0.5] + list(range(100)): 65 | no = {"i": i} 66 | assert no not in f 67 | -------------------------------------------------------------------------------- /test/test_edge_cases.py: -------------------------------------------------------------------------------- 1 | from ducks import Dex 2 | 3 | from .conftest import AssertRaises 4 | 5 | 6 | def test_get_zero(box_class): 7 | def _f(x): 8 | return x[0] 9 | 10 | f = box_class(["a", "b", "c"], on=[_f]) 11 | assert f[{_f: "c"}] == ["c"] 12 | assert len(f[{_f: "d"}]) == 0 13 | 14 | 15 | def test_get_in_no_results(box_class): 16 | def _f(x): 17 | return x[0] 18 | 19 | f = box_class(["a", "b", "c"], on=[_f]) 20 | assert len(f[{_f: {"in": ["d"]}}]) == 0 21 | assert len(f[{_f: {"in": []}}]) == 0 22 | 23 | 24 | def test_double_add(): 25 | f = Dex(on="s") 26 | x = {"s": 
"hello"} 27 | f.add(x) 28 | f.add(x) 29 | assert len(f) == 1 30 | assert f[{"s": "hello"}] == [x] 31 | f.remove(x) 32 | assert len(f) == 0 33 | assert f[{"s": "hello"}] == [] 34 | 35 | 36 | def test_empty_index(box_class): 37 | f = box_class([], on=["stuff"]) 38 | result = f[{"stuff": 3}] 39 | assert len(result) == 0 40 | result = f[{"stuff": {"<": 3}}] 41 | assert len(result) == 0 42 | 43 | 44 | def test_arg_order(): 45 | data = [{"a": i % 5, "b": i % 3} for i in range(100)] 46 | f = Dex(data, ["a", "b"]) 47 | assert len(f[{"a": 1, "b": 2}]) == len(f[{"b": 2, "a": 1}]) 48 | 49 | 50 | class NoSort: 51 | def __init__(self, x): 52 | self.x = x 53 | 54 | def __hash__(self): 55 | return hash(self.x) 56 | 57 | def __eq__(self, other): 58 | return self.x == other.x 59 | 60 | 61 | def test_unsortable_values(box_class): 62 | """We need to support values that are hashable, even if they cannot be sorted.""" 63 | objs = [{"a": NoSort(0)}, {"a": NoSort(1)}] 64 | with AssertRaises(TypeError): 65 | box_class(objs, ["a"]) 66 | 67 | 68 | def test_not_in(box_class): 69 | """the things we do for 100% coverage""" 70 | f = box_class([{"a": 1}], on=["a"]) 71 | assert {"a": 0} not in f 72 | assert {"a": 2} not in f 73 | 74 | 75 | def test_in_with_greater(box_class): 76 | """ 77 | Technically someone could query a '<' along with an 'in'. Does that work properly? 
78 | """ 79 | f = box_class([{"a": 1}], on="a") 80 | assert len(f[{"a": {"<=": 1, "in": [1]}}]) == 1 81 | assert len(f[{"a": {">": 1, "in": [1]}}]) == 0 82 | assert len(f[{"a": {"<=": 1, "in": [0]}}]) == 0 83 | assert len(f[{"a": {"<": 1, "in": [0]}}]) == 0 84 | -------------------------------------------------------------------------------- /test/test_examples.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | def test_get_nearby(box_class): 5 | # set of tuples 6 | t = {(random.random() * 10, random.random() * 10) for _ in range(10**4)} 7 | 8 | def _x(obj): 9 | return int(obj[0]) 10 | 11 | def _y(obj): 12 | return int(obj[1]) 13 | 14 | f = box_class(t, [_x, _y]) 15 | for pt in f[{_x: 0, _y: 0}]: 16 | assert _x(pt) < 1 and _y(pt) < 1 17 | 18 | 19 | def test_wordle(box_class): 20 | ws = [ 21 | ("ABOUT", 1226734006), 22 | ("OTHER", 978481319), 23 | ("WHICH", 810514085), 24 | ("THEIR", 782849411), 25 | ] 26 | 27 | def has_t(w): 28 | return "T" in w[0] 29 | 30 | def has_h(w): 31 | return "H" in w[0] 32 | 33 | f = box_class(ws, [has_t, has_h]) 34 | found = f[{}] 35 | found_ws = [f[0] for f in found] 36 | for w in ws: 37 | assert w[0] in found_ws 38 | -------------------------------------------------------------------------------- /test/test_exceptions.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks import ConcurrentDex 3 | from ducks import Dex 4 | from ducks import FrozenDex 5 | from ducks.constants import SIZE_THRESH 6 | from ducks.exceptions import AttributeNotFoundError 7 | 8 | from .conftest import AssertRaises 9 | from .conftest import Attr 10 | 11 | 12 | def test_remove_empty(): 13 | f = Dex([], on=["stuff"]) 14 | with AssertRaises(KeyError): 15 | f.remove("nope") 16 | 17 | 18 | def test_no_index(): 19 | with AssertRaises(ValueError): 20 | Dex(["a"]) 21 | 22 | 23 | def test_empty_index(): 24 | with AssertRaises(ValueError): 25 | 
FrozenDex(["a"], []) 26 | 27 | 28 | def test_bad_query(box_class): 29 | f = box_class([{"a": 1}], on=["a"]) 30 | with AssertRaises(TypeError): 31 | _ = f[[]] 32 | with AssertRaises(TypeError): 33 | _ = f[["a", 1]] 34 | with AssertRaises(AttributeNotFoundError): 35 | _ = f[{"b": 1}] 36 | 37 | 38 | @pytest.mark.parametrize("n_items", [1, 5, SIZE_THRESH + 1]) 39 | def test_remove_missing_value(n_items): 40 | """ 41 | When the value hashes to a bucket, but the bucket does not contain the value, is 42 | an empty result correctly retrieved? 43 | """ 44 | data = [Attr(i) for i in range(5)] 45 | f = Dex(data, ["n"]) 46 | assert len(f[{"n": -1}]) == 0 47 | with AssertRaises(KeyError): 48 | f.remove(Attr(-1)) 49 | 50 | 51 | def test_bad_priority(): 52 | with AssertRaises(ValueError): 53 | _ = ConcurrentDex(None, on=["x"], priority="lol") 54 | 55 | 56 | def test_bad_expr(box_class): 57 | f = box_class(["ok"], on="x") 58 | with AssertRaises(ValueError): 59 | _ = f[{"x": {">", 2}}] 60 | 61 | 62 | def test_bad_operator(box_class): 63 | f = box_class(["ok"], on="x") 64 | with AssertRaises(ValueError): 65 | _ = f[{"x": {"qq": 2}}] 66 | 67 | 68 | def test_bad_gt_lt(box_class): 69 | f = box_class(["ok"], on="x") 70 | with AssertRaises(ValueError): 71 | _ = f[{"x": {">": 2, ">=": 3}}] 72 | with AssertRaises(ValueError): 73 | _ = f[{"x": {"<": 2, "<=": 3}}] 74 | -------------------------------------------------------------------------------- /test/test_fancy_gets.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test attribute lookups of different kinds 3 | e.g. 
getting dict attributes, or applying functions, or getting properties from namedtuples 4 | """ 5 | import pytest 6 | from ducks.constants import SIZE_THRESH 7 | 8 | 9 | def make_dict_data(): 10 | dicts = [ 11 | {"t0": 0.1, "t1": 0.2, "s": "ABC"}, 12 | {"t0": 0.3, "t1": 0.4, "s": "DEF"}, 13 | {"t0": 0.5, "t1": 0.6, "s": "GHI"}, 14 | ] 15 | return dicts 16 | 17 | 18 | def test_dicts(box_class): 19 | dicts = make_dict_data() 20 | f = box_class(dicts, ["t0", "t1", "s"]) 21 | result = f[ 22 | { 23 | "t0": {"in": [0.1, 0.3]}, 24 | "s": {"in": ["ABC", "DEF"]}, 25 | "t1": {"!=": 0.4}, 26 | } 27 | ] 28 | assert result == [dicts[0]] 29 | 30 | 31 | def test_getter_fn(box_class): 32 | def _middle_letter(obj): 33 | return obj["s"][1] 34 | 35 | dicts = make_dict_data() 36 | f = box_class(dicts, on=[_middle_letter]) 37 | result = f[{_middle_letter: "H"}] 38 | assert result == [dicts[2]] 39 | 40 | 41 | @pytest.mark.parametrize("n", [SIZE_THRESH + 1, 5]) 42 | def test_get_all(box_class, n): 43 | """There's a special fast-path when all items are being retrieved.""" 44 | f = box_class([{"a": 1} for _ in range(n)], ["a"]) 45 | result = f[{}] 46 | assert len(result) == n 47 | -------------------------------------------------------------------------------- /test/test_missing_attribute.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks import ANY 3 | from ducks import Dex 4 | from ducks.constants import SIZE_THRESH 5 | from ducks.exceptions import MissingAttribute 6 | 7 | 8 | @pytest.mark.parametrize("n_items", [1, 5, SIZE_THRESH + 1]) 9 | def test_missing_function(box_class, n_items): 10 | def even(obj): 11 | if obj % 2: 12 | raise MissingAttribute 13 | return True 14 | 15 | objs = range(n_items) 16 | fb = box_class(objs, [even]) 17 | n_even = len([x for x in range(n_items) if x % 2 == 0]) 18 | n_odd = n_items - n_even 19 | assert len(fb) == n_items 20 | assert len(fb[{even: True}]) == n_even 21 | assert len(fb[{even: 
{"!=": True}}]) == n_odd 22 | for idx in fb._indexes.values(): 23 | assert len(idx) == n_even 24 | 25 | 26 | missing_attr_data = [ 27 | {"a": 1, "b": 2}, 28 | {"a": 3}, 29 | {"b": 4}, 30 | {}, 31 | ] 32 | 33 | 34 | def test_add_with_missing_attributes(): 35 | fb = Dex([], ["a", "b"]) 36 | for d in missing_attr_data: 37 | fb.add(d) 38 | assert len(fb) == 4 39 | assert len(fb._indexes["a"]) == 2 40 | assert len(fb._indexes["b"]) == 2 41 | assert len(fb[{"b": {"not in": [2, 4]}}]) == 2 42 | assert len(fb[{"a": {"not in": [1, 3]}}]) == 2 43 | 44 | 45 | def test_remove_with_missing_attributes(): 46 | fb = Dex(missing_attr_data, ["a", "b"]) 47 | for d in missing_attr_data: 48 | fb.remove(d) 49 | assert len(fb) == 0 50 | for idx in fb._indexes.values(): 51 | assert len(idx) == 0 52 | 53 | 54 | def test_missing_attributes(box_class): 55 | fb = box_class(missing_attr_data, ["a", "b"]) 56 | for d in missing_attr_data: 57 | assert d in fb 58 | assert len(fb._indexes["a"]) == 2 59 | assert len(fb._indexes["b"]) == 2 60 | 61 | 62 | def test_add_none(): 63 | f = Dex(on="s") 64 | f.add(None) 65 | result = f[{"s": None}] 66 | assert result == [] 67 | 68 | 69 | def test_empty_attribute(box_class): 70 | fb = box_class([None], on=["a"]) 71 | assert len(fb) == 1 72 | 73 | 74 | def test_find_having_attr(box_class): 75 | fb = box_class(missing_attr_data, ["a", "b"]) 76 | assert len(fb[{"a": ANY}]) == 2 77 | assert len(fb[{"b": ANY}]) == 2 78 | assert len(fb[{"a": 1, "b": ANY}]) == 1 79 | 80 | 81 | def test_find_missing_attr(box_class): 82 | fb = box_class(missing_attr_data, ["a", "b"]) 83 | assert len(fb[{"a": {"!=": ANY}}]) == 2 84 | assert len(fb[{"b": {"!=": ANY}}]) == 2 85 | assert len(fb[{"a": 3, "b": {"!=": ANY}}]) == 1 86 | assert len(fb[{"a": {"!=": ANY}, "b": {"!=": ANY}}]) == 1 87 | 88 | 89 | @pytest.mark.parametrize("n_items", [2, 10, SIZE_THRESH * 2 + 2]) 90 | def test_many_missing(box_class, n_items): 91 | data = [] 92 | for i in range(n_items): 93 | if i % 2: 94 | 
data.append({"a": 1}) 95 | else: 96 | data.append({}) 97 | fb = box_class(data, ["a"]) 98 | assert len(fb[{"a": ANY}]) == n_items // 2 99 | assert len(fb[{"a": {"!=": ANY}}]) == n_items // 2 100 | 101 | 102 | @pytest.mark.parametrize("n_items", [2, 10, SIZE_THRESH * 2 + 2]) 103 | def test_get_values(box_class, n_items): 104 | data = [] 105 | for i in range(n_items): 106 | if i % 2: 107 | data.append({"a": 1}) 108 | else: 109 | data.append({}) 110 | fb = box_class(data, ["a"]) 111 | assert fb.get_values("a") == {1} 112 | -------------------------------------------------------------------------------- /test/test_mixed_cardinality.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks.constants import SIZE_THRESH 3 | 4 | from .conftest import Attr 5 | 6 | 7 | @pytest.mark.parametrize("thresh", [10**i for i in range(5)]) 8 | def test_thresh(box_class, thresh): 9 | def size_thresh_n(obj): 10 | return obj["size"] < thresh 11 | 12 | n_items = 10**4 13 | objs = [{"size": i} for i in range(n_items)] 14 | fb = box_class(objs, [size_thresh_n]) 15 | assert len(fb[{size_thresh_n: True}]) == thresh 16 | assert len(fb[{size_thresh_n: False}]) == n_items - thresh 17 | 18 | 19 | def test_bad_hash_mixed(box_class): 20 | objs = [{"n": Attr(i)} for i in range(100)] + [ 21 | {"n": Attr(0)} for _ in range(SIZE_THRESH + 1) 22 | ] 23 | fb = box_class(objs, ["n"]) 24 | assert len(fb[{"n": objs[1]["n"]}]) == 1 25 | assert len(fb[{"n": objs[0]["n"]}]) == SIZE_THRESH + 2 26 | -------------------------------------------------------------------------------- /test/test_multiple_operations.py: -------------------------------------------------------------------------------- 1 | def test_eq_and_greater(box_class): 2 | objs = [{"x": i} for i in range(10)] 3 | fb = box_class(objs, "x") 4 | assert fb[{"x": {"==": 1, ">": 0}}] == [objs[1]] 5 | 6 | 7 | def test_eq_and_in(box_class): 8 | objs = [{"x": i} for i in range(10)] 9 | fb = 
box_class(objs, "x") 10 | assert fb[{"x": {"eq": 1, "in": [1, 2, 3]}}] == [objs[1]] 11 | 12 | 13 | def test_greater_less_and_in(box_class): 14 | objs = [{"x": i} for i in range(10)] 15 | fb = box_class(objs, "x") 16 | assert len(fb[{"x": {"gt": 1, "lt": 5, "in": [1, 2, 3]}}]) == 2 17 | 18 | 19 | def test_gte_lte_in_and_eq(box_class): 20 | objs = [{"x": i} for i in range(10)] 21 | fb = box_class(objs, "x") 22 | assert len(fb[{"x": {"gte": 1, "lte": 5, "in": [1, 2, 3], "eq": 2}}]) == 1 23 | -------------------------------------------------------------------------------- /test/test_mutations.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks.constants import ARRAY_SIZE_MAX 3 | from ducks.constants import SET_SIZE_MIN 4 | from ducks.constants import SIZE_THRESH 5 | 6 | 7 | @pytest.mark.parametrize( 8 | "n_items", [SIZE_THRESH, ARRAY_SIZE_MAX + 1, SET_SIZE_MIN - 1, SET_SIZE_MIN + 1] 9 | ) 10 | def test_many_gets(box_class, n_items): 11 | """At one point there was a bug involving several sequential gets, let's make sure that can't come back.""" 12 | 13 | def f5(i): 14 | return i["n"] % 5 15 | 16 | data = [{"n": i} for i in range(n_items)] 17 | f = box_class(data, ["n", f5]) 18 | for _ in range(4): 19 | # just a lot of queries in every conceivable flavor 20 | assert len(f[{"n": {"in": [1, 2, 3, 4, 5]}, f5: {"in": [3, 4]}}]) == 2 21 | assert len(f[{"n": {"in": [1, 2]}, f5: {"in": [1, 2]}}]) == 2 22 | assert len(f[{"n": {"in": [1, 2, 3, 4, 5]}}]) == 5 23 | assert len(f[{"n": {"in": [1, 2, 3, 4, 5]}, f5: {"not in": [1, 2]}}]) == 3 24 | assert len(f[{"n": {"in": [6, 7, 8], "!=": 3}, f5: {"not in": [1, 2]}}]) == 1 25 | assert ( 26 | len(f[{"n": {"in": [6, 7, 8], "!=": -1000}, f5: {"not in": [3, 4]}}]) == 2 27 | ) 28 | assert ( 29 | len(f[{f5: {"==": 1, "in": [3, 4]}, "n": {"==": -1000, "!=": -1000}}]) == 0 30 | ) 31 | assert ( 32 | len( 33 | f[ 34 | { 35 | "n": {"in": [-1000, 3, 4, 5], "!=": -1000}, 36 | f5: 
{"not in": [3, 4]}, 37 | } 38 | ] 39 | ) 40 | == 1 41 | ) 42 | assert len(f[{}]) == n_items 43 | 44 | 45 | def test_mutated_return(box_class): 46 | """If the user modifies the returned array, none of our arrays change, right?""" 47 | data = [{"n": 0} for _ in range(5)] 48 | f = box_class(data, ["n"]) 49 | arr = f[{"n": 0}] 50 | assert len(arr) == 5 51 | assert all(a["n"] == 0 for a in arr) 52 | arr[0] = {"n": 1} 53 | arr2 = f[{"n": 0}] 54 | assert len(arr) == 5 55 | assert all(a["n"] == 0 for a in arr2) 56 | -------------------------------------------------------------------------------- /test/test_nones.py: -------------------------------------------------------------------------------- 1 | """ 2 | None is a value that cannot be compared with <, > etc. But we definitely need 3 | to support it as it's a common attribute value. 4 | These tests check that None is handled properly. 5 | """ 6 | import pytest 7 | from ducks import Dex 8 | from ducks.constants import ARRAY_SIZE_MAX 9 | from ducks.constants import SET_SIZE_MIN 10 | 11 | 12 | def test_none(box_class): 13 | objs = [{"ok": i} for i in range(10)] 14 | objs.append({"ok": None}) 15 | fb = box_class(objs, "ok") 16 | assert len(fb[{"ok": None}]) == 1 17 | 18 | 19 | @pytest.mark.parametrize( 20 | "n_none", [1, ARRAY_SIZE_MAX - 1, ARRAY_SIZE_MAX + 1, SET_SIZE_MIN] 21 | ) 22 | def test_add_remove_none(n_none): 23 | objs = [{"a": i} for i in range(10)] 24 | for i in range(n_none): 25 | objs.append({"a": None}) 26 | fb = Dex(objs, "a") 27 | assert len(fb[{"a": [1, 2, None]}]) == 2 + n_none 28 | assert len(fb[{"a": [None]}]) == n_none 29 | fb.remove(objs[0]) # {'a': 0} 30 | fb.remove(objs[-1]) # {'a': None} 31 | assert len(fb) == len(objs) - 2 32 | -------------------------------------------------------------------------------- /test/test_pickling.py: -------------------------------------------------------------------------------- 1 | from ducks import load 2 | from ducks import save 3 | 4 | 5 | def 
test_save_and_load(box_class, tmp_path): 6 | fn = tmp_path / "box.pkl" 7 | objs = [{"i": i} for i in range(10)] 8 | box = box_class(objs, "i") 9 | save(box, fn) 10 | box2 = load(fn) 11 | assert len(box2) == 10 12 | objs2 = list(box2) # objs get cloned as well 13 | assert box2[{"i": 3}] == [objs2[3]] 14 | assert box2[{"i": [6]}] == [objs2[6]] 15 | assert box2[{"i": {">": 8}}] == [objs2[9]] 16 | for obj in objs2: 17 | assert obj in box2 18 | -------------------------------------------------------------------------------- /test/test_range_queries.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks.constants import SIZE_THRESH 3 | 4 | 5 | @pytest.mark.parametrize( 6 | "expr, result", 7 | [ 8 | ({">": 8}, [9]), 9 | ({">": 6}, [7, 8, 9]), 10 | ({"<": 1}, [0]), 11 | ({"<": 3}, [0, 1, 2]), 12 | ({">=": 9}, [9]), 13 | ({">=": 9, "<": 1}, []), 14 | ({">=": 5, "<": 6}, [5]), 15 | ({">=": 5, "<=": 5}, [5]), 16 | ({">": 6, "<=": 7}, [7]), 17 | ({">": 6, "<=": 8}, [7, 8]), 18 | ({">": 6, "<=": 6}, []), 19 | ({">=": 6, "<": 6}, []), 20 | ({">=": 999}, []), 21 | ({">": 999}, []), 22 | ({"<=": -1}, []), 23 | ({"<": -1}, []), 24 | ({">": 9.5}, []), 25 | ({">": 8.5, "<": 9.5}, [9]), 26 | ({">=": 7.5, "<=": 9.5}, [8, 9]), 27 | ({">": 4, "<": 3}, []), 28 | ({">=": 4, "<=": 3}, []), 29 | ({">=": 999, "<=": -1}, []), 30 | ({">": 999, "<": -1}, []), 31 | ({">": -100, "<": 100}, list(range(10))), 32 | ({">=": -100, "<=": 100}, list(range(10))), 33 | ], 34 | ) 35 | def test_get_range_expr(box_class, expr, result): 36 | objs = [{"a": i} for i in range(10)] + [{"a": None}] 37 | fb = box_class(objs, "a") 38 | assert list(sorted(o["a"] for o in fb[{"a": expr}])) == result 39 | 40 | 41 | @pytest.mark.parametrize( 42 | "expr", 43 | [ 44 | {">": 8}, 45 | {">": 6}, 46 | {"<": 1}, 47 | {"<": 3}, 48 | {">=": 9}, 49 | {">=": 9, "<": 1}, 50 | {">=": 5, "<": 6}, 51 | {">=": 5, "<=": 5}, 52 | {">": 6, "<=": 7}, 53 | {">": 6, "<=": 8}, 54 | 
{">": 6, "<=": 6}, 55 | {">=": 6, "<": 6}, 56 | {">=": 999}, 57 | {">": 999}, 58 | {"<=": -1}, 59 | {"<": -1}, 60 | {">": 9.5}, 61 | {">": 8.5, "<": 9.5}, 62 | {">=": 7.5, "<=": 9.5}, 63 | {">": 4, "<": 3}, 64 | {">=": 4, "<=": 3}, 65 | {">=": 999, "<=": -1}, 66 | {">": 999, "<": -1}, 67 | {">": -100, "<": 100}, 68 | {">=": -100, "<=": 100}, 69 | ], 70 | ) 71 | def test_get_big(box_class, expr): 72 | objs = [{"a": i % 10} for i in range(SIZE_THRESH * 11)] 73 | objs += [{"a": None} for _ in range(SIZE_THRESH + 1)] 74 | fb = box_class(objs, "a") 75 | found = fb[{"a": expr}] 76 | result = [o for o in objs if o["a"] is not None] 77 | for op, val in expr.items(): 78 | if op == ">": 79 | result = [o for o in result if o["a"] > val] 80 | if op == "<": 81 | result = [o for o in result if o["a"] < val] 82 | if op == ">=": 83 | result = [o for o in result if o["a"] >= val] 84 | if op == "<=": 85 | result = [o for o in result if o["a"] <= val] 86 | found = list(sorted(found, key=lambda o: o["a"])) 87 | result = list(sorted(result, key=lambda o: o["a"])) 88 | assert found == result 89 | -------------------------------------------------------------------------------- /test/test_stale_objects.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from ducks import Dex 3 | from ducks.constants import SIZE_THRESH 4 | 5 | from .conftest import AssertRaises 6 | from .conftest import Attr 7 | 8 | 9 | @pytest.mark.parametrize("n_items", [5, SIZE_THRESH + 1]) 10 | def test_get_stale_objects(box_class, n_items): 11 | objs = [{"z": Attr(1)} for _ in range(n_items)] 12 | f = box_class(objs, ["z"]) 13 | for o in objs: 14 | o["z"] = Attr(2) 15 | found = f[{"z": Attr(1)}] 16 | assert len(found) == n_items # still finds by their old value 17 | found = f[{"z": Attr(2)}] 18 | assert len(found) == 0 19 | 20 | 21 | @pytest.mark.parametrize("n_items", [1, SIZE_THRESH * 2 + 2]) 22 | def test_remove_stale_objects(n_items): 23 | objs = [{"z": 1} for 
_ in range(n_items)] 24 | f = Dex(objs, ["z"]) 25 | for o in objs: 26 | o["z"] = 2 27 | for o in objs: 28 | f.remove(o) 29 | assert len(f) == 0 30 | assert len(f._indexes["z"]) == 0 31 | 32 | 33 | @pytest.mark.parametrize("n_items", [1, 5, SIZE_THRESH * 2 + 2]) 34 | def test_remove_missing_object(n_items): 35 | objs = [{"z": Attr(1)} for _ in range(n_items)] 36 | f = Dex(objs, ["z"]) 37 | with AssertRaises(KeyError): 38 | f.remove(Attr(2)) 39 | 40 | 41 | def test_external_object_modification(box_class): 42 | """ 43 | What happens if the values are mutable, and someone mutates them externally? 44 | Answer: It gives an unexpected result. Attributes are stored by reference, so 45 | if the attribute is mutated externally, it will change inside the container as well. 46 | Luckily, this is rare; most attributes will be ints and strings which are immutable. 47 | Other python containers have the same problem -- you can break a frozenset if it has 48 | a mutable attribute as a key, for example. 49 | """ 50 | objs = [{"a": Attr(1)}] 51 | fb = box_class(objs, "a") 52 | assert len(fb[{"a": Attr(1)}]) == 1 53 | objs[0]["a"].n = 5000 54 | # external modification changed our results 55 | assert len(fb[{"a": Attr(1)}]) == 0 56 | assert len(fb[{"a": Attr(5000)}]) == 1 57 | -------------------------------------------------------------------------------- /test/test_wrong_type.py: -------------------------------------------------------------------------------- 1 | """ 2 | Now that we're using trees, all objects have to be comparable, including the query values. 3 | Try doing various bad things with types. 
4 | """ 5 | import pytest 6 | from ducks import Dex 7 | 8 | from .conftest import AssertRaises 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "expr, expected, raises", 13 | [ 14 | ("lol", 0, True), 15 | (["lol"], 0, True), 16 | ([1, "lol"], 1, True), 17 | ({"<": 3}, 3, False), 18 | ( 19 | {"<": "lol"}, 20 | 0, 21 | True, 22 | ), # todo implement frozen value based thing, then this will work 23 | ], 24 | ) 25 | def test_find_wrong_type(box_class, expr, expected, raises): 26 | if type(expr) is list: 27 | # you can't write {'in': ['lol']} in a parametrize 28 | # other keys work, but not 'in'. It looks like parametrize must 29 | # be calling eval() or something. Pretty annoying. 30 | expr = {"in": expr} 31 | objs = [{"x": i} for i in range(10)] 32 | fb = box_class(objs, "x") 33 | if raises: 34 | with AssertRaises(TypeError): 35 | fb[{"x": expr}] 36 | else: 37 | assert len(fb[{"x": expr}]) == expected 38 | 39 | 40 | def test_add_wrong_type(): 41 | objs = [{"x": i} for i in range(10)] 42 | fb = Dex(objs, "x") 43 | assert len(fb._indexes["x"].tree) == 10 44 | with AssertRaises(TypeError): 45 | fb.add({"x": "lol"}) 46 | -------------------------------------------------------------------------------- /tmp/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /tmp/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | # -- Path setup -------------------------------------------------------------- 7 | # If extensions (or modules to document with autodoc) are in another directory, 8 | # add these directories to sys.path here. If the directory is relative to the 9 | # documentation root, use os.path.abspath to make it absolute, like shown here. 10 | # 11 | # import os 12 | # import sys 13 | # sys.path.insert(0, os.path.abspath('.')) 14 | # -- Project information ----------------------------------------------------- 15 | 16 | project = "ducks" 17 | copyright = "2022, Theo Walker" 18 | author = "Theo Walker" 19 | 20 | # The full version, including alpha/beta/rc tags 21 | release = "1.0.0" 22 | 23 | 24 | # -- General configuration --------------------------------------------------- 25 | 26 | # Add any Sphinx extension module names here, as strings. They can be 27 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 28 | # ones. 29 | extensions = [] 30 | 31 | # Add any paths that contain templates here, relative to this directory. 32 | templates_path = ["_templates"] 33 | 34 | # List of patterns, relative to source directory, that match files and 35 | # directories to ignore when looking for source files. 36 | # This pattern also affects html_static_path and html_extra_path. 
37 | exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] 38 | 39 | 40 | # -- Options for HTML output ------------------------------------------------- 41 | 42 | # The theme to use for HTML and HTML Help pages. See the documentation for 43 | # a list of builtin themes. 44 | # 45 | html_theme = "alabaster" 46 | 47 | # Add any paths that contain custom static files (such as style sheets) here, 48 | # relative to this directory. They are copied after the builtin static files, 49 | # so a file named "default.css" will overwrite the builtin "default.css". 50 | html_static_path = ["_static"] 51 | -------------------------------------------------------------------------------- /tmp/index.rst: -------------------------------------------------------------------------------- 1 | .. ducks documentation master file, created by 2 | sphinx-quickstart on Fri Aug 26 11:10:55 2022. 3 | You can adapt this file completely to your liking, but it should at least 4 | contain the root `toctree` directive. 5 | 6 | Welcome to the ducks documentation! 7 | ================================= 8 | 9 | .. toctree:: 10 | :maxdepth: 2 11 | :caption: Contents: 12 | 13 | 14 | 15 | Indices and tables 16 | ================== 17 | 18 | * :ref:`genindex` 19 | * :ref:`modindex` 20 | * :ref:`search` 21 | -------------------------------------------------------------------------------- /tmp/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | %SPHINXBUILD% >NUL 2>NUL 14 | if errorlevel 9009 ( 15 | echo. 16 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 17 | echo.installed, then set the SPHINXBUILD environment variable to point 18 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 19 | echo.may add the Sphinx directory to PATH. 20 | echo. 21 | echo.If you don't have Sphinx installed, grab it from 22 | echo.https://www.sphinx-doc.org/ 23 | exit /b 1 24 | ) 25 | 26 | if "%1" == "" goto help 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | --------------------------------------------------------------------------------