├── .github └── workflows │ ├── cibuildwheel.yml │ ├── publish_pypi.yml │ └── tests.yml ├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── bin └── push-tag.sh ├── fabfile.py ├── include └── msvc9 │ └── stdint.h ├── preshed ├── __init__.pxd ├── __init__.py ├── about.py ├── bloom.pxd ├── bloom.pyx ├── counter.pxd ├── counter.pyx ├── maps.pxd ├── maps.pyx └── tests │ ├── __init__.py │ ├── test_bloom.py │ ├── test_counter.py │ ├── test_hashing.py │ └── test_pop.py ├── pyproject.toml ├── requirements.txt └── setup.py /.github/workflows/cibuildwheel.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | tags: 6 | # ytf did they invent their own syntax that's almost regex? 7 | # ** matches 'zero or more of any character' 8 | - 'release-v[0-9]+.[0-9]+.[0-9]+**' 9 | - 'prerelease-v[0-9]+.[0-9]+.[0-9]+**' 10 | jobs: 11 | build_wheels: 12 | name: Build wheels on ${{ matrix.os }} 13 | runs-on: ${{ matrix.os }} 14 | strategy: 15 | matrix: 16 | # macos-13 is an intel runner, macos-14 is apple silicon 17 | os: [ubuntu-latest, windows-latest, macos-13, macos-14, ubuntu-24.04-arm] 18 | 19 | steps: 20 | - uses: actions/checkout@v4 21 | - name: Build wheels 22 | uses: pypa/cibuildwheel@v2.21.3 23 | env: 24 | CIBW_SOME_OPTION: value 25 | with: 26 | package-dir: . 
27 | output-dir: wheelhouse 28 | config-file: "{package}/pyproject.toml" 29 | - uses: actions/upload-artifact@v4 30 | with: 31 | name: cibw-wheels-${{ matrix.os }}-${{ strategy.job-index }} 32 | path: ./wheelhouse/*.whl 33 | 34 | build_sdist: 35 | name: Build source distribution 36 | runs-on: ubuntu-latest 37 | steps: 38 | - uses: actions/checkout@v4 39 | 40 | - name: Build sdist 41 | run: pipx run build --sdist 42 | - uses: actions/upload-artifact@v4 43 | with: 44 | name: cibw-sdist 45 | path: dist/*.tar.gz 46 | create_release: 47 | needs: [build_wheels, build_sdist] 48 | runs-on: ubuntu-latest 49 | permissions: 50 | contents: write 51 | checks: write 52 | actions: read 53 | issues: read 54 | packages: write 55 | pull-requests: read 56 | repository-projects: read 57 | statuses: read 58 | steps: 59 | - name: Get the tag name and determine if it's a prerelease 60 | id: get_tag_info 61 | run: | 62 | FULL_TAG=${GITHUB_REF#refs/tags/} 63 | if [[ $FULL_TAG == release-* ]]; then 64 | TAG_NAME=${FULL_TAG#release-} 65 | IS_PRERELEASE=false 66 | elif [[ $FULL_TAG == prerelease-* ]]; then 67 | TAG_NAME=${FULL_TAG#prerelease-} 68 | IS_PRERELEASE=true 69 | else 70 | echo "Tag does not match expected patterns" >&2 71 | exit 1 72 | fi 73 | echo "FULL_TAG=$TAG_NAME" >> $GITHUB_ENV 74 | echo "TAG_NAME=$TAG_NAME" >> $GITHUB_ENV 75 | echo "IS_PRERELEASE=$IS_PRERELEASE" >> $GITHUB_ENV 76 | - uses: actions/download-artifact@v4 77 | with: 78 | # unpacks all CIBW artifacts into dist/ 79 | pattern: cibw-* 80 | path: dist 81 | merge-multiple: true 82 | - name: Create Draft Release 83 | id: create_release 84 | uses: softprops/action-gh-release@v2 85 | if: startsWith(github.ref, 'refs/tags/') 86 | env: 87 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 88 | with: 89 | name: ${{ env.TAG_NAME }} 90 | draft: true 91 | prerelease: ${{ env.IS_PRERELEASE }} 92 | files: "./dist/*" 93 | -------------------------------------------------------------------------------- /.github/workflows/publish_pypi.yml: 
-------------------------------------------------------------------------------- 1 | # The cibuildwheel action triggers on creation of a release, this 2 | # triggers on publication. 3 | # The expected workflow is to create a draft release and let the wheels 4 | # upload, and then hit 'publish', which uploads to PyPi. 5 | 6 | on: 7 | release: 8 | types: 9 | - published 10 | 11 | jobs: 12 | upload_pypi: 13 | runs-on: ubuntu-latest 14 | environment: 15 | name: pypi 16 | url: https://pypi.org/p/cymem 17 | permissions: 18 | id-token: write 19 | contents: read 20 | if: github.event_name == 'release' && github.event.action == 'published' 21 | # or, alternatively, upload to PyPI on every tag starting with 'v' (remove on: release above to use this) 22 | # if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') 23 | steps: 24 | - uses: robinraju/release-downloader@v1 25 | with: 26 | tag: ${{ github.event.release.tag_name }} 27 | fileName: '*' 28 | out-file-path: 'dist' 29 | - uses: pypa/gh-action-pypi-publish@release/v1 30 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | push: 5 | paths-ignore: 6 | - "*.md" 7 | pull_request: 8 | types: [opened, synchronize, reopened, edited] 9 | paths-ignore: 10 | - "*.md" 11 | 12 | env: 13 | MODULE_NAME: 'preshed' 14 | RUN_MYPY: 'false' 15 | 16 | jobs: 17 | tests: 18 | name: Test 19 | if: github.repository_owner == 'explosion' 20 | strategy: 21 | fail-fast: false 22 | matrix: 23 | os: [ubuntu-latest, windows-latest, macos-13] 24 | python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"] 25 | runs-on: ${{ matrix.os }} 26 | 27 | steps: 28 | - name: Check out repo 29 | uses: actions/checkout@v3 30 | 31 | - name: Configure Python version 32 | uses: actions/setup-python@v4 33 | with: 34 | python-version: ${{ matrix.python_version }} 35 | architecture: x64 
36 | 37 | - name: Build sdist 38 | run: | 39 | python -m pip install -U build pip setuptools 40 | python -m pip install -U -r requirements.txt 41 | python -m build --sdist 42 | 43 | - name: Run mypy 44 | shell: bash 45 | if: ${{ env.RUN_MYPY == 'true' }} 46 | run: | 47 | python -m mypy $MODULE_NAME 48 | 49 | - name: Delete source directory 50 | shell: bash 51 | run: | 52 | rm -rf $MODULE_NAME 53 | 54 | - name: Uninstall all packages 55 | run: | 56 | python -m pip freeze > installed.txt 57 | python -m pip uninstall -y -r installed.txt 58 | 59 | - name: Install from sdist 60 | shell: bash 61 | run: | 62 | SDIST=$(python -c "import os;print(os.listdir('./dist')[-1])" 2>&1) 63 | pip install dist/$SDIST 64 | 65 | - name: Test import 66 | shell: bash 67 | run: | 68 | python -c "import $MODULE_NAME" -Werror 69 | 70 | - name: Install test requirements 71 | run: | 72 | python -m pip install -U -r requirements.txt 73 | 74 | - name: Run tests 75 | shell: bash 76 | run: | 77 | python -m pytest --pyargs $MODULE_NAME -Werror 78 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg 2 | *.egg-info 3 | .eggs 4 | preshed/.maps.pxd.swm 5 | preshed/.maps.pyx.swl 6 | *.sw[a-z] 7 | *.so 8 | *.pyc 9 | *.swp 10 | *.swo 11 | *.html 12 | *.c 13 | *.cpp 14 | .env/ 15 | .denv 16 | cythonize.json 17 | MANIFEST 18 | build/ 19 | dist/ 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 ExplosionAI GmbH, 2014 Matthew Honnibal 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, 
modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 21 | THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | recursive-include include *.h 2 | include LICENSE 3 | include README.md 4 | recursive-exclude preshed *.cpp 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # preshed: Cython Hash Table for Pre-Hashed Keys 4 | 5 | Simple but high performance Cython hash table mapping pre-randomized keys to 6 | `void*` values. Inspired by 7 | [Jeff Preshing](http://preshing.com/20130107/this-hash-table-is-faster-than-a-judy-array/). 
8 | 9 | [![tests](https://github.com/explosion/preshed/actions/workflows/tests.yml/badge.svg)](https://github.com/explosion/preshed/actions/workflows/tests.yml) 10 | [![pypi Version](https://img.shields.io/pypi/v/preshed.svg?style=flat-square&logo=pypi&logoColor=white)](https://pypi.python.org/pypi/preshed) 11 | [![conda Version](https://img.shields.io/conda/vn/conda-forge/preshed.svg?style=flat-square&logo=conda-forge&logoColor=white)](https://anaconda.org/conda-forge/preshed) 12 | [![Python wheels](https://img.shields.io/badge/wheels-%E2%9C%93-4c1.svg?longCache=true&style=flat-square&logo=python&logoColor=white)](https://github.com/explosion/wheelwright/releases) 13 | -------------------------------------------------------------------------------- /bin/push-tag.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | set -e 4 | 5 | # Insist repository is clean 6 | git diff-index --quiet HEAD 7 | 8 | git checkout $1 9 | git pull origin $1 10 | git push origin $1 11 | 12 | version=$(grep "__version__ = " preshed/about.py) 13 | version=${version/__version__ = } 14 | version=${version/\'/} 15 | version=${version/\'/} 16 | version=${version/\"/} 17 | version=${version/\"/} 18 | git tag "v$version" 19 | git push origin "v$version" 20 | -------------------------------------------------------------------------------- /fabfile.py: -------------------------------------------------------------------------------- 1 | from fabric.api import local, run, lcd, cd, env 2 | 3 | import os 4 | from os import path 5 | from os.path import exists as file_exists 6 | from fabtools.python import virtualenv 7 | 8 | 9 | PWD = path.dirname(__file__) 10 | VENV_DIR = path.join(PWD, '.env') 11 | DEV_ENV_DIR = path.join(PWD, '.denv') 12 | 13 | 14 | def dev(): 15 | # Allow this to persist, since we aren't as rigorous about keeping state clean 16 | if not file_exists('.denv'): 17 | local('virtualenv .denv') 18 | 19 | with 
virtualenv(DEV_ENV_DIR): 20 | local('pip install -r requirements.txt') 21 | 22 | 23 | def sdist(): 24 | if file_exists('dist/'): 25 | local('rm -rf dist/') 26 | local('mkdir dist') 27 | with virtualenv(VENV_DIR): 28 | local('python setup.py sdist') 29 | 30 | 31 | def publish(): 32 | with virtualenv(VENV_DIR): 33 | local('python setup.py register') 34 | local('twine upload dist/*.tar.gz') 35 | 36 | 37 | def setup(): 38 | if file_exists('.env'): 39 | local('rm -rf .env') 40 | local('rm -rf *.egg') 41 | local('virtualenv .env') 42 | 43 | 44 | def install(): 45 | with virtualenv(VENV_DIR): 46 | local('pip install --upgrade setuptools') 47 | local('pip install dist/*.tar.gz') 48 | local('pip install pytest') 49 | 50 | 51 | def make(): 52 | with virtualenv(DEV_ENV_DIR): 53 | with lcd(path.dirname(__file__)): 54 | local('python setup.py build') 55 | 56 | 57 | def clean(): 58 | with lcd(os.path.dirname(__file__)): 59 | local('python setup.py clean --all') 60 | with virtualenv(DEV_ENV_DIR): 61 | with lcd(os.path.dirname(__file__)): 62 | local('python setup.py clean --all') 63 | 64 | def test(): 65 | with virtualenv(VENV_DIR): 66 | local('python -m pytest -x') 67 | 68 | 69 | def travis(): 70 | local('open https://travis-ci.org/spacy-io/preshed') 71 | -------------------------------------------------------------------------------- /include/msvc9/stdint.h: -------------------------------------------------------------------------------- 1 | // ISO C9x compliant stdint.h for Microsoft Visual Studio 2 | // Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 3 | // 4 | // Copyright (c) 2006-2013 Alexander Chemeris 5 | // 6 | // Redistribution and use in source and binary forms, with or without 7 | // modification, are permitted provided that the following conditions are met: 8 | // 9 | // 1. Redistributions of source code must retain the above copyright notice, 10 | // this list of conditions and the following disclaimer. 11 | // 12 | // 2. 
Redistributions in binary form must reproduce the above copyright 13 | // notice, this list of conditions and the following disclaimer in the 14 | // documentation and/or other materials provided with the distribution. 15 | // 16 | // 3. Neither the name of the product nor the names of its contributors may 17 | // be used to endorse or promote products derived from this software 18 | // without specific prior written permission. 19 | // 20 | // THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED 21 | // WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF 22 | // MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO 23 | // EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 25 | // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; 26 | // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 27 | // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR 28 | // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF 29 | // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 | // 31 | /////////////////////////////////////////////////////////////////////////////// 32 | 33 | #ifndef _MSC_VER // [ 34 | #error "Use this header only with Microsoft Visual C++ compilers!" 
35 | #endif // _MSC_VER ] 36 | 37 | #ifndef _MSC_STDINT_H_ // [ 38 | #define _MSC_STDINT_H_ 39 | 40 | #if _MSC_VER > 1000 41 | #pragma once 42 | #endif 43 | 44 | #if _MSC_VER >= 1600 // [ 45 | #include 46 | #else // ] _MSC_VER >= 1600 [ 47 | 48 | #include 49 | 50 | // For Visual Studio 6 in C++ mode and for many Visual Studio versions when 51 | // compiling for ARM we should wrap include with 'extern "C++" {}' 52 | // or compiler give many errors like this: 53 | // error C2733: second C linkage of overloaded function 'wmemchr' not allowed 54 | #ifdef __cplusplus 55 | extern "C" { 56 | #endif 57 | # include 58 | #ifdef __cplusplus 59 | } 60 | #endif 61 | 62 | // Define _W64 macros to mark types changing their size, like intptr_t. 63 | #ifndef _W64 64 | # if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 65 | # define _W64 __w64 66 | # else 67 | # define _W64 68 | # endif 69 | #endif 70 | 71 | 72 | // 7.18.1 Integer types 73 | 74 | // 7.18.1.1 Exact-width integer types 75 | 76 | // Visual Studio 6 and Embedded Visual C++ 4 doesn't 77 | // realize that, e.g. char has the same size as __int8 78 | // so we give up on __intX for them. 
79 | #if (_MSC_VER < 1300) 80 | typedef signed char int8_t; 81 | typedef signed short int16_t; 82 | typedef signed int int32_t; 83 | typedef unsigned char uint8_t; 84 | typedef unsigned short uint16_t; 85 | typedef unsigned int uint32_t; 86 | #else 87 | typedef signed __int8 int8_t; 88 | typedef signed __int16 int16_t; 89 | typedef signed __int32 int32_t; 90 | typedef unsigned __int8 uint8_t; 91 | typedef unsigned __int16 uint16_t; 92 | typedef unsigned __int32 uint32_t; 93 | #endif 94 | typedef signed __int64 int64_t; 95 | typedef unsigned __int64 uint64_t; 96 | 97 | 98 | // 7.18.1.2 Minimum-width integer types 99 | typedef int8_t int_least8_t; 100 | typedef int16_t int_least16_t; 101 | typedef int32_t int_least32_t; 102 | typedef int64_t int_least64_t; 103 | typedef uint8_t uint_least8_t; 104 | typedef uint16_t uint_least16_t; 105 | typedef uint32_t uint_least32_t; 106 | typedef uint64_t uint_least64_t; 107 | 108 | // 7.18.1.3 Fastest minimum-width integer types 109 | typedef int8_t int_fast8_t; 110 | typedef int16_t int_fast16_t; 111 | typedef int32_t int_fast32_t; 112 | typedef int64_t int_fast64_t; 113 | typedef uint8_t uint_fast8_t; 114 | typedef uint16_t uint_fast16_t; 115 | typedef uint32_t uint_fast32_t; 116 | typedef uint64_t uint_fast64_t; 117 | 118 | // 7.18.1.4 Integer types capable of holding object pointers 119 | #ifdef _WIN64 // [ 120 | typedef signed __int64 intptr_t; 121 | typedef unsigned __int64 uintptr_t; 122 | #else // _WIN64 ][ 123 | typedef _W64 signed int intptr_t; 124 | typedef _W64 unsigned int uintptr_t; 125 | #endif // _WIN64 ] 126 | 127 | // 7.18.1.5 Greatest-width integer types 128 | typedef int64_t intmax_t; 129 | typedef uint64_t uintmax_t; 130 | 131 | 132 | // 7.18.2 Limits of specified-width integer types 133 | 134 | #if !defined(__cplusplus) || defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and footnote 221 at page 259 135 | 136 | // 7.18.2.1 Limits of exact-width integer types 137 | #define INT8_MIN 
((int8_t)_I8_MIN) 138 | #define INT8_MAX _I8_MAX 139 | #define INT16_MIN ((int16_t)_I16_MIN) 140 | #define INT16_MAX _I16_MAX 141 | #define INT32_MIN ((int32_t)_I32_MIN) 142 | #define INT32_MAX _I32_MAX 143 | #define INT64_MIN ((int64_t)_I64_MIN) 144 | #define INT64_MAX _I64_MAX 145 | #define UINT8_MAX _UI8_MAX 146 | #define UINT16_MAX _UI16_MAX 147 | #define UINT32_MAX _UI32_MAX 148 | #define UINT64_MAX _UI64_MAX 149 | 150 | // 7.18.2.2 Limits of minimum-width integer types 151 | #define INT_LEAST8_MIN INT8_MIN 152 | #define INT_LEAST8_MAX INT8_MAX 153 | #define INT_LEAST16_MIN INT16_MIN 154 | #define INT_LEAST16_MAX INT16_MAX 155 | #define INT_LEAST32_MIN INT32_MIN 156 | #define INT_LEAST32_MAX INT32_MAX 157 | #define INT_LEAST64_MIN INT64_MIN 158 | #define INT_LEAST64_MAX INT64_MAX 159 | #define UINT_LEAST8_MAX UINT8_MAX 160 | #define UINT_LEAST16_MAX UINT16_MAX 161 | #define UINT_LEAST32_MAX UINT32_MAX 162 | #define UINT_LEAST64_MAX UINT64_MAX 163 | 164 | // 7.18.2.3 Limits of fastest minimum-width integer types 165 | #define INT_FAST8_MIN INT8_MIN 166 | #define INT_FAST8_MAX INT8_MAX 167 | #define INT_FAST16_MIN INT16_MIN 168 | #define INT_FAST16_MAX INT16_MAX 169 | #define INT_FAST32_MIN INT32_MIN 170 | #define INT_FAST32_MAX INT32_MAX 171 | #define INT_FAST64_MIN INT64_MIN 172 | #define INT_FAST64_MAX INT64_MAX 173 | #define UINT_FAST8_MAX UINT8_MAX 174 | #define UINT_FAST16_MAX UINT16_MAX 175 | #define UINT_FAST32_MAX UINT32_MAX 176 | #define UINT_FAST64_MAX UINT64_MAX 177 | 178 | // 7.18.2.4 Limits of integer types capable of holding object pointers 179 | #ifdef _WIN64 // [ 180 | # define INTPTR_MIN INT64_MIN 181 | # define INTPTR_MAX INT64_MAX 182 | # define UINTPTR_MAX UINT64_MAX 183 | #else // _WIN64 ][ 184 | # define INTPTR_MIN INT32_MIN 185 | # define INTPTR_MAX INT32_MAX 186 | # define UINTPTR_MAX UINT32_MAX 187 | #endif // _WIN64 ] 188 | 189 | // 7.18.2.5 Limits of greatest-width integer types 190 | #define INTMAX_MIN INT64_MIN 191 | #define 
INTMAX_MAX INT64_MAX 192 | #define UINTMAX_MAX UINT64_MAX 193 | 194 | // 7.18.3 Limits of other integer types 195 | 196 | #ifdef _WIN64 // [ 197 | # define PTRDIFF_MIN _I64_MIN 198 | # define PTRDIFF_MAX _I64_MAX 199 | #else // _WIN64 ][ 200 | # define PTRDIFF_MIN _I32_MIN 201 | # define PTRDIFF_MAX _I32_MAX 202 | #endif // _WIN64 ] 203 | 204 | #define SIG_ATOMIC_MIN INT_MIN 205 | #define SIG_ATOMIC_MAX INT_MAX 206 | 207 | #ifndef SIZE_MAX // [ 208 | # ifdef _WIN64 // [ 209 | # define SIZE_MAX _UI64_MAX 210 | # else // _WIN64 ][ 211 | # define SIZE_MAX _UI32_MAX 212 | # endif // _WIN64 ] 213 | #endif // SIZE_MAX ] 214 | 215 | // WCHAR_MIN and WCHAR_MAX are also defined in 216 | #ifndef WCHAR_MIN // [ 217 | # define WCHAR_MIN 0 218 | #endif // WCHAR_MIN ] 219 | #ifndef WCHAR_MAX // [ 220 | # define WCHAR_MAX _UI16_MAX 221 | #endif // WCHAR_MAX ] 222 | 223 | #define WINT_MIN 0 224 | #define WINT_MAX _UI16_MAX 225 | 226 | #endif // __STDC_LIMIT_MACROS ] 227 | 228 | 229 | // 7.18.4 Limits of other integer types 230 | 231 | #if !defined(__cplusplus) || defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 232 | 233 | // 7.18.4.1 Macros for minimum-width integer constants 234 | 235 | #define INT8_C(val) val##i8 236 | #define INT16_C(val) val##i16 237 | #define INT32_C(val) val##i32 238 | #define INT64_C(val) val##i64 239 | 240 | #define UINT8_C(val) val##ui8 241 | #define UINT16_C(val) val##ui16 242 | #define UINT32_C(val) val##ui32 243 | #define UINT64_C(val) val##ui64 244 | 245 | // 7.18.4.2 Macros for greatest-width integer constants 246 | // These #ifndef's are needed to prevent collisions with . 247 | // Check out Issue 9 for the details. 
248 | #ifndef INTMAX_C // [ 249 | # define INTMAX_C INT64_C 250 | #endif // INTMAX_C ] 251 | #ifndef UINTMAX_C // [ 252 | # define UINTMAX_C UINT64_C 253 | #endif // UINTMAX_C ] 254 | 255 | #endif // __STDC_CONSTANT_MACROS ] 256 | 257 | #endif // _MSC_VER >= 1600 ] 258 | 259 | #endif // _MSC_STDINT_H_ ] 260 | -------------------------------------------------------------------------------- /preshed/__init__.pxd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/preshed/7bd9d00a9b9460020ad9f9d7f06499efd6a08b58/preshed/__init__.pxd -------------------------------------------------------------------------------- /preshed/__init__.py: -------------------------------------------------------------------------------- 1 | from .about import * 2 | -------------------------------------------------------------------------------- /preshed/about.py: -------------------------------------------------------------------------------- 1 | __title__ = "preshed" 2 | __version__ = "3.0.10" 3 | __summary__ = "Cython hash table that trusts the keys are pre-hashed" 4 | __uri__ = "https://github.com/explosion/preshed" 5 | __author__ = "Explosion" 6 | __email__ = "contact@explosion.ai" 7 | __license__ = "MIT" 8 | __release__ = True 9 | -------------------------------------------------------------------------------- /preshed/bloom.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport uint64_t, uint32_t 2 | from cymem.cymem cimport Pool 3 | 4 | ctypedef uint64_t key_t 5 | 6 | cdef struct BloomStruct: 7 | key_t* bitfield 8 | key_t hcount # hash count, number of hash functions 9 | key_t length 10 | uint32_t seed 11 | 12 | 13 | cdef class BloomFilter: 14 | cdef Pool mem 15 | cdef BloomStruct* c_bloom 16 | cdef inline bint contains(self, key_t item) nogil 17 | 18 | 19 | cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) 
except * 20 | 21 | cdef void bloom_add(BloomStruct* bloom, key_t item) nogil 22 | 23 | cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil 24 | 25 | cdef void bloom_add(BloomStruct* bloom, key_t item) nogil 26 | -------------------------------------------------------------------------------- /preshed/bloom.pyx: -------------------------------------------------------------------------------- 1 | # cython: infer_types=True 2 | # cython: cdivision=True 3 | # 4 | from murmurhash.mrmr cimport hash128_x86 5 | import math 6 | from array import array 7 | 8 | try: 9 | import copy_reg 10 | except ImportError: 11 | import copyreg as copy_reg 12 | 13 | 14 | def calculate_size_and_hash_count(members, error_rate): 15 | """Calculate the optimal size in bits and number of hash functions for a 16 | given number of members and error rate. 17 | """ 18 | base = math.log(1 / (2 ** math.log(2))) 19 | bit_count = math.ceil((members * math.log(error_rate)) / base) 20 | hash_count = math.floor((bit_count / members) * math.log(2)) 21 | return (bit_count, hash_count) 22 | 23 | 24 | cdef class BloomFilter: 25 | """Bloom filter that allows for basic membership tests. 26 | 27 | Only integers are supported as keys. 
28 | """ 29 | def __init__(self, key_t size=(2 ** 10), key_t hash_funcs=23, uint32_t seed=0): 30 | self.mem = Pool() 31 | self.c_bloom = self.mem.alloc(1, sizeof(BloomStruct)) 32 | bloom_init(self.mem, self.c_bloom, hash_funcs, size, seed) 33 | 34 | @classmethod 35 | def from_error_rate(cls, members, error_rate=1E-4): 36 | params = calculate_size_and_hash_count(members, error_rate) 37 | return cls(*params) 38 | 39 | def add(self, key_t item): 40 | bloom_add(self.c_bloom, item) 41 | 42 | def __contains__(self, item): 43 | return bloom_contains(self.c_bloom, item) 44 | 45 | cdef inline bint contains(self, key_t item) nogil: 46 | return bloom_contains(self.c_bloom, item) 47 | 48 | def to_bytes(self): 49 | return bloom_to_bytes(self.c_bloom) 50 | 51 | def from_bytes(self, bytes byte_string): 52 | bloom_from_bytes(self.mem, self.c_bloom, byte_string) 53 | return self 54 | 55 | 56 | cdef bytes bloom_to_bytes(const BloomStruct* bloom): 57 | py = array("L") 58 | py.append(bloom.hcount) 59 | py.append(bloom.length) 60 | py.append(bloom.seed) 61 | for i in range(bloom.length // sizeof(key_t)): 62 | py.append(bloom.bitfield[i]) 63 | if hasattr(py, "tobytes"): 64 | return py.tobytes() 65 | else: 66 | # Python 2 :( 67 | return py.tostring() 68 | 69 | 70 | cdef void bloom_from_bytes(Pool mem, BloomStruct* bloom, bytes data): 71 | py = array("L") 72 | if hasattr(py, "frombytes"): 73 | py.frombytes(data) 74 | else: 75 | py.fromstring(data) 76 | bloom.hcount = py[0] 77 | bloom.length = py[1] 78 | bloom.seed = py[2] 79 | bloom.bitfield = mem.alloc(bloom.length // sizeof(key_t), sizeof(key_t)) 80 | for i in range(bloom.length // sizeof(key_t)): 81 | bloom.bitfield[i] = py[3+i] 82 | 83 | 84 | cdef void bloom_init(Pool mem, BloomStruct* bloom, key_t hcount, key_t length, uint32_t seed) except *: 85 | # size should be a multiple of the container size - round up 86 | if length % sizeof(key_t): 87 | length = math.ceil(length / sizeof(key_t)) * sizeof(key_t) 88 | bloom.length = length 89 | 
bloom.hcount = hcount 90 | bloom.bitfield = mem.alloc(length // sizeof(key_t), sizeof(key_t)) 91 | bloom.seed = seed 92 | 93 | 94 | # Instead of calling MurmurHash with a different seed for each hash function, this 95 | # generates two initial hash values and then combines them to create the correct 96 | # number of hashes. This technique is faster than just doing MurmurhHash 97 | # repeatedly and has been shown to work as well as full hashing. 98 | 99 | # For details see "Less Hashing, Same Performance: Building a Better Bloom 100 | # Filter", Kirsch & Mitzenmacher. 101 | 102 | # https://www.semanticscholar.org/paper/Less-hashing%2C-same-performance%3A-Building-a-better-Kirsch-Mitzenmacher/65c43afbfc064705bdc40d3473f32518e9306429 103 | # The choice of seeds is arbitrary. 104 | 105 | 106 | cdef void bloom_add(BloomStruct* bloom, key_t item) nogil: 107 | cdef key_t hv 108 | cdef key_t[2] keys 109 | cdef key_t one = 1 # We want this explicitly typed, because bits 110 | hash128_x86(&item, sizeof(key_t), 0, &keys) 111 | for hiter in range(bloom.hcount): 112 | hv = (keys[0] + (hiter * keys[1])) % bloom.length 113 | bloom.bitfield[hv // sizeof(key_t)] |= one << (hv % sizeof(key_t)) 114 | 115 | 116 | cdef bint bloom_contains(const BloomStruct* bloom, key_t item) nogil: 117 | cdef key_t hv 118 | cdef key_t[2] keys 119 | cdef key_t one = 1 # We want this explicitly typed, because bits 120 | hash128_x86(&item, sizeof(key_t), 0, &keys) 121 | for hiter in range(bloom.hcount): 122 | hv = (keys[0] + (hiter * keys[1])) % bloom.length 123 | if not (bloom.bitfield[hv // sizeof(key_t)] & one << (hv % sizeof(key_t))): 124 | return False 125 | return True 126 | 127 | 128 | def pickle_bloom(BloomFilter bloom): 129 | return unpickle_bloom, (bloom.to_bytes(),) 130 | 131 | 132 | def unpickle_bloom(byte_string): 133 | return BloomFilter().from_bytes(byte_string) 134 | 135 | 136 | copy_reg.pickle(BloomFilter, pickle_bloom, unpickle_bloom) 137 | 
-------------------------------------------------------------------------------- /preshed/counter.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport int64_t 2 | 3 | from cymem.cymem cimport Pool 4 | 5 | from .maps cimport MapStruct 6 | from .maps cimport map_init, map_get, map_set, map_iter 7 | from .maps cimport key_t 8 | 9 | 10 | ctypedef int64_t count_t 11 | 12 | 13 | cdef class PreshCounter: 14 | cdef Pool mem 15 | cdef MapStruct* c_map 16 | cdef public object smoother 17 | cdef readonly count_t total 18 | 19 | cpdef int inc(self, key_t key, count_t inc) except -1 20 | -------------------------------------------------------------------------------- /preshed/counter.pyx: -------------------------------------------------------------------------------- 1 | """Count occurrences of uint64-valued keys.""" 2 | from __future__ import division 3 | cimport cython 4 | from libc.math cimport log, exp, sqrt 5 | 6 | 7 | cdef class PreshCounter: 8 | def __init__(self, initial_size=8): 9 | assert initial_size != 0 10 | assert initial_size & (initial_size - 1) == 0 11 | self.mem = Pool() 12 | self.c_map = self.mem.alloc(1, sizeof(MapStruct)) 13 | map_init(self.mem, self.c_map, initial_size) 14 | self.smoother = None 15 | self.total = 0 16 | 17 | property length: 18 | def __get__(self): 19 | return self.c_map.length 20 | 21 | def __len__(self): 22 | return self.c_map.length 23 | 24 | def __iter__(self): 25 | cdef int i = 0 26 | cdef key_t key 27 | cdef void* value 28 | while map_iter(self.c_map, &i, &key, &value): 29 | yield key, value 30 | 31 | def __getitem__(self, key_t key): 32 | return map_get(self.c_map, key) 33 | 34 | cpdef int inc(self, key_t key, count_t inc) except -1: 35 | cdef count_t c = map_get(self.c_map, key) 36 | c += inc 37 | map_set(self.mem, self.c_map, key, c) 38 | self.total += inc 39 | return c 40 | 41 | def prob(self, key_t key): 42 | cdef GaleSmoother smoother 43 | cdef void* value = 
map_get(self.c_map, key) 44 | if self.smoother is not None: 45 | smoother = self.smoother 46 | r_star = self.smoother(value) 47 | return r_star / self.smoother.total 48 | elif value == NULL: 49 | return 0 50 | else: 51 | return value / self.total 52 | 53 | def smooth(self): 54 | self.smoother = GaleSmoother(self) 55 | 56 | 57 | cdef class GaleSmoother: 58 | cdef Pool mem 59 | cdef count_t* Nr 60 | cdef double gradient 61 | cdef double intercept 62 | cdef readonly count_t cutoff 63 | cdef count_t Nr0 64 | cdef readonly double total 65 | 66 | def __init__(self, PreshCounter counts): 67 | count_counts = PreshCounter() 68 | cdef double total = 0 69 | for _, count in counts: 70 | count_counts.inc(count, 1) 71 | total += count 72 | # If we have no items seen 1 or 2 times, this doesn't work. But, this 73 | # won't be true in real data... 74 | assert count_counts[1] != 0 and count_counts[2] != 0, "Cannot smooth your weird data" 75 | # Extrapolate Nr0 from Nr1 and Nr2. 76 | self.Nr0 = count_counts[1] + (count_counts[1] - count_counts[2]) 77 | self.mem = Pool() 78 | 79 | cdef double[2] mb 80 | 81 | cdef int n_counts = 0 82 | for _ in count_counts: 83 | n_counts += 1 84 | sorted_r = count_counts.mem.alloc(n_counts, sizeof(count_t)) 85 | self.Nr = self.mem.alloc(n_counts, sizeof(count_t)) 86 | for i, (count, count_count) in enumerate(sorted(count_counts)): 87 | sorted_r[i] = count 88 | self.Nr[i] = count_count 89 | 90 | _fit_loglinear_model(mb, sorted_r, self.Nr, n_counts) 91 | 92 | self.cutoff = _find_when_to_switch(sorted_r, self.Nr, mb[0], mb[1], 93 | n_counts) 94 | self.gradient = mb[0] 95 | self.intercept = mb[1] 96 | self.total = self(0) * self.Nr0 97 | for count, count_count in count_counts: 98 | self.total += self(count) * count_count 99 | 100 | def __call__(self, count_t r): 101 | if r == 0: 102 | return self.Nr[1] / self.Nr0 103 | elif r < self.cutoff: 104 | return turing_estimate_of_r(r, self.Nr[r-1], self.Nr[r]) 105 | else: 106 | return gale_estimate_of_r(r, 
self.gradient, self.intercept) 107 | 108 | def count_count(self, count_t r): 109 | if r == 0: 110 | return self.Nr0 111 | else: 112 | return self.Nr[r-1] 113 | 114 | 115 | @cython.cdivision(True) 116 | cdef double turing_estimate_of_r(double r, double Nr, double Nr1) except -1: 117 | return ((r + 1) * Nr1) / Nr 118 | 119 | 120 | @cython.cdivision(True) 121 | cdef double gale_estimate_of_r(double r, double gradient, double intercept) except -1: 122 | cdef double e_nr = exp(gradient * log(r) + intercept) 123 | cdef double e_nr1 = exp(gradient * log(r+1) + intercept) 124 | return (r + 1) * (e_nr1 / e_nr) 125 | 126 | 127 | @cython.cdivision(True) 128 | cdef void _fit_loglinear_model(double* output, count_t* sorted_r, count_t* Nr, 129 | int length) except *: 130 | cdef double x_mean = 0.0 131 | cdef double y_mean = 0.0 132 | 133 | cdef Pool mem = Pool() 134 | x = mem.alloc(length, sizeof(double)) 135 | y = mem.alloc(length, sizeof(double)) 136 | 137 | cdef int i 138 | for i in range(length): 139 | r = sorted_r[i] 140 | x[i] = log(r) 141 | y[i] = log(_get_zr(i, sorted_r, Nr[i], length)) 142 | x_mean += x[i] 143 | y_mean += y[i] 144 | 145 | x_mean /= length 146 | y_mean /= length 147 | 148 | cdef double ss_xy = 0.0 149 | cdef double ss_xx = 0.0 150 | 151 | for i in range(length): 152 | x_dist = x[i] - x_mean 153 | y_dist = y[i] - y_mean 154 | # SS_xy = sum the product of the distances from the mean 155 | ss_xy += x_dist * y_dist 156 | # SS_xx = sum the squares of the x distance 157 | ss_xx += x_dist * x_dist 158 | # Gradient 159 | output[0] = ss_xy / ss_xx 160 | # Intercept 161 | output[1] = y_mean - output[0] * x_mean 162 | 163 | 164 | @cython.cdivision(True) 165 | cdef double _get_zr(int j, count_t* sorted_r, count_t Nr_j, int n_counts) except -1: 166 | cdef double r_i = sorted_r[j-1] if j >= 1 else 0 167 | cdef double r_j = sorted_r[j] 168 | cdef double r_k = sorted_r[j+1] if (j+1) < n_counts else (2 * r_i - 1) 169 | return 2 * Nr_j / (r_k - r_i) 170 | 171 | 172 | 
@cython.cdivision(True) 173 | cdef double _variance(double r, double Nr, double Nr1) nogil: 174 | return 1.96 * sqrt((r+1)**2 * (Nr1 / Nr**2) * (1.0 + (Nr1 / Nr))) 175 | 176 | 177 | @cython.cdivision(True) 178 | cdef count_t _find_when_to_switch(count_t* sorted_r, count_t* Nr, double m, double b, 179 | int length) except -1: 180 | cdef int i 181 | cdef count_t r 182 | for i in range(length-1): 183 | r = sorted_r[i] 184 | if sorted_r[i+1] != r+1: 185 | return r 186 | g_r = gale_estimate_of_r(r, m, b) 187 | t_r = turing_estimate_of_r(r, Nr[i], Nr[i+1]) 188 | if abs(t_r - g_r) <= _variance(r, Nr[i], Nr[i+1]): 189 | return r 190 | else: 191 | return length - 1 192 | -------------------------------------------------------------------------------- /preshed/maps.pxd: -------------------------------------------------------------------------------- 1 | from libc.stdint cimport uint64_t 2 | from cymem.cymem cimport Pool 3 | 4 | 5 | ctypedef uint64_t key_t 6 | 7 | 8 | cdef struct Cell: 9 | key_t key 10 | void* value 11 | 12 | 13 | cdef struct Result: 14 | int found 15 | void* value 16 | 17 | 18 | cdef struct MapStruct: 19 | Cell* cells 20 | void* value_for_empty_key 21 | void* value_for_del_key 22 | key_t length 23 | key_t filled 24 | bint is_empty_key_set 25 | bint is_del_key_set 26 | 27 | 28 | cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values, 29 | int n) nogil 30 | 31 | 32 | cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil 33 | 34 | cdef void* map_get(const MapStruct* map_, const key_t key) nogil 35 | 36 | cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except * 37 | 38 | cdef void map_init(Pool mem, MapStruct* pmap, size_t length) except * 39 | 40 | cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil 41 | 42 | cdef void* map_clear(MapStruct* map_, const key_t key) nogil 43 | 44 | 45 | cdef class PreshMap: 46 | cdef MapStruct* c_map 47 | cdef Pool mem 48 | 49 | 
cdef inline void* get(self, key_t key) nogil 50 | cdef void set(self, key_t key, void* value) except * 51 | 52 | 53 | cdef class PreshMapArray: 54 | cdef Pool mem 55 | cdef MapStruct* maps 56 | cdef size_t length 57 | 58 | cdef inline void* get(self, size_t i, key_t key) nogil 59 | cdef void set(self, size_t i, key_t key, void* value) except * 60 | -------------------------------------------------------------------------------- /preshed/maps.pyx: -------------------------------------------------------------------------------- 1 | # cython: infer_types=True 2 | # cython: cdivision=True 3 | # 4 | cimport cython 5 | 6 | 7 | DEF EMPTY_KEY = 0 8 | DEF DELETED_KEY = 1 9 | 10 | 11 | cdef class PreshMap: 12 | """Hash map that assumes keys come pre-hashed. Maps uint64_t --> uint64_t. 13 | Uses open addressing with linear probing. 14 | 15 | Usage 16 | map = PreshMap() # Create a table 17 | map = PreshMap(initial_size=1024) # Create with initial size (efficiency) 18 | map[key] = value # Set a value to a key 19 | value = map[key] # Get a value given a key 20 | for key, value in map.items(): # Iterate over items 21 | len(map) # Get number of inserted keys 22 | """ 23 | def __init__(self, size_t initial_size=8): 24 | # Size must be power of two 25 | if initial_size == 0: 26 | initial_size = 8 27 | if initial_size & (initial_size - 1) != 0: 28 | power = 1 29 | while power < initial_size: 30 | power *= 2 31 | initial_size = power 32 | self.mem = Pool() 33 | self.c_map = <MapStruct*>self.mem.alloc(1, sizeof(MapStruct))  # Pool.alloc returns void*; cast to typed pointer 34 | map_init(self.mem, self.c_map, initial_size) 35 | 36 | property capacity: 37 | def __get__(self): 38 | return self.c_map.length 39 | 40 | def items(self): 41 | cdef key_t key 42 | cdef void* value 43 | cdef int i = 0 44 | while map_iter(self.c_map, &i, &key, &value): 45 | yield key, <size_t>value  # payload is stored as void*; expose it as an int 46 | 47 | def keys(self): 48 | for key, _ in self.items(): 49 | yield key 50 | 51 | def values(self): 52 | for _, value in self.items(): 53 | yield value 54 | 55 | def pop(self, key_t key, 
default=None): 56 | cdef Result result = map_get_unless_missing(self.c_map, key) 57 | map_clear(self.c_map, key) 58 | if result.found: 59 | return <size_t>result.value  # payload is stored as void*; cast back to int 60 | else: 61 | return default 62 | 63 | def __getitem__(self, key_t key): 64 | cdef Result result = map_get_unless_missing(self.c_map, key) 65 | if result.found: 66 | return <size_t>result.value  # payload is stored as void*; cast back to int 67 | else: 68 | return None 69 | 70 | def __setitem__(self, key_t key, size_t value): 71 | map_set(self.mem, self.c_map, key, <void*>value)  # store the int payload in the void* slot 72 | 73 | def __delitem__(self, key_t key): 74 | map_clear(self.c_map, key) 75 | 76 | def __len__(self): 77 | return self.c_map.filled 78 | 79 | def __contains__(self, key_t key): 80 | cdef Result result = map_get_unless_missing(self.c_map, key) 81 | return True if result.found else False 82 | 83 | def __iter__(self): 84 | for key in self.keys(): 85 | yield key 86 | 87 | cdef inline void* get(self, key_t key) nogil: 88 | return map_get(self.c_map, key) 89 | 90 | cdef void set(self, key_t key, void* value) except *: 91 | map_set(self.mem, self.c_map, key, value) 92 | 93 | 94 | cdef class PreshMapArray: 95 | """An array of hash tables that assume keys come pre-hashed. Each table 96 | uses open addressing with linear probing. 
97 | """ 98 | def __init__(self, size_t length, size_t initial_size=8): 99 | self.mem = Pool() 100 | self.length = length 101 | self.maps = <MapStruct*>self.mem.alloc(length, sizeof(MapStruct))  # Pool.alloc returns void*; cast to typed pointer 102 | for i in range(length): 103 | map_init(self.mem, &self.maps[i], initial_size) 104 | 105 | cdef inline void* get(self, size_t i, key_t key) nogil: 106 | return map_get(&self.maps[i], key) 107 | 108 | cdef void set(self, size_t i, key_t key, void* value) except *: 109 | map_set(self.mem, &self.maps[i], key, value) 110 | 111 | 112 | cdef void map_init(Pool mem, MapStruct* map_, size_t length) except *: 113 | map_.length = length 114 | map_.filled = 0 115 | map_.cells = <Cell*>mem.alloc(length, sizeof(Cell))  # Pool.alloc returns void*; cast to typed pointer 116 | 117 | 118 | cdef void map_set(Pool mem, MapStruct* map_, key_t key, void* value) except *: 119 | cdef Cell* cell 120 | if key == EMPTY_KEY: 121 | map_.value_for_empty_key = value 122 | map_.is_empty_key_set = True 123 | elif key == DELETED_KEY: 124 | map_.value_for_del_key = value 125 | map_.is_del_key_set = True 126 | else: 127 | cell = _find_cell_for_insertion(map_.cells, map_.length, key) 128 | if cell.key == EMPTY_KEY: 129 | map_.filled += 1 130 | cell.key = key 131 | cell.value = value 132 | if (map_.filled + 1) * 5 >= (map_.length * 3): 133 | _resize(mem, map_) 134 | 135 | 136 | cdef void* map_get(const MapStruct* map_, const key_t key) nogil: 137 | if key == EMPTY_KEY: 138 | return map_.value_for_empty_key 139 | elif key == DELETED_KEY: 140 | return map_.value_for_del_key 141 | cdef Cell* cell = _find_cell(map_.cells, map_.length, key) 142 | return cell.value 143 | 144 | 145 | cdef Result map_get_unless_missing(const MapStruct* map_, const key_t key) nogil: 146 | cdef Result result 147 | cdef Cell* cell 148 | result.found = 0 149 | result.value = NULL 150 | if key == EMPTY_KEY: 151 | if map_.is_empty_key_set: 152 | result.found = 1 153 | result.value = map_.value_for_empty_key 154 | elif key == DELETED_KEY: 155 | if map_.is_del_key_set: 156 | result.found = 1 157 | 
result.value = map_.value_for_del_key 158 | else: 159 | cell = _find_cell(map_.cells, map_.length, key) 160 | if cell.key == key: 161 | result.found = 1 162 | result.value = cell.value 163 | return result 164 | 165 | 166 | cdef void* map_clear(MapStruct* map_, const key_t key) nogil: 167 | if key == EMPTY_KEY: 168 | value = map_.value_for_empty_key if map_.is_empty_key_set else NULL 169 | map_.is_empty_key_set = False 170 | return value 171 | elif key == DELETED_KEY: 172 | value = map_.value_for_del_key if map_.is_del_key_set else NULL 173 | map_.is_del_key_set = False 174 | return value 175 | else: 176 | cell = _find_cell(map_.cells, map_.length, key) 177 | cell.key = DELETED_KEY 178 | # We shouldn't decrement the "filled" value here, as we're not actually 179 | # making "empty" values -- deleted values aren't quite the same. 180 | # Instead if we manage to insert into a deleted slot, we don't increment 181 | # the fill rate. 182 | return cell.value 183 | 184 | 185 | cdef void* map_bulk_get(const MapStruct* map_, const key_t* keys, void** values, 186 | int n) nogil: 187 | cdef int i 188 | for i in range(n): 189 | values[i] = map_get(map_, keys[i]) 190 | 191 | 192 | cdef bint map_iter(const MapStruct* map_, int* i, key_t* key, void** value) nogil: 193 | '''Iterate over the filled items, setting the current place in i, and the 194 | key and value. Return False when iteration finishes. 
195 | ''' 196 | cdef const Cell* cell 197 | while i[0] < map_.length: 198 | cell = &map_.cells[i[0]] 199 | i[0] += 1 200 | if cell[0].key != EMPTY_KEY and cell[0].key != DELETED_KEY: 201 | key[0] = cell[0].key 202 | value[0] = cell[0].value 203 | return True 204 | # Remember to check for cells keyed by the special empty and deleted keys 205 | if i[0] == map_.length: 206 | i[0] += 1 207 | if map_.is_empty_key_set: 208 | key[0] = EMPTY_KEY 209 | value[0] = map_.value_for_empty_key 210 | return True 211 | if i[0] == map_.length + 1: 212 | i[0] += 1 213 | if map_.is_del_key_set: 214 | key[0] = DELETED_KEY 215 | value[0] = map_.value_for_del_key 216 | return True 217 | return False 218 | 219 | 220 | @cython.cdivision 221 | cdef inline Cell* _find_cell(Cell* cells, const key_t size, const key_t key) nogil: 222 | # Modulo for powers-of-two via bitwise & 223 | cdef key_t i = (key & (size - 1)) 224 | while cells[i].key != EMPTY_KEY and cells[i].key != key: 225 | i = (i + 1) & (size - 1) 226 | return &cells[i] 227 | 228 | 229 | @cython.cdivision 230 | cdef inline Cell* _find_cell_for_insertion(Cell* cells, const key_t size, const key_t key) nogil: 231 | """Find the correct cell to insert a value, which could be a previously 232 | deleted cell. If we cross a deleted cell and the key is in the table, we 233 | mark the later cell as deleted, and return the earlier one.""" 234 | cdef Cell* deleted = NULL 235 | # Modulo for powers-of-two via bitwise & 236 | cdef key_t i = (key & (size - 1)) 237 | while cells[i].key != EMPTY_KEY and cells[i].key != key: 238 | if cells[i].key == DELETED_KEY: 239 | deleted = &cells[i] 240 | i = (i + 1) & (size - 1) 241 | if deleted is not NULL: 242 | if cells[i].key == key: 243 | # We need to ensure we don't end up with the key in the table twice. 244 | # If we're using a deleted cell and we also have the key, we mark 245 | # the later cell as deleted. 
246 | cells[i].key = DELETED_KEY 247 | return deleted 248 | return &cells[i] 249 | 250 | 251 | cdef void _resize(Pool mem, MapStruct* map_) except *: 252 | cdef size_t new_size = map_.length * 2 253 | cdef Cell* old_cells = map_.cells 254 | cdef size_t old_size = map_.length 255 | 256 | map_.length = new_size 257 | map_.filled = 0 258 | map_.cells = <Cell*>mem.alloc(new_size, sizeof(Cell))  # Pool.alloc returns void*; cast to typed pointer 259 | 260 | cdef size_t i 261 | cdef size_t slot 262 | for i in range(old_size): 263 | if old_cells[i].key != EMPTY_KEY and old_cells[i].key != DELETED_KEY: 264 | map_set(mem, map_, old_cells[i].key, old_cells[i].value) 265 | mem.free(old_cells) 266 | -------------------------------------------------------------------------------- /preshed/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/explosion/preshed/7bd9d00a9b9460020ad9f9d7f06499efd6a08b58/preshed/tests/__init__.py -------------------------------------------------------------------------------- /preshed/tests/test_bloom.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import pytest 3 | import pickle 4 | 5 | from preshed.bloom import BloomFilter 6 | 7 | def test_contains(): 8 | bf = BloomFilter() 9 | assert 23 not in bf 10 | bf.add(23) 11 | assert 23 in bf 12 | 13 | bf.add(5) 14 | bf.add(42) 15 | bf.add(1002) 16 | assert 5 in bf 17 | assert 42 in bf 18 | assert 1002 in bf 19 | 20 | def test_no_false_negatives(): 21 | bf = BloomFilter(size=100, hash_funcs=2) 22 | for ii in range(0,1000,20): 23 | bf.add(ii) 24 | 25 | for ii in range(0,1000,20): 26 | assert ii in bf 27 | 28 | def test_from_error(): 29 | bf = BloomFilter.from_error_rate(1000) 30 | for ii in range(0,1000,20): 31 | bf.add(ii) 32 | 33 | for ii in range(0,1000,20): 34 | assert ii in bf 35 | 36 | def test_to_from_bytes(): 37 | bf = BloomFilter(size=100, hash_funcs=2) 38 | for ii in range(0,1000,20): 39 | bf.add(ii) 
40 | data = bf.to_bytes() 41 | bf2 = BloomFilter() 42 | for ii in range(0,1000,20): 43 | assert ii not in bf2 44 | bf2.from_bytes(data) 45 | for ii in range(0,1000,20): 46 | assert ii in bf2 47 | assert bf2.to_bytes() == data 48 | 49 | def test_bloom_pickle(): 50 | bf = BloomFilter(size=100, hash_funcs=2) 51 | for ii in range(0,1000,20): 52 | bf.add(ii) 53 | data = pickle.dumps(bf) 54 | bf2 = pickle.loads(data) 55 | for ii in range(0,1000,20): 56 | assert ii in bf2 57 | -------------------------------------------------------------------------------- /preshed/tests/test_counter.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import pytest 3 | 4 | from preshed.counter import PreshCounter 5 | 6 | 7 | def test_count(): 8 | counter = PreshCounter() 9 | assert counter[12] == 0 10 | counter.inc(12, 1) 11 | assert counter[12] == 1 12 | counter.inc(14, 10) 13 | counter.inc(9, 10) 14 | counter.inc(12, 4) 15 | assert counter[12] == 5 16 | assert counter[14] == 10 17 | assert counter[9] == 10 18 | 19 | 20 | def test_unsmooth_prob(): 21 | counter = PreshCounter() 22 | assert counter.prob(12) == 0.0 23 | counter.inc(12, 1) 24 | assert counter.prob(12) == 1.0 25 | counter.inc(14, 10) 26 | assert counter.prob(14) == 10 / 11 27 | assert counter.prob(12) == 1.0 / 11 28 | 29 | def test_smooth_prob(): 30 | p = PreshCounter() 31 | # 1 10 32 | # 2 6 33 | # 3 4 34 | # 5 2 35 | # 8 1 36 | for i in range(10): 37 | p.inc(100-i, 1) # 10 items of freq 1 38 | for i in range(6): 39 | p.inc(90 - i, 2) # 6 items of freq 2 40 | for i in range(4): 41 | p.inc(80 - i, 3) # 4 items of freq 3 42 | for i in range(2): 43 | p.inc(70 - i, 5) # 2 items of freq 5 44 | for i in range(1): 45 | p.inc(60 - i, 8) # 1 item of freq 8 46 | 47 | assert p.total == (10 * 1) + (6 * 2) + (4 * 3) + (2 * 5) + (1 * 8) 48 | 49 | assert p.prob(100) == 1.0 / p.total 50 | assert p.prob(200) == 0.0 51 | assert p.prob(60) == 8.0 / p.total 52 | 53 | 
p.smooth() 54 | 55 | assert p.smoother(1) < 1.0 56 | assert p.smoother(8) < 8.0 57 | assert p.prob(1000) < p.prob(100) 58 | 59 | for event, count in reversed(sorted(p, key=lambda it: it[1])): 60 | assert p.smoother(count) < count 61 | 62 | 63 | import os 64 | def test_large_freqs(): 65 | if 'TEST_FILE_LOC' in os.environ: 66 | loc = os.environ['TEST_FILE_LOC'] 67 | else: 68 | return None 69 | counts = PreshCounter() 70 | for i, line in enumerate(open(loc)): 71 | line = line.strip() 72 | if not line: 73 | continue 74 | freq = int(line.split()[0]) 75 | counts.inc(i+1, freq) 76 | oov = i+2 77 | assert counts.prob(oov) == 0.0 78 | assert counts.prob(1) < 0.1 79 | counts.smooth() 80 | assert counts.prob(oov) > 0 81 | assert counts.prob(oov) < counts.prob(i) 82 | -------------------------------------------------------------------------------- /preshed/tests/test_hashing.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from preshed.maps import PreshMap 4 | import random 5 | 6 | 7 | def test_insert(): 8 | h = PreshMap() 9 | assert h[1] is None 10 | h[1] = 5 11 | assert h[1] == 5 12 | h[2] = 6 13 | assert h[1] == 5 14 | assert h[2] == 6 15 | 16 | def test_resize(): 17 | h = PreshMap(4) 18 | h[4] = 12 19 | for i in range(10, 100): 20 | value = int(i * (random.random() + 1)) 21 | h[i] = value 22 | assert h[4] == 12 23 | 24 | 25 | def test_zero_key(): 26 | h = PreshMap() 27 | h[0] = 6 28 | h[5] = 12 29 | assert h[0] == 6 30 | assert h[5] == 12 31 | 32 | for i in range(500, 1000): 33 | h[i] = i * random.random() 34 | assert h[0] == 6 35 | assert h[5] == 12 36 | 37 | 38 | def test_iter(): 39 | key_sum = 0 40 | val_sum = 0 41 | h = PreshMap() 42 | for i in range(56, 24, -3): 43 | h[i] = i * 2 44 | key_sum += i 45 | val_sum += i * 2 46 | for key, value in h.items(): 47 | key_sum -= key 48 | val_sum -= value 49 | assert key_sum == 0 50 | assert val_sum == 0 51 | 52 | 53 | def test_one_and_empty(): 54 | # See Issue #21 55 
| table = PreshMap() 56 | for i in range(100, 110): 57 | table[i] = i 58 | del table[i] 59 | assert table[0] == None 60 | 61 | 62 | def test_many_and_empty(): 63 | # See Issue #21 64 | table = PreshMap() 65 | for i in range(100, 110): 66 | table[i] = i 67 | for i in range(100, 110): 68 | del table[i] 69 | assert table[0] == None 70 | 71 | 72 | def test_zero_values(): 73 | table = PreshMap() 74 | table[10] = 0 75 | assert table[10] == 0 76 | assert table[11] is None 77 | -------------------------------------------------------------------------------- /preshed/tests/test_pop.py: -------------------------------------------------------------------------------- 1 | from ..maps import PreshMap 2 | 3 | 4 | def test_pop1(): 5 | table = PreshMap() 6 | table[10] = 20 7 | table[30] = 25 8 | assert table[10] == 20 9 | assert table[30] == 25 10 | table.pop(30) 11 | assert table[10] == 20 12 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools", 4 | "cython>=0.28", 5 | "cymem>=2.0.2,<2.1.0", 6 | "murmurhash>=0.28.0,<1.1.0", 7 | ] 8 | build-backend = "setuptools.build_meta" 9 | 10 | 11 | [tool.cibuildwheel] 12 | build = "*" 13 | skip = "pp* cp36* cp37* cp38*" 14 | test-skip = "" 15 | free-threaded-support = false 16 | 17 | archs = ["native"] 18 | 19 | build-frontend = "default" 20 | config-settings = {} 21 | dependency-versions = "pinned" 22 | environment = {} 23 | environment-pass = [] 24 | build-verbosity = 0 25 | 26 | before-all = "" 27 | before-build = "" 28 | repair-wheel-command = "" 29 | 30 | test-command = "" 31 | before-test = "" 32 | test-requires = [] 33 | test-extras = [] 34 | 35 | container-engine = "docker" 36 | 37 | manylinux-x86_64-image = "manylinux2014" 38 | manylinux-i686-image = "manylinux2014" 39 | manylinux-aarch64-image = "manylinux2014" 40 | manylinux-ppc64le-image = 
"manylinux2014" 41 | manylinux-s390x-image = "manylinux2014" 42 | manylinux-pypy_x86_64-image = "manylinux2014" 43 | manylinux-pypy_i686-image = "manylinux2014" 44 | manylinux-pypy_aarch64-image = "manylinux2014" 45 | 46 | musllinux-x86_64-image = "musllinux_1_2" 47 | musllinux-i686-image = "musllinux_1_2" 48 | musllinux-aarch64-image = "musllinux_1_2" 49 | musllinux-ppc64le-image = "musllinux_1_2" 50 | musllinux-s390x-image = "musllinux_1_2" 51 | 52 | 53 | [tool.cibuildwheel.linux] 54 | repair-wheel-command = "auditwheel repair -w {dest_dir} {wheel}" 55 | 56 | [tool.cibuildwheel.macos] 57 | repair-wheel-command = "delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel}" 58 | 59 | [tool.cibuildwheel.windows] 60 | 61 | [tool.cibuildwheel.pyodide] 62 | 63 | [tool.isort] 64 | profile = "black" 65 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | cymem>=2.0.2,<2.1.0 2 | cython>=0.28 3 | pytest 4 | murmurhash>=0.28.0,<1.1.0 5 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from __future__ import print_function 3 | import os 4 | import sys 5 | import contextlib 6 | from setuptools import Extension, setup 7 | from setuptools.command.build_ext import build_ext 8 | from sysconfig import get_path 9 | from Cython.Build import cythonize 10 | 11 | 12 | PACKAGES = ["preshed", "preshed.tests"] 13 | MOD_NAMES = ["preshed.maps", "preshed.counter", "preshed.bloom"] 14 | 15 | 16 | # By subclassing build_extensions we have the actual compiler that will be used which is really known only after finalize_options 17 | # http://stackoverflow.com/questions/724664/python-distutils-how-to-get-a-compiler-that-is-going-to-be-used 18 | compile_options = { 19 | "msvc": ["/Ox", "/EHsc"], 20 | 
"other": ["-O3", "-Wno-strict-prototypes", "-Wno-unused-function"], 21 | } 22 | link_options = {"msvc": [], "other": []} 23 | 24 | 25 | class build_ext_options: 26 | def build_options(self): 27 | for e in self.extensions: 28 | e.extra_compile_args = compile_options.get( 29 | self.compiler.compiler_type, compile_options["other"] 30 | ) 31 | for e in self.extensions: 32 | e.extra_link_args = link_options.get( 33 | self.compiler.compiler_type, link_options["other"] 34 | ) 35 | 36 | 37 | class build_ext_subclass(build_ext, build_ext_options): 38 | def build_extensions(self): 39 | build_ext_options.build_options(self) 40 | build_ext.build_extensions(self) 41 | 42 | 43 | def clean(path): 44 | for name in MOD_NAMES: 45 | name = name.replace(".", "/") 46 | for ext in [".so", ".html", ".cpp", ".c"]: 47 | file_path = os.path.join(path, name + ext) 48 | if os.path.exists(file_path): 49 | os.unlink(file_path) 50 | 51 | 52 | @contextlib.contextmanager 53 | def chdir(new_dir): 54 | old_dir = os.getcwd() 55 | try: 56 | os.chdir(new_dir) 57 | sys.path.insert(0, new_dir) 58 | yield 59 | finally: 60 | del sys.path[0] 61 | os.chdir(old_dir) 62 | 63 | 64 | def setup_package(): 65 | root = os.path.abspath(os.path.dirname(__file__)) 66 | 67 | if len(sys.argv) > 1 and sys.argv[1] == "clean": 68 | return clean(root) 69 | 70 | with chdir(root): 71 | with open(os.path.join(root, "preshed", "about.py")) as f: 72 | about = {} 73 | exec(f.read(), about) 74 | 75 | with open(os.path.join(root, "README.md")) as f: 76 | readme = f.read() 77 | 78 | include_dirs = [get_path("include")] 79 | 80 | ext_modules = [] 81 | for mod_name in MOD_NAMES: 82 | mod_path = mod_name.replace(".", "/") + ".pyx" 83 | ext_modules.append( 84 | Extension( 85 | mod_name, [mod_path], language="c++", include_dirs=include_dirs 86 | ) 87 | ) 88 | 89 | setup( 90 | name="preshed", 91 | zip_safe=False, 92 | packages=PACKAGES, 93 | package_data={"": ["*.pyx", "*.pxd"]}, 94 | description=about["__summary__"], 95 | 
long_description=readme, 96 | long_description_content_type="text/markdown", 97 | author=about["__author__"], 98 | author_email=about["__email__"], 99 | version=about["__version__"], 100 | url=about["__uri__"], 101 | license=about["__license__"], 102 | ext_modules=cythonize(ext_modules, language_level=2), 103 | python_requires=">=3.6,<3.14", 104 | install_requires=["cymem>=2.0.2,<2.1.0", "murmurhash>=0.28.0,<1.1.0"], 105 | classifiers=[ 106 | "Environment :: Console", 107 | "Intended Audience :: Developers", 108 | "Intended Audience :: Science/Research", 109 | "License :: OSI Approved :: MIT License", 110 | "Operating System :: POSIX :: Linux", 111 | "Operating System :: MacOS :: MacOS X", 112 | "Operating System :: Microsoft :: Windows", 113 | "Programming Language :: Cython", 114 | "Programming Language :: Python :: 3.6", 115 | "Programming Language :: Python :: 3.7", 116 | "Programming Language :: Python :: 3.8", 117 | "Programming Language :: Python :: 3.9", 118 | "Programming Language :: Python :: 3.10", 119 | "Programming Language :: Python :: 3.11", 120 | "Programming Language :: Python :: 3.12", 121 | "Topic :: Scientific/Engineering", 122 | ], 123 | cmdclass={"build_ext": build_ext_subclass}, 124 | ) 125 | 126 | 127 | if __name__ == "__main__": 128 | setup_package() 129 | --------------------------------------------------------------------------------