├── .github └── workflows │ ├── actions │ ├── entrypoint.sh │ ├── manylinux2014_aarch64 │ │ └── action.yml │ ├── manylinux2014_i686 │ │ └── action.yml │ └── manylinux2014_x86_64 │ │ └── action.yml │ ├── build_linux.yml │ ├── build_mac.yml │ ├── build_win.yml │ └── make_sdist.yml ├── .gitignore ├── .gitmodules ├── LICENSE ├── MANIFEST.in ├── README.md ├── dartsclone ├── __init__.py ├── _dartsclone.pxd └── _dartsclone.pyx ├── setup.py └── test └── test_darts.py /.github/workflows/actions/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | PYTHONS=("cp36-cp36m" "cp37-cp37m" "cp38-cp38", "cp39-cp39") 4 | 5 | for PYTHON in ${PYTHONS[@]}; do 6 | /opt/python/${PYTHON}/bin/pip install --upgrade pip 7 | /opt/python/${PYTHON}/bin/pip install -U wheel auditwheel 8 | /opt/python/${PYTHON}/bin/pip wheel . -w /github/workspace/wheelhouse/ 9 | done 10 | 11 | for whl in /github/workspace/wheelhouse/*linux_"$(uname -m)".whl; do 12 | auditwheel repair $whl --plat manylinux2014_"$(uname -m)" 13 | done 14 | -------------------------------------------------------------------------------- /.github/workflows/actions/manylinux2014_aarch64/action.yml: -------------------------------------------------------------------------------- 1 | name: 'build wheels with manylinux2014_aarch64' 2 | description: 'build wheels with manylinux2014_aarch64' 3 | runs: 4 | using: 'docker' 5 | image: docker://quay.io/pypa/manylinux2014_aarch64 6 | args: 7 | - .github/workflows/actions/entrypoint.sh 8 | -------------------------------------------------------------------------------- /.github/workflows/actions/manylinux2014_i686/action.yml: -------------------------------------------------------------------------------- 1 | name: 'build wheels with manylinux2014_i686' 2 | description: 'build wheels with manylinux2014_i686' 3 | runs: 4 | using: 'docker' 5 | image: docker://quay.io/pypa/manylinux2014_i686 6 | args: 7 | - 
.github/workflows/actions/entrypoint.sh 8 | -------------------------------------------------------------------------------- /.github/workflows/actions/manylinux2014_x86_64/action.yml: -------------------------------------------------------------------------------- 1 | name: 'build wheels with manylinux2014_x86_64' 2 | description: 'build wheels with manylinux2014_x86_64' 3 | runs: 4 | using: 'docker' 5 | image: docker://quay.io/pypa/manylinux2014_x86_64 6 | args: 7 | - .github/workflows/actions/entrypoint.sh 8 | -------------------------------------------------------------------------------- /.github/workflows/build_linux.yml: -------------------------------------------------------------------------------- 1 | name: build wheels for linux 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up QEMU 16 | id: qemu 17 | uses: docker/setup-qemu-action@v1 18 | - name: submodule update 19 | run: | 20 | git submodule update --init --recursive 21 | - uses: ./.github/workflows/actions/manylinux2014_x86_64/ 22 | - uses: ./.github/workflows/actions/manylinux2014_i686/ 23 | - uses: ./.github/workflows/actions/manylinux2014_aarch64/ 24 | - name: copy manylinux wheels 25 | run: | 26 | mkdir dist 27 | cp wheelhouse/dartsclone*manylinux2014_x86_64.whl dist/ 28 | cp wheelhouse/dartsclone*manylinux2014_i686.whl dist/ 29 | cp wheelhouse/dartsclone*manylinux2014_aarch64.whl dist/ 30 | - name: upload wheels 31 | uses: actions/upload-artifact@v1 32 | with: 33 | name: dist 34 | path: dist 35 | - name: Publish to PyPI 36 | env: 37 | PYPI_USERNAME: ${{ secrets.PYPI_USER }} 38 | PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 39 | run: | 40 | pip install twine 41 | python -m twine upload -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --repository-url https://upload.pypi.org/legacy/ dist/* 42 | -------------------------------------------------------------------------------- 
/.github/workflows/build_mac.yml: -------------------------------------------------------------------------------- 1 | name: build wheels for macos 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | matrix: 15 | os: [macos-10.15] 16 | architecture: [x64] 17 | python-version: [3.6, 3.7, 3.8, 3.9] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v1 23 | with: 24 | architecture: ${{ matrix.architecture }} 25 | python-version: ${{ matrix.python-version }} 26 | - name: build wheel 27 | run: | 28 | git submodule update --init --recursive 29 | pip install -U wheel 30 | python setup.py bdist_wheel 31 | shell: bash 32 | - name: upload wheel 33 | uses: actions/upload-artifact@v1 34 | with: 35 | name: dist_${{ matrix.os }}_${{ matrix.architecture }}_${{ matrix.python-version }} 36 | path: dist 37 | - name: Publish to PyPI 38 | env: 39 | PYPI_USERNAME: ${{ secrets.PYPI_USER }} 40 | PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 41 | run: | 42 | pip install twine 43 | python -m twine upload -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --repository-url https://upload.pypi.org/legacy/ dist/* 44 | shell: bash 45 | -------------------------------------------------------------------------------- /.github/workflows/build_win.yml: -------------------------------------------------------------------------------- 1 | name: build wheels for windows 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ${{ matrix.os }} 12 | 13 | strategy: 14 | matrix: 15 | os: [windows-2019] 16 | architecture: [x64, x86] 17 | python-version: [3.6, 3.7, 3.8, 3.9] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Python ${{ matrix.python-version }} 22 | uses: actions/setup-python@v1 23 | with: 24 | architecture: ${{ matrix.architecture }} 25 | python-version: ${{ matrix.python-version }} 26 | - name: 
build wheel 27 | run: | 28 | git submodule update --init --recursive 29 | pip install -U wheel 30 | python setup.py bdist_wheel 31 | shell: bash 32 | - name: upload wheel 33 | uses: actions/upload-artifact@v1 34 | with: 35 | name: dist_${{ matrix.os }}_${{ matrix.architecture }}_${{ matrix.python-version }} 36 | path: dist 37 | - name: Publish to PyPI 38 | env: 39 | PYPI_USERNAME: ${{ secrets.PYPI_USER }} 40 | PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 41 | run: | 42 | pip install twine 43 | python -m twine upload -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --repository-url https://upload.pypi.org/legacy/ dist/* 44 | shell: bash 45 | -------------------------------------------------------------------------------- /.github/workflows/make_sdist.yml: -------------------------------------------------------------------------------- 1 | name: make source distribution 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | 8 | jobs: 9 | build: 10 | 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up Python 16 | uses: actions/setup-python@v1 17 | with: 18 | python-version: 3.8 19 | - name: make sdist 20 | run: | 21 | git submodule update --init --recursive 22 | python setup.py sdist 23 | - name: upload sdist 24 | uses: actions/upload-artifact@v1 25 | with: 26 | name: dist 27 | path: dist 28 | - name: Publish to PyPI 29 | env: 30 | PYPI_USERNAME: ${{ secrets.PYPI_USER }} 31 | PYPI_PASSWORD: ${{ secrets.PYPI_PASSWORD }} 32 | run: | 33 | pip install twine 34 | python -m twine upload -u ${PYPI_USERNAME} -p ${PYPI_PASSWORD} --repository-url https://upload.pypi.org/legacy/ dist/* 35 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | 
develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .coverage 42 | .coverage.* 43 | .cache 44 | nosetests.xml 45 | coverage.xml 46 | *.cover 47 | .hypothesis/ 48 | .pytest_cache/ 49 | 50 | # Translations 51 | *.mo 52 | *.pot 53 | 54 | # Django stuff: 55 | *.log 56 | local_settings.py 57 | db.sqlite3 58 | 59 | # Flask stuff: 60 | instance/ 61 | .webassets-cache 62 | 63 | # Scrapy stuff: 64 | .scrapy 65 | 66 | # Sphinx documentation 67 | docs/_build/ 68 | 69 | # PyBuilder 70 | target/ 71 | 72 | # Jupyter Notebook 73 | .ipynb_checkpoints 74 | 75 | # pyenv 76 | .python-version 77 | 78 | # celery beat schedule file 79 | celerybeat-schedule 80 | 81 | # SageMath parsed files 82 | *.sage.py 83 | 84 | # Environments 85 | .env 86 | .venv 87 | env/ 88 | venv/ 89 | ENV/ 90 | env.bak/ 91 | venv.bak/ 92 | 93 | # Spyder project settings 94 | .spyderproject 95 | .spyproject 96 | 97 | # Rope project settings 98 | .ropeproject 99 | 100 | # mkdocs documentation 101 | /site 102 | 103 | # mypy 104 | .mypy_cache/ 105 | 106 | # intelliJ 107 | .idea/ 108 | 109 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "csrc"] 2 | path = csrc 3 | url = https://github.com/s-yata/darts-clone.git 4 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 
39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. 
Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include csrc/src/darts-config.h 2 | include csrc/include/darts.h 3 | include dartsclone/*.pyx 4 | include dartsclone/*.pxd 5 | include dartsclone/*.py 6 | exclude dartsclone/*.cpp 7 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # darts-clone-python 2 | 3 | [Darts-clone](https://github.com/s-yata/darts-clone) binding for Python 3.x. 4 | This repository provides Cython-based pip-installable package. 5 | 6 | ## Installation 7 | 8 | pip install dartsclone 9 | 10 | 11 | ## Usage 12 | 13 | darts-clone-python is almost compatible with darts-clone. 
14 | 15 | ```python 16 | import dartsclone 17 | 18 | darts = dartsclone.DoubleArray() 19 | 20 | # build index 21 | data = [b'apple', b'banana', b'orange'] 22 | values = [1, 3, 2] 23 | darts.build(data, values=values) 24 | 25 | # exact match search 26 | result = darts.exact_match_search('apple'.encode('utf-8')) 27 | print(result) # (1, 5) 28 | 29 | # common prefix search 30 | result = darts.common_prefix_search('apples'.encode('utf-8'), pair_type=False) 31 | print(result) # [1] 32 | 33 | # save index 34 | darts.save('sample.dic') 35 | 36 | # load index 37 | darts.clear() 38 | darts.open('sample.dic') 39 | 40 | # dump array data 41 | array = darts.array() 42 | 43 | # load array data 44 | darts.clear() 45 | darts.set_array(array) 46 | 47 | ``` 48 | -------------------------------------------------------------------------------- /dartsclone/__init__.py: -------------------------------------------------------------------------------- 1 | from dartsclone._dartsclone import DoubleArray 2 | 3 | __all__ = [ 4 | "DoubleArray" 5 | ] 6 | -------------------------------------------------------------------------------- /dartsclone/_dartsclone.pxd: -------------------------------------------------------------------------------- 1 | cdef extern from "darts.h": 2 | cdef struct result_pair_type "Darts::DoubleArray::result_pair_type": 3 | int value 4 | size_t length 5 | 6 | cdef cppclass CppDoubleArray "Darts::DoubleArray": 7 | void set_array(const void *ptr, size_t size) 8 | const void *array() 9 | void clear() nogil 10 | size_t unit_size() nogil 11 | size_t size() nogil 12 | size_t total_size() nogil 13 | size_t nonzero_size() nogil 14 | int build(size_t num_keys, 15 | const char ** keys, 16 | const size_t *lengths, 17 | const int *values, 18 | int (*progress_func)(size_t, size_t)) nogil except + 19 | int open(const char *file_name, 20 | const char *mode, 21 | size_t offset, 22 | size_t size) nogil except + 23 | int save(const char *file_name, 24 | const char *mode, 25 | size_t 
offset) nogil except +
26 |         void exact_match_search "exactMatchSearch"(const char *key,
27 |                                                    int & result,
28 |                                                    size_t length,
29 |                                                    size_t node_pos) nogil except +
30 |         void exact_match_search "exactMatchSearch"(const char *key,
31 |                                                    result_pair_type & result,
32 |                                                    size_t length,
33 |                                                    size_t node_pos) nogil except +
34 |         size_t common_prefix_search "commonPrefixSearch"(const char *key,
35 |                                                          int *results,
36 |                                                          size_t max_num_results,
37 |                                                          size_t length,
38 |                                                          size_t node_pos) nogil except +
39 |         size_t common_prefix_search "commonPrefixSearch"(const char *key,
40 |                                                          result_pair_type *results,
41 |                                                          size_t max_num_results,
42 |                                                          size_t length,
43 |                                                          size_t node_pos) nogil except +
44 |         int traverse(const char *key,
45 |                      size_t & node_pos,
46 |                      size_t & key_pos,
47 |                      size_t length) nogil except +
48 | 
49 | 
50 | 
51 | 
52 | cdef class DoubleArray:
53 |     cdef CppDoubleArray *wrapped
54 |     # Python buffer lent to the C++ object by set_array(); referenced here
55 |     # so that the memory stays valid while it is in use.
56 |     cdef object _array_owner
57 | 
--------------------------------------------------------------------------------
/dartsclone/_dartsclone.pyx:
--------------------------------------------------------------------------------
1 | from libc.stdlib cimport malloc, free
2 | 
3 | 
4 | cdef class DoubleArray:
5 |     """Python wrapper around the C++ ``Darts::DoubleArray`` class.
6 | 
7 |     Keys are handed to the C++ side as ``char *`` buffers, so they must be
8 |     ``bytes`` objects.
9 |     """
10 | 
11 |     def __cinit__(self):
12 |         self.wrapped = new CppDoubleArray()
13 | 
14 |     def __dealloc__(self):
15 |         del self.wrapped
16 | 
17 |     def __getstate__(self):
18 |         """Return the raw array bytes as the pickle state."""
19 |         return self.array()
20 | 
21 |     def __setstate__(self, array):
22 |         """Restore the array from the bytes produced by ``__getstate__``."""
23 |         self.set_array(array)
24 | 
25 |     def array(self):
26 |         """Return a ``bytes`` copy of the array data (``total_size()`` bytes)."""
27 |         cdef size_t total_size = self.wrapped.total_size()
28 |         cdef const char *data = <const char *> self.wrapped.array()
29 |         if data == NULL or total_size == 0:
30 |             # Nothing has been built or loaded yet.
31 |             return b''
32 |         # Slicing a char pointer copies exactly ``total_size`` bytes.
33 |         return data[:total_size]
34 | 
35 |     def set_array(self, const unsigned char[::1] array, size_t size=0):
36 |         """Use ``array`` as the array data without copying it.
37 | 
38 |         :param array: contiguous bytes-like object such as the result of ``array()``
39 |         :param size: number of units in ``array``; when 0 it is derived from
40 |             the buffer length and ``unit_size()``
41 |         """
42 |         cdef size_t num_bytes = array.shape[0]
43 |         if num_bytes == 0:
44 |             # An empty buffer has no first element to take the address of.
45 |             self.wrapped.clear()
46 |             self._array_owner = None
47 |             return
48 |         if size == 0:
49 |             size = num_bytes // self.wrapped.unit_size()
50 |         self.wrapped.set_array(<const void *> &array[0], size)
51 |         # The C++ object only borrows the pointer, so keep the buffer alive
52 |         # for as long as it is in use.
53 |         self._array_owner = array
54 | 
55 |     def clear(self):
56 |         self.wrapped.clear()
57 |         self._array_owner = None
58 | 
59 |     def unit_size(self):
60 |         return self.wrapped.unit_size()
61 | 
62 |     def size(self):
63 |         return self.wrapped.size()
64 | 
65 |     def total_size(self):
66 |         return self.wrapped.total_size()
67 | 
68 |     def nonzero_size(self):
69 |         return self.wrapped.nonzero_size()
70 | 
71 |     def build(self, keys,
72 |               lengths = None,
73 |               values = None):
74 |         """Build the array from ``keys``.
75 | 
76 |         :param keys: iterable of ``bytes`` keys
77 |         :param lengths: optional iterable holding the length of each key
78 |         :param values: optional iterable holding the int value of each key
79 |         :raises ValueError: if ``lengths`` or ``values`` does not have exactly
80 |             one entry per key
81 |         :raises MemoryError: if a temporary C buffer cannot be allocated
82 |         """
83 |         cdef size_t num_keys
84 |         cdef const char **_keys = NULL
85 |         cdef size_t *_lengths = NULL
86 |         cdef int *_values = NULL
87 | 
88 |         # Materialise the inputs: the C arrays below borrow pointers into the
89 |         # key objects, and every size must be known before allocating.
90 |         keys = list(keys)
91 |         num_keys = len(keys)
92 |         if lengths is not None:
93 |             lengths = list(lengths)
94 |             if len(lengths) != num_keys:
95 |                 raise ValueError('lengths must have one entry per key')
96 |         if values is not None:
97 |             values = list(values)
98 |             if len(values) != num_keys:
99 |                 raise ValueError('values must have one entry per key')
100 | 
101 |         try:
102 |             # Allocate and fill inside ``try`` so that a failing conversion
103 |             # (for example a key that is not bytes) cannot leak the buffers.
104 |             _keys = <const char **> malloc(num_keys * sizeof(char *))
105 |             if _keys == NULL and num_keys > 0:
106 |                 raise MemoryError()
107 |             for i, key in enumerate(keys):
108 |                 _keys[i] = key
109 |             if lengths is not None:
110 |                 _lengths = <size_t *> malloc(num_keys * sizeof(size_t))
111 |                 if _lengths == NULL and num_keys > 0:
112 |                     raise MemoryError()
113 |                 for i, length in enumerate(lengths):
114 |                     _lengths[i] = length
115 |             if values is not None:
116 |                 _values = <int *> malloc(num_keys * sizeof(int))
117 |                 if _values == NULL and num_keys > 0:
118 |                     raise MemoryError()
119 |                 for i, value in enumerate(values):
120 |                     _values[i] = value
121 |             self.wrapped.build(num_keys, _keys, _lengths, _values, NULL)
122 |             # The array now lives in memory owned by the C++ object.
123 |             self._array_owner = None
124 |         finally:
125 |             # free(NULL) is a no-op, so no per-buffer checks are needed.
126 |             free(_keys)
127 |             free(_lengths)
128 |             free(_values)
129 | 
130 |     def open(self, file_name,
131 |              mode = 'rb',
132 |              size_t offset = 0,
133 |              size_t size = 0):
134 |         """Load an array from ``file_name``.
135 | 
136 |         :raises IOError: if the C++ ``open`` call returns a non-zero status
137 |         """
138 |         cdef int status
139 |         encoded_name = file_name.encode('utf-8')
140 |         cdef const char *_file_name = encoded_name
141 |         encoded_mode = mode.encode('utf-8')
142 |         cdef const char *_mode = encoded_mode
143 |         with nogil:
144 |             status = self.wrapped.open(_file_name, _mode, offset, size)
145 |         if status != 0:
146 |             raise IOError('failed to open double array file: {!r}'.format(file_name))
147 |         # The array now lives in memory owned by the C++ object.
148 |         self._array_owner = None
149 | 
150 |     def save(self, file_name,
151 |              mode = 'wb',
152 |              size_t offset = 0):
153 |         """Write the array to ``file_name``.
154 | 
155 |         :raises IOError: if the C++ ``save`` call returns a non-zero status
156 |         """
157 |         cdef int status
158 |         encoded_name = file_name.encode('utf-8')
159 |         cdef const char *_file_name = encoded_name
160 |         encoded_mode = mode.encode('utf-8')
161 |         cdef const char *_mode = encoded_mode
162 |         with nogil:
163 |             status = self.wrapped.save(_file_name, _mode, offset)
164 |         if status != 0:
165 |             raise IOError('failed to save double array file: {!r}'.format(file_name))
166 | 
167 |     def exact_match_search(self, key,
168 |                            size_t length = 0,
169 |                            size_t node_pos = 0,
170 |                            pair_type=True):
171 |         """Look up ``key`` as an exact match.
172 | 
173 |         :return: a ``(value, length)`` tuple when ``pair_type`` is true,
174 |             otherwise only the value
175 |         """
176 |         cdef const char *_key = key
177 |         if pair_type:
178 |             return self.__exact_match_search_pair_type(_key, length, node_pos)
179 |         else:
180 |             return self.__exact_match_search(_key, length, node_pos)
181 | 
182 |     def common_prefix_search(self, key,
183 |                              size_t max_num_results = 0,
184 |                              size_t length = 0,
185 |                              size_t node_pos = 0,
186 |                              pair_type=True):
187 |         """Return the entries whose keys are prefixes of ``key``.
188 | 
189 |         :param max_num_results: maximum number of results to return; 0 means
190 |             ``len(key)``
191 |         :return: list of ``(value, length)`` tuples when ``pair_type`` is
192 |             true, otherwise a list of values
193 |         """
194 |         cdef const char *_key = key
195 |         if max_num_results == 0:
196 |             max_num_results = len(key)
197 |         if pair_type:
198 |             return self.__common_prefix_search_pair_type(_key, max_num_results, length, node_pos)
199 |         else:
200 |             return self.__common_prefix_search(_key, max_num_results, length, node_pos)
201 | 
202 |     def traverse(self, key,
203 |                  size_t node_pos,
204 |                  size_t key_pos,
205 |                  size_t length = 0):
206 |         """Call the C++ ``traverse`` and return its int result.
207 | 
208 |         ``node_pos`` and ``key_pos`` are taken by reference on the C++ side;
209 |         any updates made to them there are not returned to the caller.
210 |         """
211 |         cdef const char *_key = key
212 |         cdef int result
213 |         with nogil:
214 |             result = self.wrapped.traverse(_key, node_pos, key_pos, length)
215 |         return result
216 | 
217 |     def __exact_match_search(self, const char *key,
218 |                              size_t length = 0,
219 |                              size_t node_pos = 0):
220 |         cdef int result
221 |         with nogil:
222 |             self.wrapped.exact_match_search(key, result, length, node_pos)
223 |         return result
224 | 
225 |     def __exact_match_search_pair_type(self, const char *key,
226 |                                        size_t length = 0,
227 |                                        size_t node_pos = 0):
228 |         cdef result_pair_type result
229 |         with nogil:
230 |             self.wrapped.exact_match_search(key, result, length, node_pos)
231 |         return result.value, result.length
232 | 
233 |     def __common_prefix_search(self, const char *key,
234 |                                size_t max_num_results,
235 |                                size_t length,
236 |                                size_t node_pos):
237 |         cdef int *results = <int *> malloc(max_num_results * sizeof(int))
238 |         cdef size_t result_len
239 |         if results == NULL and max_num_results > 0:
240 |             raise MemoryError()
241 |         try:
242 |             with nogil:
243 |                 result_len = self.wrapped.common_prefix_search(key, results, max_num_results, length, node_pos)
244 |             # The C++ call may report more matches than fit in ``results``;
245 |             # only the first ``max_num_results`` entries were written.
246 |             if result_len > max_num_results:
247 |                 result_len = max_num_results
248 |             values = list()
249 |             for i in range(result_len):
250 |                 values.append(results[i])
251 |         finally:
252 |             free(results)
253 |         return values
254 | 
255 |     def __common_prefix_search_pair_type(self, const char *key,
256 |                                          size_t max_num_results,
257 |                                          size_t length,
258 |                                          size_t node_pos):
259 |         cdef result_pair_type *results = <result_pair_type *> malloc(max_num_results * sizeof(result_pair_type))
260 |         cdef result_pair_type result
261 |         cdef size_t result_len
262 |         if results == NULL and max_num_results > 0:
263 |             raise MemoryError()
264 |         try:
265 |             with nogil:
266 |                 result_len = self.wrapped.common_prefix_search(key, results, max_num_results, length, node_pos)
267 |             # The reported count may exceed the buffer size; clamp it before
268 |             # reading the results back.
269 |             if result_len > max_num_results:
270 |                 result_len = max_num_results
271 |             values = list()
272 |             for i in range(result_len):
273 |                 result = results[i]
274 |                 values.append((result.value, result.length))
275 |         finally:
276 |             free(results)
277 |         return values
278 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, Extension
2 | 
3 | NAME = 'dartsclone'
4 | VERSION = '0.10.2'
5 | EXTENSIONS = [
6 |     Extension(
7 |         '{0}._{0}'.format(NAME),
8 |         language='c++',
9 |         sources=[
10 |             '{0}/_{0}.pyx'.format(NAME),
11 |             'csrc/src/darts.cc'
12 |         ],
13 |         include_dirs=['./csrc/include']
14 |     )
15 | ]
16 | 
17 | if __name__ == '__main__':
18 |     import os
19 |     from os import path
20 |     import glob
21 | 
22 |     # Delete previously generated C++ files from the package directory
23 |     # before building.
24 |     for generated in glob.glob('%s/*cpp' % NAME):
25 |         os.remove(generated)
26 | 
27 |     with open(path.join(path.dirname(__file__), 'README.md'), encoding='utf-8') as f:
28 |         readme = f.read()
29 | 
30 |     setup(
31 |         packages=[NAME],
32 |         name=NAME,
33 |         version=VERSION,
34 |         description='Python binding of Darts Clone',
35 |         author='@rixwew',
36 |         author_email='rixwew@gmail.com',
37 |         url='https://github.com/rixwew/darts-clone-python',
38 |         setup_requires=[
39 |             'cython>=0.28',
40 |         ],
41 |         ext_modules=EXTENSIONS,
42 |         zip_safe=False,
43 |         long_description=readme,
44 |         long_description_content_type='text/markdown',
45 |         # No Python 2 classifier: this script itself relies on
46 |         # open(..., encoding=...), which only exists on Python 3.
47 |         classifiers=[
48 |             'License :: OSI Approved :: Apache Software License',
49 |             'Programming Language :: Cython',
50 |             'Programming Language :: Python :: 3',
51 |             'Topic :: Text Processing :: Linguistic'
52 |         ],
53 |         # NOTE(review): Cython looks like a build-time-only dependency
54 |         # (see setup_requires); confirm before dropping it from here.
55 |         install_requires=['Cython']
56 |     )
57 | 
58 | 
--------------------------------------------------------------------------------
/test/test_darts.py:
--------------------------------------------------------------------------------
1 | import tempfile
2 | import unittest
3 | import pickle
4 | 
5 | from dartsclone import DoubleArray
6 | 
7 | 
8 | class DoubleArrayTest(unittest.TestCase):
9 |     """test class of double 
array 10 | """ 11 | 12 | def test_darts_no_values(self): 13 | keys = ['test', 'テスト', 'テストケース'] 14 | darts = DoubleArray() 15 | darts.build(sorted([key.encode() for key in keys])) 16 | self.assertEqual(1, darts.exact_match_search('テスト'.encode(), pair_type=False)) 17 | self.assertEqual(0, darts.common_prefix_search('testcase'.encode(), pair_type=False)[0]) 18 | self.assertEqual(0, darts.exact_match_search('test'.encode(), pair_type=False)) 19 | self.assertEqual(2, darts.common_prefix_search('テストケース'.encode(), pair_type=False)[1]) 20 | 21 | def test_darts_with_values(self): 22 | keys = ['test', 'テスト', 'テストケース'] 23 | darts = DoubleArray() 24 | darts.build(sorted([key.encode() for key in keys]), values=[3, 5, 1]) 25 | self.assertEqual(5, darts.exact_match_search('テスト'.encode(), pair_type=False)) 26 | self.assertEqual(3, darts.common_prefix_search('testcase'.encode(), pair_type=False)[0]) 27 | self.assertEqual(1, darts.exact_match_search('テストケース'.encode(), pair_type=False)) 28 | self.assertEqual(1, darts.common_prefix_search('テストケース'.encode(), pair_type=False)[1]) 29 | 30 | def test_darts_save(self): 31 | keys = ['test', 'テスト', 'テストケース'] 32 | darts = DoubleArray() 33 | darts.build(sorted([key.encode() for key in keys]), values=[3, 5, 1]) 34 | with tempfile.NamedTemporaryFile('wb') as output_file: 35 | darts.save(output_file.name) 36 | output_file.flush() 37 | darts.clear() 38 | darts.open(output_file.name) 39 | self.assertEqual(5, darts.exact_match_search('テスト'.encode(), pair_type=False)) 40 | self.assertEqual(3, darts.common_prefix_search('testcase'.encode(), pair_type=False)[0]) 41 | 42 | def test_darts_pickle(self): 43 | keys = ['test', 'テスト', 'テストケース'] 44 | darts = DoubleArray() 45 | darts.build(sorted([key.encode() for key in keys]), values=[3, 5, 1]) 46 | with tempfile.NamedTemporaryFile('wb') as output_file: 47 | pickle.dump(darts, output_file) 48 | output_file.flush() 49 | with open(output_file.name, 'rb') as input_file: 50 | darts = pickle.load(input_file) 51 | 
self.assertEqual(5, darts.exact_match_search('テスト'.encode(), pair_type=False)) 52 | self.assertEqual(3, darts.common_prefix_search('testcase'.encode(), pair_type=False)[0]) 53 | 54 | def test_darts_array(self): 55 | keys = ['test', 'テスト', 'テストケース'] 56 | darts = DoubleArray() 57 | darts.build(sorted([key.encode() for key in keys]), values=[3, 5, 1]) 58 | array = darts.array() 59 | darts = DoubleArray() 60 | darts.set_array(array) 61 | self.assertEqual(5, darts.exact_match_search('テスト'.encode(), pair_type=False)) 62 | self.assertEqual(3, darts.common_prefix_search('testcase'.encode(), pair_type=False)[0]) 63 | 64 | 65 | if __name__ == "__main__": 66 | unittest.main() 67 | --------------------------------------------------------------------------------