├── .github ├── scripts │ ├── build_ffi.sh │ ├── build_linux_wheels.sh │ ├── build_mac_wheels.sh │ ├── build_pyduckling.sh │ └── run_tests.sh ├── stack │ └── stack.yaml └── workflows │ ├── linux-wheels.yml │ ├── mac-tests.yml │ ├── mac-wheels.yml │ └── rust.yml ├── .gitignore ├── .gitmodules ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── MANIFEST.in ├── README.md ├── RELEASE.md ├── build.rs ├── duckling ├── __init__.py ├── duckling.pyi └── tests │ ├── __init__.py │ └── test_duckling.py ├── ext_lib └── README.md ├── packaging ├── 0001-Allow-binaries-larger-than-32MB.patch └── build_wheels.py ├── pyproject.toml ├── setup.py └── src └── lib.rs /.github/scripts/build_ffi.sh: -------------------------------------------------------------------------------- 1 | 2 | # Install Stack 3 | # curl -sSL https://get.haskellstack.org/ | sh 4 | # export PATH="$HOME/.local/bin:$PATH" 5 | 6 | cd duckling-ffi 7 | stack build 8 | cp libducklingffi.so ../ext_lib 9 | -------------------------------------------------------------------------------- /.github/scripts/build_linux_wheels.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ex 3 | shopt -s nullglob 4 | 5 | PYBIN=/opt/python/cp$(echo $PYTHON_VERSION | sed -e 's/\.//g')*/bin 6 | echo $PYBIN 7 | # PYBIN=$(echo $PYBIN) 8 | 9 | # Install ZLib and sudo 10 | yum install -y zlib-devel sudo 11 | export HOME="/root" 12 | 13 | # Install Rustup 14 | curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly -y 15 | export PATH="$HOME/.cargo/bin:$PATH" 16 | 17 | # Install Stack 18 | curl -sSL https://get.haskellstack.org/ | sh 19 | export PATH="$HOME/.local/bin:$PATH" 20 | 21 | # Set stack resolver to 8.6.5 22 | mkdir -p $HOME/.stack/global-project 23 | cp .github/stack/stack.yaml $HOME/.stack/global-project 24 | cp packaging/0001-Allow-binaries-larger-than-32MB.patch $HOME 25 | 26 | pushd $HOME 27 | stack config set resolver ghc-8.6.5 28 | popd 29 | 30 | # Compile 
patchelf and apply 64MB patch 31 | pushd /root 32 | git clone https://github.com/NixOS/patchelf 33 | cd patchelf 34 | git apply $HOME/0001-Allow-binaries-larger-than-32MB.patch 35 | 36 | bash bootstrap.sh 37 | ./configure 38 | make 39 | make install 40 | popd 41 | 42 | # Compile libducklingffi 43 | pushd duckling-ffi 44 | 45 | stack build 46 | cp libducklingffi.so ../ext_lib 47 | popd 48 | 49 | # Produce wheels and patch binaries for redistribution 50 | PYBIN=$(echo $PYBIN) 51 | GHC_LIB=$(stack exec -- ghc --print-libdir) 52 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GHC_LIB/rts:$(pwd)/ext_lib 53 | # for PYBIN in /opt/python/cp{35,36,37,38,39}*/bin; do 54 | "${PYBIN}/pip" install -U setuptools wheel setuptools-rust auditwheel 55 | "${PYBIN}/python" packaging/build_wheels.py 56 | # done 57 | 58 | if [[ $PYTHON_VERSION == "3.9" ]]; then 59 | "${PYBIN}/python" setup.py sdist 60 | fi 61 | -------------------------------------------------------------------------------- /.github/scripts/build_mac_wheels.sh: -------------------------------------------------------------------------------- 1 | set -ex 2 | 3 | mkdir -p $HOME/.stack/global-project 4 | cp .github/stack/stack.yaml $HOME/.stack/global-project 5 | 6 | pushd $HOME 7 | stack config set resolver ghc-8.6.5 8 | popd 9 | 10 | # conda activate test 11 | # which python 12 | # Adjust PATH in macOS because conda is not at front of it 13 | export PATH=/usr/local/miniconda/envs/test/bin:/usr/local/miniconda/condabin:$PATH 14 | GHC_LIB=$(stack exec -- ghc --print-libdir) 15 | export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$GHC_LIB/rts:$(pwd)/ext_lib 16 | 17 | for dir in $GHC_LIB/*/; do 18 | export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$dir 19 | done 20 | 21 | # python setup.py bdist_wheel 22 | python packaging/build_wheels.py 23 | 24 | if [[ $PYTHON_VERSION == "3.8" ]]; then 25 | python setup.py sdist 26 | fi 27 | -------------------------------------------------------------------------------- 
/.github/scripts/build_pyduckling.sh: -------------------------------------------------------------------------------- 1 | 2 | # export PATH="$HOME/.local/bin:$PATH" 3 | mkdir -p $HOME/.stack/global-project 4 | cp .github/stack/stack.yaml $HOME/.stack/global-project 5 | 6 | pushd $HOME 7 | stack config set resolver ghc-8.6.5 8 | popd 9 | 10 | maturin develop 11 | -------------------------------------------------------------------------------- /.github/scripts/run_tests.sh: -------------------------------------------------------------------------------- 1 | 2 | # Set LD_LIBRARY_PATH in order to load dynamic libraries 3 | GHC_PATH=$(stack exec -- ghc --print-libdir) 4 | 5 | 6 | if [[ "$(uname)" != Darwin ]]; then 7 | export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$GHC_PATH/rts:$(pwd)/ext_lib 8 | else 9 | export DYLD_LIBRARY_PATH=$DYLD_LIBRARY_PATH:$GHC_PATH/rts:$(pwd)/ext_lib 10 | fi 11 | pytest -x -v --cov=duckling duckling/tests 12 | -------------------------------------------------------------------------------- /.github/stack/stack.yaml: -------------------------------------------------------------------------------- 1 | packages: [] 2 | resolver: 3 | compiler: ghc-8.6.5 4 | -------------------------------------------------------------------------------- /.github/workflows/linux-wheels.yml: -------------------------------------------------------------------------------- 1 | name: Linux Wheels 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | linux: 14 | name: Linux (CentOS 7) Py${{ matrix.PYTHON_VERSION }} 15 | runs-on: ubuntu-latest 16 | container: 17 | image: quay.io/pypa/manylinux2014_x86_64:latest 18 | volumes: 19 | - my_docker_volume:/volume_mount 20 | env: 21 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }} 22 | RUNNER_OS: "ubuntu" 23 | HOME: "/root" 24 | # options: -u $(id -u):$(id -g) 25 | strategy: 26 | fail-fast: false 27 | matrix: 28 | PYTHON_VERSION: ["3.5", 
"3.6", "3.7", "3.8", "3.9"] 29 | steps: 30 | - name: Checkout branch 31 | uses: actions/checkout@v2 32 | with: 33 | submodules: true 34 | - name: Build wheel 35 | shell: bash -l {0} 36 | run: bash -l .github/scripts/build_linux_wheels.sh 37 | - name: Upload wheel artifact 38 | uses: actions/upload-artifact@v2 39 | with: 40 | name: linux_dist 41 | path: | 42 | dist/*.whl 43 | dist/*.tar.gz 44 | -------------------------------------------------------------------------------- /.github/workflows/mac-tests.yml: -------------------------------------------------------------------------------- 1 | name: Mac Tests 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | macos: 14 | name: MacOS Py${{ matrix.PYTHON_VERSION }} 15 | runs-on: macos-latest 16 | env: 17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }} 18 | RUNNER_OS: "macos" 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"] 23 | steps: 24 | - name: Checkout branch 25 | uses: actions/checkout@v2 26 | with: 27 | submodules: true 28 | - name: Install latest Rust nightly 29 | uses: actions-rs/toolchain@v1 30 | with: 31 | toolchain: nightly 32 | override: true 33 | components: rustfmt, clippy 34 | # - name: Install Haskell Stack 35 | # uses: mstksg/setup-stack@v1 36 | - name: Install Conda 37 | uses: goanpeca/setup-miniconda@v1 38 | with: 39 | activate-environment: test 40 | auto-update-conda: true 41 | auto-activate-base: false 42 | python-version: ${{ matrix.PYTHON_VERSION }} 43 | - name: Compile duckling-ffi 44 | shell: bash -l {0} 45 | run: bash -l .github/scripts/build_ffi.sh 46 | - name: Install build/test dependencies 47 | shell: bash -l {0} 48 | run: pip install maturin toml pytest pytest-cov coverage pendulum 49 | - name: Build pyduckling 50 | shell: bash -l {0} 51 | run: bash -l .github/scripts/build_pyduckling.sh 52 | - name: Run tests 53 | shell: bash -l {0} 54 | 
run: bash -l .github/scripts/run_tests.sh 55 | -------------------------------------------------------------------------------- /.github/workflows/mac-wheels.yml: -------------------------------------------------------------------------------- 1 | name: Mac Wheels 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | macos: 14 | name: MacOS Py${{ matrix.PYTHON_VERSION }} 15 | runs-on: macos-latest 16 | env: 17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }} 18 | RUNNER_OS: "macos" 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"] 23 | steps: 24 | - name: Checkout branch 25 | uses: actions/checkout@v2 26 | with: 27 | submodules: true 28 | - name: Install latest Rust nightly 29 | uses: actions-rs/toolchain@v1 30 | with: 31 | toolchain: nightly 32 | override: true 33 | components: rustfmt, clippy 34 | # - name: Install Haskell Stack 35 | # uses: mstksg/setup-stack@v1 36 | - name: Install Conda 37 | uses: goanpeca/setup-miniconda@v1 38 | with: 39 | activate-environment: test 40 | auto-update-conda: true 41 | auto-activate-base: false 42 | python-version: ${{ matrix.PYTHON_VERSION }} 43 | - name: Compile duckling-ffi 44 | shell: bash -l {0} 45 | run: bash -l .github/scripts/build_ffi.sh 46 | - name: Install build dependencies 47 | shell: bash -l {0} 48 | run: pip install setuptools-rust wheel auditwheel delocate toml 49 | - name: Build wheel 50 | shell: bash -l {0} 51 | run: bash -l .github/scripts/build_mac_wheels.sh 52 | - name: Upload wheel artifact 53 | uses: actions/upload-artifact@v2 54 | with: 55 | name: mac_dist 56 | path: | 57 | dist/*.whl 58 | dist/*.tar.gz 59 | -------------------------------------------------------------------------------- /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Linux Tests 2 | 3 | on: 4 | push: 5 | branches: [ 
master ] 6 | pull_request: 7 | branches: [ master ] 8 | 9 | env: 10 | CARGO_TERM_COLOR: always 11 | 12 | jobs: 13 | linux: 14 | name: Linux Py${{ matrix.PYTHON_VERSION }} 15 | runs-on: ubuntu-latest 16 | env: 17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }} 18 | RUNNER_OS: "ubuntu" 19 | strategy: 20 | fail-fast: false 21 | matrix: 22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"] 23 | steps: 24 | - name: Checkout branch 25 | uses: actions/checkout@v2 26 | with: 27 | submodules: true 28 | - name: Install latest Rust nightly 29 | uses: actions-rs/toolchain@v1 30 | with: 31 | toolchain: nightly 32 | override: true 33 | components: rustfmt, clippy 34 | - name: Print Rust version 35 | shell: bash -l {0} 36 | run: rustc --version 37 | - name: Install Conda 38 | uses: goanpeca/setup-miniconda@v1 39 | with: 40 | activate-environment: test 41 | auto-update-conda: true 42 | auto-activate-base: false 43 | python-version: ${{ matrix.PYTHON_VERSION }} 44 | - name: Install Haskell Stack 45 | uses: mstksg/setup-stack@v1 46 | - name: Compile duckling-ffi 47 | shell: bash -l {0} 48 | run: bash -l .github/scripts/build_ffi.sh 49 | - name: Install build/test dependencies 50 | shell: bash -l {0} 51 | run: pip install maturin toml pytest pytest-cov coverage pendulum 52 | - name: Build pyduckling 53 | shell: bash -l {0} 54 | run: bash -l .github/scripts/build_pyduckling.sh 55 | - name: Run tests 56 | shell: bash -l {0} 57 | run: bash -l .github/scripts/run_tests.sh 58 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Generated by Cargo 2 | # will have compiled files and executables 3 | /target/ 4 | 5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html 7 | Cargo.lock 8 | 9 | # These are backup files generated by rustfmt 10 | 
**/*.rs.bk 11 | 12 | *.so 13 | *.o 14 | *.a 15 | 16 | # Python Rules 17 | # Byte-compiled / optimized / DLL files 18 | __pycache__/ 19 | *.py[cod] 20 | *$py.class 21 | 22 | # C extensions 23 | *.so 24 | 25 | # Distribution / packaging 26 | .Python 27 | build/ 28 | develop-eggs/ 29 | dist/ 30 | downloads/ 31 | eggs/ 32 | .eggs/ 33 | lib/ 34 | lib64/ 35 | parts/ 36 | sdist/ 37 | var/ 38 | wheels/ 39 | pip-wheel-metadata/ 40 | share/python-wheels/ 41 | *.egg-info/ 42 | .installed.cfg 43 | *.egg 44 | MANIFEST 45 | 46 | # PyInstaller 47 | # Usually these files are written by a python script from a template 48 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 49 | *.manifest 50 | *.spec 51 | 52 | # Installer logs 53 | pip-log.txt 54 | pip-delete-this-directory.txt 55 | 56 | # Unit test / coverage reports 57 | htmlcov/ 58 | .tox/ 59 | .nox/ 60 | .coverage 61 | .coverage.* 62 | .cache 63 | nosetests.xml 64 | coverage.xml 65 | *.cover 66 | *.py,cover 67 | .hypothesis/ 68 | .pytest_cache/ 69 | cover/ 70 | 71 | # Translations 72 | *.mo 73 | *.pot 74 | 75 | # Django stuff: 76 | *.log 77 | local_settings.py 78 | db.sqlite3 79 | db.sqlite3-journal 80 | 81 | # Flask stuff: 82 | instance/ 83 | .webassets-cache 84 | 85 | # Scrapy stuff: 86 | .scrapy 87 | 88 | # Sphinx documentation 89 | docs/_build/ 90 | 91 | # PyBuilder 92 | .pybuilder/ 93 | target/ 94 | 95 | # Jupyter Notebook 96 | .ipynb_checkpoints 97 | 98 | # IPython 99 | profile_default/ 100 | ipython_config.py 101 | 102 | # pyenv 103 | # For a library or package, you might want to ignore these files since the code is 104 | # intended to run in multiple environments; otherwise, check them in: 105 | # .python-version 106 | 107 | # pipenv 108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 111 | # install all needed dependencies. 112 | #Pipfile.lock 113 | 114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # static files generated from Django application using `collectstatic` 158 | media 159 | static 160 | 161 | # VSCode settings 162 | .vscode/ 163 | 164 | # Auditwheel output 165 | wheelhouse/ 166 | .wheel-process/ 167 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "duckling-ffi"] 2 | path = duckling-ffi 3 | url = https://github.com/treble-ai/duckling-ffi 4 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## Version 0.1.0 (2020/09/03) 2 | 3 | ### Issues Closed 4 | 5 | * [Issue 10](https://github.com/treble-ai/pyduckling/issues/10) - Release v0.1.0 6 | * [Issue 9](https://github.com/treble-ai/pyduckling/issues/9) - Add RELEASE instructions ([PR 15](https://github.com/treble-ai/pyduckling/pull/15) by 
[@andfoy](https://github.com/andfoy)) 7 | * [Issue 8](https://github.com/treble-ai/pyduckling/issues/8) - Improve README and add compilling instructions ([PR 12](https://github.com/treble-ai/pyduckling/pull/12) by [@andfoy](https://github.com/andfoy)) 8 | * [Issue 4](https://github.com/treble-ai/pyduckling/issues/4) - Migrate all functions ([PR 5](https://github.com/treble-ai/pyduckling/pull/5) by [@andfoy](https://github.com/andfoy)) 9 | * [Issue 2](https://github.com/treble-ai/pyduckling/issues/2) - Add tests ([PR 7](https://github.com/treble-ai/pyduckling/pull/7) by [@andfoy](https://github.com/andfoy)) 10 | * [Issue 1](https://github.com/treble-ai/pyduckling/issues/1) - Setup CI using Github Actions ([PR 3](https://github.com/treble-ai/pyduckling/pull/3) by [@andfoy](https://github.com/andfoy)) 11 | 12 | In this release 6 issues were closed. 13 | 14 | ### Pull Requests Merged 15 | 16 | * [PR 15](https://github.com/treble-ai/pyduckling/pull/15) - PR: Add release instructions, by [@andfoy](https://github.com/andfoy) ([9](https://github.com/treble-ai/pyduckling/issues/9)) 17 | * [PR 14](https://github.com/treble-ai/pyduckling/pull/14) - PR: Rename PyPi distribution to pyduckling-native, by [@andfoy](https://github.com/andfoy) 18 | * [PR 13](https://github.com/treble-ai/pyduckling/pull/13) - PR: Patch and relocate wheels manually, add macOS support, by [@andfoy](https://github.com/andfoy) 19 | * [PR 12](https://github.com/treble-ai/pyduckling/pull/12) - PR: Improve README and add compilation instructions, by [@andfoy](https://github.com/andfoy) ([8](https://github.com/treble-ai/pyduckling/issues/8)) 20 | * [PR 11](https://github.com/treble-ai/pyduckling/pull/11) - PR: Add typing stubs, by [@andfoy](https://github.com/andfoy) 21 | * [PR 7](https://github.com/treble-ai/pyduckling/pull/7) - PR: Add pyduckling tests, by [@andfoy](https://github.com/andfoy) ([2](https://github.com/treble-ai/pyduckling/issues/2)) 22 | * [PR 
6](https://github.com/treble-ai/pyduckling/pull/6) - Fix CIs, by [@andfoy](https://github.com/andfoy) 23 | * [PR 5](https://github.com/treble-ai/pyduckling/pull/5) - PR: Migrate all functions, by [@andfoy](https://github.com/andfoy) ([4](https://github.com/treble-ai/pyduckling/issues/4)) 24 | * [PR 3](https://github.com/treble-ai/pyduckling/pull/3) - PR: Enable Github Actions, by [@andfoy](https://github.com/andfoy) ([1](https://github.com/treble-ai/pyduckling/issues/1)) 25 | 26 | In this release 9 pull requests were closed. 27 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "pyduckling-native" 3 | version = "0.1.1-dev0" 4 | authors = ["Edgar Andrés Margffoy Tuay "] 5 | description = "Rust-based Python wrapper for duckling library in Haskell." 6 | repository = "https://github.com/treble-ai/pyduckling" 7 | license = "MIT" 8 | keywords = ["haskell", "python", "parse", "duckling"] 9 | readme = "README.md" 10 | build = "build.rs" 11 | edition = "2018" 12 | 13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 14 | [lib] 15 | name = "duckling" 16 | crate-type = ["cdylib"] 17 | 18 | [dependencies] 19 | libc = "0.2" 20 | 21 | [dependencies.pyo3] 22 | version = "0.10.1" 23 | features = ["extension-module"] 24 | 25 | [package.metadata.maturin] 26 | requires-dist = ["pendulum"] 27 | classifier = [ 28 | "Development Status :: 4 - Beta", 29 | "Intended Audience :: Developers", 30 | "License :: OSI Approved :: MIT License", 31 | "Programming Language :: Python :: 3.5", 32 | "Programming Language :: Python :: 3.6", 33 | "Programming Language :: Python :: 3.7", 34 | "Programming Language :: Python :: 3.8" 35 | ] 36 | -------------------------------------------------------------------------------- /LICENSE: 
-------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 treble.ai 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include Cargo.toml 2 | recursive-include src * 3 | recursive-include ext_lib * 4 | recursive-include duckling-ffi * 5 | recursive-include packaging * 6 | global-include *.rs 7 | global-exclude *.so 8 | global-exclude *.dll 9 | global-exclude *.dylib 10 | global-exclude *.pyc 11 | global-exclude *.pyd 12 | prune duckling-ffi/.stack-work -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # PyDuckling 2 | [![Project License - MIT](https://img.shields.io/pypi/l/pyduckling-native.svg)](https://raw.githubusercontent.com/treble-ai/pyduckling-native/master/LICENSE) 3 | [![pypi version](https://img.shields.io/pypi/v/pyduckling-native.svg)](https://pypi.org/project/pyduckling-native/) 4 | [![conda version](https://img.shields.io/conda/vn/treble-ai/pyduckling.svg)](https://www.anaconda.com/download/) 5 | [![download count](https://img.shields.io/conda/dn/treble-ai/pyduckling.svg)](https://www.anaconda.com/download/) 6 | [![Downloads](https://pepy.tech/badge/pyduckling-native)](https://pepy.tech/project/pyduckling-native) 7 | [![PyPI status](https://img.shields.io/pypi/status/pyduckling-native.svg)](https://github.com/treble-ai/pyduckling-native) 8 | ![Linux Tests](https://github.com/treble-ai/pyduckling/workflows/Linux%20Tests/badge.svg?branch=master) 9 | ![Mac Tests](https://github.com/treble-ai/pyduckling/workflows/Mac%20Tests/badge.svg?branch=master) 10 | 11 | *Copyright © 2020– Treble.ai* 12 | 13 | ## Overview 14 | This package provides native bindings for Facebook's [Duckling](https://github.com/facebook/duckling) in Python. 
This package supports all dimensions and languages available in the original library, and it neither needs to spawn a Haskell server nor uses HTTP to call the Duckling API. 15 | 16 | **Note:** This package is completely Haskell-less 17 | 18 | ## Installing 19 | To install pyduckling, you can use either the pip or the conda package manager: 20 | 21 | ```bash 22 | # Using pip 23 | pip install pyduckling-native 24 | 25 | # Using conda 26 | conda install pyduckling -c treble-ai 27 | ``` 28 | 29 | **Notes:** Right now, we only provide package distributions for Linux (x86_64). We will provide Windows and Mac distributions in the next release. 30 | 31 | 32 | ## Dependencies 33 | To compile pyduckling, you will need the latest nightly release of [Rust](https://rustup.rs/), alongside [Cargo](https://crates.io/). You will also need a Python distribution with its corresponding development headers. Finally, this project depends on the following Cargo crates: 34 | 35 | * [PyO3](https://github.com/PyO3/pyo3): Library used to produce Python bindings from Rust code. 36 | * [Maturin](https://github.com/PyO3/maturin): Build system to build and publish Rust-based Python packages. 37 | 38 | Additionally, this package depends on [Duckling-FFI](https://github.com/treble-ai/duckling-ffi), used to compile the native interface to Duckling on Haskell. In order to compile Duckling-FFI, you will need the [Stack](https://haskell-lang.org/get-started) Haskell manager. 
39 | 40 | 41 | ## Installing locally 42 | Besides Rust and Stack, you will need the latest version of maturin installed to compile this project locally: 43 | 44 | ```bash 45 | pip install maturin toml 46 | ``` 47 | 48 | First, you will need to compile Duckling-FFI in order to produce the shared library ``libducklingffi``. To do so, you can use the git submodule found at the root of this repository: 49 | 50 | ```bash 51 | cd duckling-ffi 52 | stack build 53 | ``` 54 | 55 | Then, from the root of this repository, you will need to copy the resulting binary ``libducklingffi.so`` to the ``ext_lib`` folder: 56 | 57 | ```bash 58 | cp duckling-ffi/libducklingffi.so ext_lib 59 | ``` 60 | 61 | After completing this procedure, you can execute the following command to compile pyduckling: 62 | 63 | ```bash 64 | maturin develop 65 | ``` 66 | 67 | In order to produce wheels, ``maturin build`` can be used instead. This project supports [PEP517](https://www.python.org/dev/peps/pep-0517/), thus pip can be used to install this package as well: 68 | 69 | ```bash 70 | pip install -U . 71 | ``` 72 | 73 | ## Running tests 74 | We use pytest to run tests as follows (after calling ``maturin develop``): 75 | 76 | ```bash 77 | pytest -v duckling/tests 78 | ``` 79 | 80 | ## Package usage 81 | PyDuckling provides access to the parsing capabilities of Duckling used to extract structured data from text. 
82 | 83 | ```python 84 | # Core imports 85 | from duckling import (load_time_zones, parse_ref_time, 86 | parse_lang, default_locale_lang, parse_locale, 87 | parse_dimensions, parse, Context) 88 | import pendulum 89 | # Load reference time for time parsing 90 | time_zones = load_time_zones("/usr/share/zoneinfo") 91 | bog_now = pendulum.now('America/Bogota').replace(microsecond=0) 92 | ref_time = parse_ref_time( 93 | time_zones, 'America/Bogota', bog_now.int_timestamp) 94 | 95 | # Load language/locale information 96 | lang_es = parse_lang('ES') 97 | default_locale = default_locale_lang(lang_es) 98 | locale = parse_locale('ES_CO', default_locale) 99 | 100 | # Create parsing context with time and language information 101 | context = Context(ref_time, locale) 102 | 103 | # Define dimensions to look up 104 | valid_dimensions = ["amount-of-money", "credit-card-number", "distance", 105 | "duration", "email", "number", "ordinal", 106 | "phone-number", "quantity", "temperature", 107 | "time", "time-grain", "url", "volume"] 108 | 109 | # Parse dimensions to use 110 | output_dims = parse_dimensions(valid_dimensions) 111 | 112 | # Parse a phrase 113 | result = parse('En dos semanas', context, output_dims, False) 114 | ``` 115 | 116 | This wrapper allows access to all the dimensions and languages available on Duckling: 117 | 118 | | Dimension | Example input | Example value output | 119 | | --------- | ------------- | -------------------- | 120 | | `amount-of-money` | "42€" | `{"value":42,"type":"value","unit":"EUR"}` | 121 | | `credit-card-number` | "4111-1111-1111-1111" | `{"value":"4111111111111111","issuer":"visa"}` | 122 | | `distance` | "6 miles" | `{"value":6,"type":"value","unit":"mile"}` | 123 | | `duration` | "3 mins" | `{"value":3,"minute":3,"unit":"minute","normalized":{"value":180,"unit":"second"}}` | 124 | | `email` | "duckling-team@fb.com" | `{"value":"duckling-team@fb.com"}` | 125 | | `number` | "eighty eight" | `{"value":88,"type":"value"}` | 126 | | `ordinal` | "33rd" | 
`{"value":33,"type":"value"}` | 127 | | `phone-number` | "+1 (650) 123-4567" | `{"value":"(+1) 6501234567"}` | 128 | | `quantity` | "3 cups of sugar" | `{"value":3,"type":"value","product":"sugar","unit":"cup"}` | 129 | | `temperature` | "80F" | `{"value":80,"type":"value","unit":"fahrenheit"}` | 130 | | `time` | "today at 9am" | `{"values":[{"value":"2016-12-14T09:00:00.000-08:00","grain":"hour","type":"value"}],"value":"2016-12-14T09:00:00.000-08:00","grain":"hour","type":"value"}` | 131 | | `url` | "https://api.wit.ai/message?q=hi" | `{"value":"https://api.wit.ai/message?q=hi","domain":"api.wit.ai"}` | 132 | | `volume` | "4 gallons" | `{"value":4,"type":"value","unit":"gallon"}` | 133 | 134 | 135 | ## Changelog 136 | Please see our [CHANGELOG](https://github.com/treble-ai/pyduckling/blob/master/CHANGELOG.md) file to learn more about our new features and improvements. 137 | 138 | 139 | ## Contribution guidelines 140 | We follow PEP8 and PEP257 for pure python packages and Rust to compile extensions. We use MyPy type annotations for all functions and classes declared on this package. Feel free to send a PR or create an issue if you have any problem/question. 141 | -------------------------------------------------------------------------------- /RELEASE.md: -------------------------------------------------------------------------------- 1 | To release a new version of pyduckling: 2 | 1. git fetch upstream && git checkout upstream/master 3 | 2. Close milestone on GitHub 4 | 3. git clean -xfdi 5 | 4. Update CHANGELOG.md with loghub 6 | 5. git add -A && git commit -m "Update Changelog" 7 | 6. Update release version in ``Cargo.toml`` (set release version, remove 'dev0') 8 | 7. git add -A && git commit -m "Release vX.X.X" 9 | 8. git tag -a vX.X.X -m "Release vX.X.X" 10 | 9. git push upstream master 11 | 10. git push upstream --tags 12 | 11. Wait for GitHub Actions to produce the wheels 13 | 12. Download the wheels locally for Linux and Mac 14 | 13. 
use std::path::Path;
use std::process::Command;
use std::{env, str};

/// Returns `true` only if `cmd` could be spawned and exited with a success status.
///
/// A spawn failure (for example, the program is not on `PATH`) is reported as
/// `false` instead of an error, so this works as an "is the tool installed?" probe.
fn command_ok(cmd: &mut Command) -> bool {
    cmd.status().map(|status| status.success()).unwrap_or(false)
}

/// Runs `cmd` to completion and returns its standard output with surrounding
/// whitespace trimmed.
///
/// # Panics
///
/// Panics if the command cannot be spawned, if it exits with a non-success
/// status, or if its standard output is not valid UTF-8. A build script has no
/// caller to report to, so failing loudly with the offending command is
/// preferable to emitting link directives built from empty output.
fn command_output(cmd: &mut Command) -> String {
    let output = match cmd.output() {
        Ok(output) => output,
        Err(err) => panic!("failed to run {:?}: {}", cmd, err),
    };

    // Without this check a failing command would silently yield an empty
    // string, producing bogus directives such as `HSrts-ghc` with no version.
    if !output.status.success() {
        panic!(
            "{:?} exited with {}: {}",
            cmd,
            output.status,
            String::from_utf8_lossy(&output.stderr).trim()
        );
    }

    match str::from_utf8(&output.stdout) {
        Ok(stdout) => stdout.trim().to_string(),
        Err(err) => panic!("output of {:?} is not valid UTF-8: {}", cmd, err),
    }
}

/// Build-script entry point: asks Stack's GHC for its library directory and
/// version, then tells Cargo where to find and how to link `libducklingffi`
/// and the GHC runtime system library.
///
/// # Panics
///
/// Panics if `stack` is not available, if any of the GHC queries fail, or if
/// one of the directories involved is not representable as UTF-8.
fn main() {
    // Guard clause: nothing below can work without Stack.
    if !command_ok(Command::new("stack").arg("--version")) {
        panic!("Stack was not found in the PATH");
    }

    let ghc_lib =
        command_output(Command::new("stack").args(&["exec", "--", "ghc", "--print-libdir"]));
    let ghc_version =
        command_output(Command::new("stack").args(&["exec", "--", "ghc", "--numeric-version"]));

    // `ext_lib` is resolved relative to the directory the script is started in.
    let project_dir = env::current_dir().expect("cannot determine the current directory");
    let project_dir = project_dir
        .to_str()
        .expect("the current directory is not valid UTF-8");

    // The GHC runtime system libraries are looked up in `<libdir>/rts`.
    let rts_path = Path::new(&ghc_lib).join("rts");
    let rts_path = rts_path
        .to_str()
        .expect("the GHC rts directory is not valid UTF-8");

    println!("cargo:rustc-link-search=native={}/ext_lib/", project_dir);
    println!("cargo:rustc-link-search=native={}", rts_path);
    println!("cargo:rustc-link-lib=dylib=ducklingffi");
    println!("cargo:rustc-link-lib=dylib=HSrts-ghc{}", ghc_version);
    // Exposed to the crate at compile time as the `GHC_VERSION` environment variable.
    println!("cargo:rustc-env=GHC_VERSION={}", ghc_version);
}
def parse(text: str, context: Context, dimensions: List[Dimension],
          with_latent: bool = False) -> dict:
    """
    Extract structured information from a piece of text.

    Thin convenience wrapper: the native ``parse_text`` binding returns a
    JSON string, which is decoded here before being handed to the caller.

    Parameters
    ----------
    text: str
        The phrase to analyse.
    context: Context
        Reference time and locale used to interpret the phrase.
    dimensions: List[Dimension]
        Dimensions that should be looked for in the phrase.
    with_latent: bool
        If True, less certain parses are included as well, e.g. "7" read
        as an hour of the day.

    Returns
    -------
    result: dict
        The decoded JSON payload produced by ``parse_text``.
    """
    return json.loads(parse_text(text, context, dimensions, with_latent))
11 | 12 | 13 | class Language: 14 | @property 15 | def name(self) -> str: ... 16 | 17 | 18 | class Locale: 19 | @property 20 | def name(self) -> str: ... 21 | 22 | 23 | class DucklingTime: 24 | @property 25 | def iso8601(self) -> str: ... 26 | 27 | 28 | class Context: 29 | reference_time: DucklingTime = ... 30 | locale: Locale = ... 31 | 32 | 33 | def init(): ... 34 | def stop(): ... 35 | def load_time_zones(path: str) -> TimeZoneDatabase: ... 36 | def get_current_ref_time(tz_db: TimeZoneDatabase, tz: str) -> DucklingTime: ... 37 | def parse_ref_time(tz_db: TimeZoneDatabase, tz: str, 38 | timestamp: int) -> DucklingTime: ... 39 | def parse_lang(lang: str) -> Language: ... 40 | def default_locale_lang(lang: Language) -> Locale: ... 41 | def parse_locale(locale: str, default_locale: Locale) -> Locale: ... 42 | def parse_dimensions(dims: List[str]) -> List[Dimension]: ... 43 | def parse_text(text: str, context: Context, dimensions: List[Dimension], 44 | with_latent: bool = False) -> str: ... 
45 | -------------------------------------------------------------------------------- /duckling/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/treble-ai/pyduckling/3dde5dc32828fd71627edd627362fa58da00b88a/duckling/tests/__init__.py -------------------------------------------------------------------------------- /duckling/tests/test_duckling.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ----------------------------------------------------------------------------- 3 | # Copyright (c) Treble.ai 4 | # 5 | # Licensed under the terms of the MIT License 6 | # (see LICENSE.txt for details) 7 | # ----------------------------------------------------------------------------- 8 | 9 | """Tests for pyduckling library.""" 10 | 11 | # Pytest imports 12 | import pytest 13 | 14 | # Third-party imports 15 | import pendulum 16 | 17 | # Local imports 18 | from duckling import (load_time_zones, get_current_ref_time, parse_ref_time, 19 | parse_lang, default_locale_lang, parse_locale, 20 | parse_dimensions, parse, Context) 21 | 22 | 23 | @pytest.fixture 24 | def time_zones(): 25 | tzdb = load_time_zones("/usr/share/zoneinfo") 26 | return tzdb 27 | 28 | 29 | def test_load_time_zones(): 30 | tzdb = load_time_zones("/usr/share/zoneinfo") 31 | assert tzdb is not None 32 | 33 | 34 | def test_get_current_ref_time(time_zones): 35 | # Remove timezone information 36 | bog_now = pendulum.now('America/Bogota').naive().replace(microsecond=0) 37 | ref_time = get_current_ref_time(time_zones, 'America/Bogota') 38 | # UTC conversion in required to recover the actual datetime 39 | this_ref_time = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive() 40 | this_ref_time = this_ref_time.replace(microsecond=0) 41 | assert bog_now == this_ref_time 42 | 43 | # Function should fallback to UTC if the timezone does not exist 44 | utc_now = 
pendulum.now('UTC').naive().replace(microsecond=0) 45 | ref_time = get_current_ref_time(time_zones, 'Continent/Country') 46 | this_ref_time = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive() 47 | this_ref_time = this_ref_time.replace(microsecond=0) 48 | assert this_ref_time == utc_now 49 | 50 | 51 | def test_parse_ref_time(time_zones): 52 | bog_now = pendulum.now('America/Bogota').replace(microsecond=0) 53 | ref_time = parse_ref_time( 54 | time_zones, 'America/Bogota', bog_now.int_timestamp) 55 | # UTC conversion is required to recover the actual datetime 56 | this_ref_time = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive() 57 | this_ref_time = this_ref_time.replace(microsecond=0) 58 | assert bog_now.naive() == this_ref_time 59 | 60 | # Initialize any date 61 | dt = pendulum.datetime(1996, 2, 22, 9, 22, 3, 0, tz="Europe/Madrid") 62 | # bog_dt = dt.in_tz('America/Bogota') 63 | ref_time = parse_ref_time( 64 | time_zones, 'Europe/Madrid', dt.int_timestamp) 65 | # UTC conversion is required to recover the actual datetime 66 | this_ref_time = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive() 67 | this_ref_time = this_ref_time.replace(microsecond=0) 68 | assert dt.naive() == this_ref_time 69 | 70 | # Function should fall back to UTC if the timezone does not exist 71 | pst_now = pendulum.now('America/Los_Angeles').replace(microsecond=0) 72 | ref_time = parse_ref_time( 73 | time_zones, 'Continent/Country', pst_now.int_timestamp) 74 | # UTC conversion is required to recover the actual datetime 75 | this_ref_time = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive() 76 | this_ref_time = this_ref_time.replace(microsecond=0) 77 | assert pst_now.in_tz('UTC').naive() == this_ref_time 78 | 79 | 80 | def test_parse_lang(): 81 | # Function call should be case-insensitive 82 | lang_es = parse_lang('es') 83 | assert lang_es.name == 'ES' 84 | 85 | lang_pt = parse_lang('PT') 86 | assert lang_pt.name == 'PT' 87 | 88 | # Function should default to EN, when the language
does not exist 89 | lang_any = parse_lang('UU') 90 | assert lang_any.name == 'EN' 91 | 92 | 93 | def test_default_locale_lang(): 94 | lang_es = parse_lang('ES') 95 | default_locale = default_locale_lang(lang_es) 96 | assert default_locale.name == 'ES_XX' 97 | 98 | 99 | def test_parse_locale(): 100 | lang_es = parse_lang('ES') 101 | default_locale = default_locale_lang(lang_es) 102 | 103 | # Parse Language + Country locale 104 | locale = parse_locale('ES_CO', default_locale) 105 | assert locale.name == 'ES_CO' 106 | 107 | # A bare country code falls back to the default locale 108 | locale = parse_locale('CO', default_locale) 109 | assert locale.name == 'ES_XX' 110 | 111 | 112 | def test_parse_dimensions(): 113 | valid_dimensions = ["amount-of-money", "credit-card-number", "distance", 114 | "duration", "email", "number", "ordinal", 115 | "phone-number", "quantity", "temperature", 116 | "time", "time-grain", "url", "volume"] 117 | 118 | # All dimensions should be parsed 119 | output_dims = parse_dimensions(valid_dimensions) 120 | assert len(output_dims) == len(valid_dimensions) 121 | 122 | invalid_dimensions = ["amount-of-money", "dim1", "credit-card-number", 123 | "dim2", "distance", "dim3"] 124 | 125 | # Only the valid dimensions should be parsed 126 | output_dims = parse_dimensions(invalid_dimensions) 127 | assert len(output_dims) == len(invalid_dimensions) - 3 128 | 129 | 130 | def test_parse(time_zones): 131 | bog_now = pendulum.now('America/Bogota').replace(microsecond=0) 132 | ref_time = parse_ref_time( 133 | time_zones, 'America/Bogota', bog_now.int_timestamp) 134 | lang_es = parse_lang('ES') 135 | default_locale = default_locale_lang(lang_es) 136 | locale = parse_locale('ES_CO', default_locale) 137 | 138 | context = Context(ref_time, locale) 139 | dimensions = ['time', 'duration'] 140 | dims = parse_dimensions(dimensions) 141 | 142 | # Test time periods 143 | result = parse('En dos semanas', context, dims, False) 144 | next_time = result[0]['value']['value'] 145 | next_time =
pendulum.parse(next_time) 146 | assert next_time == bog_now.add(weeks=2).start_of('day') 147 | 148 | # Test distance units 149 | dimensions = ['distance'] 150 | dims = parse_dimensions(dimensions) 151 | result = parse('3 km', context, dims, False) 152 | info = result[0]['value'] 153 | value = info['value'] 154 | unit = info['unit'] 155 | assert value == 3 156 | assert unit == 'kilometre' 157 | 158 | # Test volume units 159 | dimensions = ['volume'] 160 | dims = parse_dimensions(dimensions) 161 | result = parse('5 litros de leche', context, dims, False) 162 | info = result[0]['value'] 163 | value = info['value'] 164 | unit = info['unit'] 165 | assert value == 5 166 | assert unit == 'litre' 167 | -------------------------------------------------------------------------------- /ext_lib/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Building binaries for duckling-ffi 3 | Please generate the shared library (`libducklingffi.so/dylib/dll`) produced by compiling [duckling-ffi](https://github.com/treble-ai/duckling-ffi) and put it in this folder.
4 | -------------------------------------------------------------------------------- /packaging/0001-Allow-binaries-larger-than-32MB.patch: -------------------------------------------------------------------------------- 1 | From 5b84342f57009f8cc1be801825a0a5925f0fcebc Mon Sep 17 00:00:00 2001 2 | From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?= 3 | Date: Mon, 31 Aug 2020 11:37:56 -0500 4 | Subject: [PATCH] Allow binaries larger than 32MB 5 | 6 | --- 7 | src/patchelf.cc | 2 +- 8 | 1 file changed, 1 insertion(+), 1 deletion(-) 9 | 10 | diff --git a/src/patchelf.cc b/src/patchelf.cc 11 | index d0063f9..19d1483 100644 12 | --- a/src/patchelf.cc 13 | +++ b/src/patchelf.cc 14 | @@ -328,7 +328,7 @@ static FileContents readFile(std::string fileName, 15 | size_t size = std::min(cutOff, (size_t) st.st_size); 16 | 17 | FileContents contents = std::make_shared>(); 18 | - contents->reserve(size + 32 * 1024 * 1024); 19 | + contents->reserve(size + 64 * 1024 * 1024); 20 | contents->resize(size, 0); 21 | 22 | int fd = open(fileName.c_str(), O_RDONLY); 23 | -- 24 | 2.20.1 25 | 26 | -------------------------------------------------------------------------------- /packaging/build_wheels.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ----------------------------------------------------------------------------- 3 | # Copyright (c) Treble.ai 4 | # 5 | # Licensed under the terms of the MIT License 6 | # (see LICENSE.txt for details) 7 | # ----------------------------------------------------------------------------- 8 | 9 | """Helper script to package wheels and relocate binaries.""" 10 | 11 | # Standard library imports 12 | import os 13 | import io 14 | import sys 15 | import glob 16 | import shutil 17 | import zipfile 18 | import hashlib 19 | import platform 20 | import subprocess 21 | import os.path as osp 22 | from base64 import urlsafe_b64encode 23 | 24 | # Third party imports 25 | import toml 26 | from 
auditwheel.lddtree import lddtree 27 | from wheel.bdist_wheel import get_abi_tag 28 | 29 | 30 | HERE = osp.dirname(osp.abspath(__file__)) 31 | PACKAGE_ROOT = osp.dirname(HERE) 32 | PLATFORM_ARCH = platform.machine() 33 | PYTHON_VERSION = sys.version_info 34 | 35 | 36 | def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): 37 | """Yield pieces of data from a file-like object until EOF.""" 38 | while True: 39 | chunk = file.read(size) 40 | if not chunk: 41 | break 42 | yield chunk 43 | 44 | 45 | def rehash(path, blocksize=1 << 20): 46 | """Return (hash, length) for path using hashlib.sha256()""" 47 | h = hashlib.sha256() 48 | length = 0 49 | with open(path, 'rb') as f: 50 | for block in read_chunks(f, size=blocksize): 51 | length += len(block) 52 | h.update(block) 53 | digest = 'sha256=' + urlsafe_b64encode( 54 | h.digest() 55 | ).decode('latin1').rstrip('=') 56 | # unicode/str python2 issues 57 | return (digest, str(length)) # type: ignore 58 | 59 | 60 | def unzip_file(file, dest): 61 | """Decompress zip `file` into directory `dest`.""" 62 | with zipfile.ZipFile(file, 'r') as zip_ref: 63 | zip_ref.extractall(dest) 64 | 65 | 66 | def get_metadata(): 67 | """Get version from text file and avoids importing the module.""" 68 | with open(os.path.join(PACKAGE_ROOT, 'Cargo.toml'), 'r') as f: 69 | data = toml.load(f) 70 | # version = data['package']['version'] 71 | return data['package'] 72 | 73 | 74 | def is_program_installed(basename): 75 | """ 76 | Return program absolute path if installed in PATH. 77 | Otherwise, return None 78 | On macOS systems, a .app is considered installed if 79 | it exists. 
80 | """ 81 | if (sys.platform == 'darwin' and basename.endswith('.app') and 82 | osp.exists(basename)): 83 | return basename 84 | 85 | for path in os.environ["PATH"].split(os.pathsep): 86 | abspath = osp.join(path, basename) 87 | if osp.isfile(abspath): 88 | return abspath 89 | 90 | 91 | def find_program(basename): 92 | """ 93 | Find program in PATH and return absolute path 94 | Try adding .exe or .bat to basename on Windows platforms 95 | (return None if not found) 96 | """ 97 | names = [basename] 98 | if os.name == 'nt': 99 | # Windows platforms 100 | extensions = ('.exe', '.bat', '.cmd') 101 | if not basename.endswith(extensions): 102 | names = [basename+ext for ext in extensions]+[basename] 103 | for name in names: 104 | path = is_program_installed(name) 105 | if path: 106 | return path 107 | 108 | 109 | def patch_new_path(library_path, new_dir): 110 | library = osp.basename(library_path) 111 | name, *rest = library.split('.') 112 | rest = '.'.join(rest) 113 | hash_id = hashlib.sha256(library_path.encode('utf-8')).hexdigest()[:8] 114 | new_name = '.'.join([name, hash_id, rest]) 115 | return osp.join(new_dir, new_name) 116 | 117 | 118 | def patch_mac(): 119 | # Find delocate location 120 | delocate_wheel = find_program('delocate-wheel') 121 | delocate_list = find_program('delocate-listdeps') 122 | if delocate_wheel is None: 123 | raise FileNotFoundError('Delocate was not found in the system, ' 124 | 'please install it via pip') 125 | # Produce wheel 126 | print('Producing wheel...') 127 | subprocess.check_output( 128 | [ 129 | sys.executable, 130 | 'setup.py', 131 | 'bdist_wheel' 132 | ], 133 | cwd=PACKAGE_ROOT 134 | ) 135 | 136 | package_info = get_metadata() 137 | version = package_info['version'].replace('-', '.') 138 | wheel_name = 'pyduckling_native-{0}-cp{1}{2}-{3}-macosx_10_15_{4}.whl'.format( 139 | version, PYTHON_VERSION.major, PYTHON_VERSION.minor, 140 | get_abi_tag(), PLATFORM_ARCH) 141 | dist = osp.join(PACKAGE_ROOT, 'dist', wheel_name) 142 | 143 | 
print('Calling delocate...') 144 | subprocess.check_output( 145 | [ 146 | delocate_wheel, 147 | '-v', 148 | dist 149 | ], 150 | cwd=PACKAGE_ROOT 151 | ) 152 | 153 | print('Resulting libraries') 154 | subprocess.check_output( 155 | [ 156 | delocate_list, 157 | '--all', 158 | dist 159 | ], 160 | cwd=PACKAGE_ROOT 161 | ) 162 | 163 | 164 | def patch_linux(): 165 | # Get patchelf location 166 | patchelf = find_program('patchelf') 167 | if patchelf is None: 168 | raise FileNotFoundError('Patchelf was not found in the system, please' 169 | ' make sure that is available on the PATH.') 170 | 171 | # Produce wheel 172 | print('Producing wheel...') 173 | subprocess.check_output( 174 | [ 175 | sys.executable, 176 | 'setup.py', 177 | 'bdist_wheel' 178 | ], 179 | cwd=PACKAGE_ROOT 180 | ) 181 | 182 | package_info = get_metadata() 183 | version = package_info['version'].replace('-', '.') 184 | wheel_name = 'pyduckling_native-{0}-cp{1}{2}-{3}-linux_{4}.whl'.format( 185 | version, PYTHON_VERSION.major, PYTHON_VERSION.minor, 186 | get_abi_tag(), PLATFORM_ARCH) 187 | dist = osp.join(PACKAGE_ROOT, 'dist', wheel_name) 188 | output_dir = osp.join(PACKAGE_ROOT, '.wheel-process') 189 | 190 | print(glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl'))) 191 | 192 | if osp.exists(output_dir): 193 | shutil.rmtree(output_dir) 194 | 195 | os.makedirs(output_dir) 196 | 197 | print('Unzipping wheel...') 198 | unzip_file(dist, output_dir) 199 | 200 | print('Finding ELF dependencies...') 201 | main_binary = 'duckling.cpython-{0}-{1}-linux-gnu.so'.format( 202 | get_abi_tag().replace('cp', ''), PLATFORM_ARCH) 203 | output_library = osp.join(output_dir, 'duckling') 204 | binary_path = osp.join(output_library, main_binary) 205 | 206 | ld_tree = lddtree(binary_path) 207 | tree_libs = ld_tree['libs'] 208 | 209 | binary_queue = [(n, main_binary) for n in ld_tree['needed']] 210 | binary_paths = {main_binary: binary_path} 211 | binary_dependencies = {} 212 | 213 | while binary_queue != []: 214 | library, parent 
= binary_queue.pop(0) 215 | library_info = tree_libs[library] 216 | print(library) 217 | print(library_info) 218 | if (library_info['path'].startswith('/lib') and 219 | not library.startswith('libpcre')): 220 | # Omit glibc/gcc/system libraries 221 | continue 222 | 223 | parent_dependencies = binary_dependencies.get(parent, []) 224 | parent_dependencies.append(library) 225 | binary_dependencies[parent] = parent_dependencies 226 | 227 | if library in binary_paths: 228 | continue 229 | 230 | binary_paths[library] = library_info['path'] 231 | binary_queue += [(n, library) for n in library_info['needed']] 232 | 233 | print('Copying dependencies to wheel directory') 234 | new_libraries_path = osp.join(output_dir, 'duckling.libs') 235 | os.makedirs(new_libraries_path) 236 | new_names = {main_binary: binary_path} 237 | 238 | for library in binary_paths: 239 | if library != main_binary: 240 | library_path = binary_paths[library] 241 | new_library_path = patch_new_path(library_path, new_libraries_path) 242 | print('{0} -> {1}'.format(library, new_library_path)) 243 | shutil.copyfile(library_path, new_library_path) 244 | new_names[library] = new_library_path 245 | 246 | print('Updating dependency names by new files') 247 | for library in binary_paths: 248 | if library != main_binary: 249 | if library not in binary_dependencies: 250 | continue 251 | library_dependencies = binary_dependencies[library] 252 | new_library_name = new_names[library] 253 | for dep in library_dependencies: 254 | new_dep = osp.basename(new_names[dep]) 255 | print('{0}: {1} -> {2}'.format(library, dep, new_dep)) 256 | subprocess.check_output( 257 | [ 258 | patchelf, 259 | '--replace-needed', 260 | dep, 261 | new_dep, 262 | new_library_name 263 | ], 264 | cwd=new_libraries_path) 265 | 266 | print('Updating library rpath') 267 | subprocess.check_output( 268 | [ 269 | patchelf, 270 | '--set-rpath', 271 | "$ORIGIN", 272 | new_library_name 273 | ], 274 | cwd=new_libraries_path) 275 | 276 | 
subprocess.check_output( 277 | [ 278 | patchelf, 279 | '--print-rpath', 280 | new_library_name 281 | ], 282 | cwd=new_libraries_path) 283 | 284 | print("Update main library dependencies") 285 | library_dependencies = binary_dependencies[main_binary] 286 | for dep in library_dependencies: 287 | new_dep = osp.basename(new_names[dep]) 288 | print('{0}: {1} -> {2}'.format(main_binary, dep, new_dep)) 289 | subprocess.check_output( 290 | [ 291 | patchelf, 292 | '--replace-needed', 293 | dep, 294 | new_dep, 295 | main_binary 296 | ], 297 | cwd=output_library) 298 | 299 | print('Update main library rpath') 300 | subprocess.check_output( 301 | [ 302 | patchelf, 303 | '--set-rpath', 304 | "$ORIGIN:$ORIGIN/../duckling.libs", 305 | binary_path 306 | ], 307 | cwd=output_library 308 | ) 309 | 310 | print('Update RECORD file in wheel') 311 | dist_info = osp.join( 312 | output_dir, 'pyduckling_native-{0}.dist-info'.format(version)) 313 | record_file = osp.join(dist_info, 'RECORD') 314 | 315 | with open(record_file, 'w') as f: 316 | for root, _, files in os.walk(output_dir): 317 | for this_file in files: 318 | full_file = osp.join(root, this_file) 319 | rel_file = osp.relpath(full_file, output_dir) 320 | if full_file == record_file: 321 | f.write('{0},,\n'.format(rel_file)) 322 | else: 323 | digest, size = rehash(full_file) 324 | f.write('{0},{1},{2}\n'.format(rel_file, digest, size)) 325 | 326 | print('Compressing wheel') 327 | shutil.make_archive(dist, 'zip', output_dir) 328 | os.remove(dist) 329 | shutil.move('{0}.zip'.format(dist), dist) 330 | shutil.rmtree(output_dir) 331 | 332 | 333 | if __name__ == '__main__': 334 | if sys.platform == 'linux': 335 | patch_linux() 336 | elif sys.platform == 'darwin': 337 | patch_mac() 338 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin"] 3 | build-backend = "maturin" 4 
| 5 | [tool.maturin] 6 | manylinux = "off" 7 | sdist-include = ["duckling-ffi/*"] 8 | 9 | # [build-system] 10 | # requires = ["setuptools", "wheel", "setuptools-rust"] 11 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # ----------------------------------------------------------------------------- 3 | # Copyright (c) Treble.ai 4 | # 5 | # Licensed under the terms of the MIT License 6 | # (see LICENSE.txt for details) 7 | # ----------------------------------------------------------------------------- 8 | 9 | """Setup script for PyDuckling.""" 10 | 11 | # yapf: disable 12 | 13 | # Standard library imports 14 | import re 15 | import os 16 | 17 | # Third party imports 18 | import toml 19 | from setuptools import find_packages, setup 20 | from setuptools_rust import Binding, RustExtension 21 | 22 | 23 | HERE = os.path.abspath(os.path.dirname(__file__)) 24 | AUTHOR_REGEX = re.compile(r'(.*) <(.*@.*[.].*)>') 25 | 26 | 27 | def get_metadata(): 28 | """Get version from text file and avoids importing the module.""" 29 | with open(os.path.join(HERE, 'Cargo.toml'), 'r') as f: 30 | data = toml.load(f) 31 | # version = data['package']['version'] 32 | return data['package'] 33 | 34 | 35 | def get_description(): 36 | """Get long description.""" 37 | with open(os.path.join(HERE, 'README.md'), 'r') as f: 38 | data = f.read() 39 | return data 40 | 41 | 42 | def get_author(metadata): 43 | author = metadata['authors'][0] 44 | match = AUTHOR_REGEX.match(author) 45 | name = match.group(1) 46 | email = match.group(2) 47 | return name, email 48 | 49 | 50 | REQUIREMENTS = [ 51 | 'pendulum' 52 | ] 53 | 54 | metadata = get_metadata() 55 | name, email = get_author(metadata) 56 | 57 | setup( 58 | name=metadata['name'], 59 | version=metadata['version'], 60 | license=metadata['license'], 61 | description=metadata['description'], 62 | 
long_description=get_description(), 63 | long_description_content_type='text/markdown', 64 | author=name, 65 | author_email=email, 66 | url=metadata['repository'], 67 | keywords=metadata['keywords'], 68 | packages=find_packages(exclude=['contrib', 'docs', 'tests*']), 69 | rust_extensions=[RustExtension("duckling.duckling", binding=Binding.PyO3)], 70 | package_data={ 71 | 'duckling': ['*.dll', '*.dylib', '*.so'] 72 | }, 73 | zip_safe=False, 74 | install_requires=REQUIREMENTS, 75 | include_package_data=True, 76 | classifiers=[ 77 | 'Development Status :: 4 - Beta', 78 | 'Intended Audience :: Developers', 79 | 'License :: OSI Approved :: MIT License', 80 | 'Programming Language :: Python :: 3.5', 81 | 'Programming Language :: Python :: 3.6', 82 | 'Programming Language :: Python :: 3.7', 83 | 'Programming Language :: Python :: 3.8' 84 | ], 85 | ) 86 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | // ---------------------------------------------------------------------------- 2 | // Copyright (c) Treble.ai 3 | // 4 | // Licensed under the terms of the MIT License 5 | // (see LICENSE.txt for details) 6 | // ---------------------------------------------------------------------------- 7 | 8 | /// Python bindings for Haskell's Duckling library written in Rust 9 | // PyO3 imports 10 | // use pyo3::class::PyMappingProtocol; 11 | use pyo3::create_exception; 12 | use pyo3::exceptions; 13 | use pyo3::gc::{PyGCProtocol, PyVisit}; 14 | use pyo3::prelude::*; 15 | use pyo3::wrap_pyfunction; 16 | use pyo3::PyTraverseError; 17 | 18 | use std::ffi::{CStr, CString}; 19 | use std::os::raw::{c_char, c_int}; 20 | use std::ptr; 21 | use std::slice; 22 | use std::sync::atomic::{AtomicBool, Ordering}; 23 | use std::sync::Once; 24 | 25 | // Package version 26 | const VERSION: &'static str = env!("CARGO_PKG_VERSION"); 27 | const GHC_VERSION: &'static str = 
env!("GHC_VERSION"); 28 | 29 | // Haskell runtime status 30 | static START_ONCE: Once = Once::new(); 31 | static STOP_ONCE: Once = Once::new(); 32 | static STOPPED: AtomicBool = AtomicBool::new(false); 33 | 34 | pub enum HaskellValue {} 35 | 36 | extern "C" { 37 | // ----------------- Duckling API ----------------------------------- 38 | pub fn wparseText( 39 | text: *const c_char, 40 | reference_time: *mut HaskellValue, 41 | locale: *mut HaskellValue, 42 | dimensions: *mut HaskellValue, 43 | with_latent: u8, 44 | ) -> *const c_char; 45 | pub fn wparseDimensions(n: i32, dimensions: *const *const c_char) -> *mut HaskellValue; 46 | pub fn wparseLocale( 47 | locale: *const c_char, 48 | default_locale: *mut HaskellValue, 49 | ) -> *mut HaskellValue; 50 | pub fn wmakeDefaultLocale(lang: *mut HaskellValue) -> *mut HaskellValue; 51 | pub fn wparseLang(lang: *const c_char) -> *mut HaskellValue; 52 | pub fn wparseRefTime( 53 | tzdb: *mut HaskellValue, 54 | tzStr: *const c_char, 55 | timestamp: i64, 56 | ) -> *mut HaskellValue; 57 | pub fn wcurrentReftime(tzdb: *mut HaskellValue, strPtr: *const c_char) -> *mut HaskellValue; 58 | pub fn wloadTimeZoneSeries(path: *const c_char) -> *mut HaskellValue; 59 | // ----------------- Duckling API ----------------------------------- 60 | // Dimension list functions 61 | pub fn dimensionListCreate( 62 | ptrs: *const *mut HaskellValue, 63 | numElements: i32, 64 | ) -> *mut HaskellValue; 65 | pub fn dimensionListLength(dims: *mut HaskellValue) -> i32; 66 | pub fn dimensionListPtrs(dims: *mut HaskellValue) -> *mut *mut HaskellValue; 67 | pub fn dimensionListDestroy(dims: *mut HaskellValue); 68 | // Dimension functions 69 | pub fn dimensionDestroy(dim: *mut HaskellValue); 70 | // Time zone database functions 71 | pub fn tzdbDestroy(db: *mut HaskellValue); 72 | // Time reference wrapper functions 73 | pub fn duckTimeDestroy(time: *mut HaskellValue); 74 | pub fn duckTimeRepr(time: *mut HaskellValue) -> *const c_char; 75 | // Language wrapper 
functions 76 | pub fn langDestroy(lang: *mut HaskellValue); 77 | pub fn langRepr(lang: *mut HaskellValue) -> *const c_char; 78 | // Locale wrapper functions 79 | pub fn localeDestroy(locale: *mut HaskellValue); 80 | pub fn localeRepr(locale: *mut HaskellValue) -> *const c_char; 81 | // Haskell runtime start/stop 82 | pub fn hs_init(argc: c_int, argv: *const *const c_char); 83 | pub fn hs_exit(); 84 | } 85 | 86 | create_exception!(pyduckling, RuntimeStoppedError, exceptions::Exception); 87 | 88 | /// Initialize the Haskell runtime. This function is safe to call more than once, and 89 | /// will do nothing on subsequent calls. 90 | /// 91 | /// The runtime will automatically be shutdown at program exit, or you can stop it 92 | /// earlier with `stop`. 93 | #[pyfunction] 94 | fn init() -> PyResult<()> { 95 | START_ONCE.call_once(|| { 96 | start_hs(); 97 | unsafe { 98 | ::libc::atexit(stop_hs); 99 | } 100 | }); 101 | Ok(()) 102 | } 103 | 104 | /// Stop the Haskell runtime before the program exits. This function may only be called 105 | /// once during a program's execution. 106 | /// 107 | /// It is safe, but not useful, to call this before the runtime has started. 108 | /// 109 | /// Raises 110 | /// ------ 111 | /// RuntimeStoppedError: 112 | /// If the runtime was already stopped. 113 | #[pyfunction] 114 | pub fn stop() -> PyResult<()> { 115 | if STOPPED.swap(true, Ordering::SeqCst) { 116 | let err = "Haskell: The GHC runtime may only be stopped once. 
See \ 117 | https://downloads.haskell.org/%7Eghc/latest/docs/html/users_guide\ 118 | /ffi-chap.html#id1"; 119 | let exc = RuntimeStoppedError::py_err(err.to_string()); 120 | return Err(exc); 121 | } 122 | stop_hs(); 123 | Ok(()) 124 | } 125 | 126 | fn start_hs() { 127 | let mut argv = Vec::<*const c_char>::with_capacity(1); 128 | argv.push(ptr::null_mut()); 129 | unsafe { 130 | hs_init(0 as c_int, argv.as_ptr()); 131 | } 132 | } 133 | 134 | extern "C" fn stop_hs() { 135 | STOP_ONCE.call_once(|| unsafe { hs_exit() }); 136 | } 137 | 138 | /// Handle to the time zone database stored by Duckling 139 | #[pyclass(name=TimeZoneDatabase)] 140 | #[derive(Debug, Clone)] 141 | pub struct TimeZoneDatabaseWrapper { 142 | ptr: *mut HaskellValue, 143 | } 144 | 145 | #[pyproto] 146 | impl PyGCProtocol for TimeZoneDatabaseWrapper { 147 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> { 148 | Ok(()) 149 | } 150 | 151 | fn __clear__(&mut self) { 152 | unsafe { tzdbDestroy(self.ptr) } 153 | } 154 | } 155 | 156 | // impl Drop for TimeZoneDatabaseWrapper { 157 | // fn drop(&mut self) { 158 | // println!("Calling GC"); 159 | // unsafe { 160 | // tzdbDestroy(self.ptr); 161 | // } 162 | // } 163 | // } 164 | 165 | /// Handle to a time reference stored by Duckling 166 | #[pyclass(name=DucklingTime)] 167 | #[derive(Debug, Clone)] 168 | pub struct DucklingTimeWrapper { 169 | ptr: *mut HaskellValue, 170 | } 171 | 172 | #[pymethods] 173 | impl DucklingTimeWrapper { 174 | #[getter] 175 | fn iso8601(&self) -> PyResult { 176 | let c_value = unsafe { duckTimeRepr(self.ptr) }; 177 | let string_result = unsafe { 178 | CStr::from_ptr(c_value) 179 | .to_string_lossy() 180 | .to_owned() 181 | .to_string() 182 | }; 183 | Ok(string_result) 184 | } 185 | } 186 | 187 | #[pyproto] 188 | impl PyGCProtocol for DucklingTimeWrapper { 189 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> { 190 | Ok(()) 191 | } 192 | 193 | fn __clear__(&mut self) { 194 | unsafe
{ duckTimeDestroy(self.ptr) }
    }
}

/// Handle to a language code stored by Duckling
#[pyclass(name=Language)]
#[derive(Debug, Clone)]
pub struct LanguageWrapper {
    ptr: *mut HaskellValue,
}

#[pymethods]
impl LanguageWrapper {
    /// String representation of the language, as produced by `langRepr`.
    #[getter]
    fn name(&self) -> PyResult<String> {
        // NOTE(review): assumes `langRepr` returns a valid NUL-terminated C string
        // for every handle created by this module; confirm against the FFI.
        let repr = unsafe { CStr::from_ptr(langRepr(self.ptr)) };
        // Invalid UTF-8 sequences are replaced rather than reported as an error.
        Ok(repr.to_string_lossy().into_owned())
    }
}

#[pyproto]
impl PyGCProtocol for LanguageWrapper {
    // Nothing is visited: the wrapper only stores a raw pointer.
    fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
        Ok(())
    }

    // Hands the wrapped pointer to `langDestroy`.
    // NOTE(review): `ptr` is not reset afterwards and the type derives `Clone`, so
    // copies keep the same pointer; confirm nothing uses it after this has run.
    fn __clear__(&mut self) {
        unsafe { langDestroy(self.ptr) }
    }
}

/// Handle to a locale code stored by Duckling
#[pyclass(name=Locale)]
#[derive(Debug, Clone)]
pub struct LocaleWrapper {
    ptr: *mut HaskellValue,
}

#[pymethods]
impl LocaleWrapper {
    /// String representation of the locale, as produced by `localeRepr`.
    #[getter]
    fn name(&self) -> PyResult<String> {
        // NOTE(review): assumes `localeRepr` returns a valid NUL-terminated C string
        // for every handle created by this module; confirm against the FFI.
        let repr = unsafe { CStr::from_ptr(localeRepr(self.ptr)) };
        // Invalid UTF-8 sequences are replaced rather than reported as an error.
        Ok(repr.to_string_lossy().into_owned())
    }
}

#[pyproto]
impl PyGCProtocol for LocaleWrapper {
    // Nothing is visited: the wrapper only stores a raw pointer.
    fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
        Ok(())
    }

    // Hands the wrapped pointer to `localeDestroy`.
    // NOTE(review): `ptr` is not reset afterwards and the type derives `Clone`, so
    // copies keep the same pointer; confirm nothing uses it after this has run.
    fn __clear__(&mut self) {
        unsafe { localeDestroy(self.ptr) }
    }
}

/// Handle to a parsing dimension identifier
#[pyclass(name=Dimension)]
#[derive(Debug, Clone)]
pub struct DimensionWrapper {
    ptr: *mut HaskellValue,
}

#[pyproto]
impl PyGCProtocol for DimensionWrapper {
    // Nothing is visited: the wrapper only stores a raw pointer.
    fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
        Ok(())
    }

    // Hands the wrapped pointer to `dimensionDestroy`.
    // NOTE(review): `ptr` is not reset afterwards and the type derives `Clone`, so
    // copies keep the same pointer; confirm nothing uses it after this has run.
    fn __clear__(&mut self) {
        unsafe { dimensionDestroy(self.ptr) }
    }
}

/// Pair of a reference time and a locale, bundled into a single Python object.
#[pyclass]
#[derive(Debug, Clone)]
pub struct Context {
    pub reference_time: DucklingTimeWrapper,
    pub locale: LocaleWrapper,
}

#[pymethods]
impl Context {
    #[new]
    fn new(reference_time: DucklingTimeWrapper, locale: LocaleWrapper) -> Self {
        Context {
            reference_time,
            locale,
        }
    }
}

/// Load time zone information from local Olson files.
///
/// Parameters
/// ----------
/// path: str
///     Path to the Olson data definitions. Many Linux distros have
///     Olson data in "/usr/share/zoneinfo/".
///
/// Returns
/// -------
/// tz_info: TimeZoneDatabase
///     Opaque handle to a map of time zone data information in Haskell.
///
/// Raises
/// ------
/// ValueError
///     If `path` contains an interior NUL byte.
#[pyfunction]
fn load_time_zones(path: &str) -> PyResult<TimeZoneDatabaseWrapper> {
    // A string with an interior NUL cannot become a C string; `?` converts the
    // `NulError` into a Python exception instead of panicking.
    let c_path = CString::new(path)?;
    let haskell_ptr = unsafe { wloadTimeZoneSeries(c_path.as_ptr()) };
    Ok(TimeZoneDatabaseWrapper { ptr: haskell_ptr })
}

/// Get current reference time, given an Olson time zone
///
/// Parameters
/// ----------
/// tz_db: TimeZoneDatabase
///     Opaque handle to a map of time zone data information in Haskell
/// tz: str
///     Time zone name according to IANA
///
/// Returns
/// -------
/// ref_time: DucklingTime
///     Opaque handle to a time reference in Haskell
///
/// Raises
/// ------
/// ValueError
///     If `tz` contains an interior NUL byte.
#[pyfunction]
fn get_current_ref_time(
    tz_db: TimeZoneDatabaseWrapper,
    tz: &str,
) -> PyResult<DucklingTimeWrapper> {
    // `?` raises a Python exception for an interior NUL instead of panicking.
    let tz_c_str = CString::new(tz)?;
    let haskell_tz = unsafe { wcurrentReftime(tz_db.ptr, tz_c_str.as_ptr()) };
    Ok(DucklingTimeWrapper { ptr: haskell_tz })
}

/// Parse a reference timestamp on a given Olson time zone
///
/// Parameters
/// ----------
/// tz_db: TimeZoneDatabase
///     Opaque handle to a map of time zone data information in Haskell
/// tz: str
///     Time zone name according to IANA
/// timestamp: int
///     UNIX integer timestamp
///
/// Returns
/// -------
/// ref_time: DucklingTime
///     Opaque handle to a time reference in Haskell
///
/// Raises
/// ------
/// ValueError
///     If `tz` contains an interior NUL byte.
#[pyfunction]
fn parse_ref_time(
    tz_db: TimeZoneDatabaseWrapper,
    tz: &str,
    timestamp: i64,
) -> PyResult<DucklingTimeWrapper> {
    // `?` raises a Python exception for an interior NUL instead of panicking.
    let tz_c_str = CString::new(tz)?;
    let haskell_tz = unsafe { wparseRefTime(tz_db.ptr, tz_c_str.as_ptr(), timestamp) };
    Ok(DucklingTimeWrapper { ptr: haskell_tz })
}

/// Parse an ISO-639-1 language code
///
/// Parameters
/// ----------
/// lang: str
///     ISO-639-1 code of the language to parse
///
/// Returns
/// -------
/// Language:
///     Opaque handle to a Haskell reference of the language. If the language
///     does not exist, or if it is not supported by Duckling,
///     it defaults to English (EN).
383 | #[pyfunction] 384 | fn parse_lang(lang: &str) -> PyResult { 385 | let lang_c_str = CString::new(lang).expect("CString::new failed"); 386 | let haskell_lang = unsafe { wparseLang(lang_c_str.as_ptr()) }; 387 | let result = LanguageWrapper { ptr: haskell_lang }; 388 | Ok(result) 389 | } 390 | 391 | /// Retrieve the default locale for a given language 392 | /// 393 | /// Parameters 394 | /// ---------- 395 | /// lang: Language 396 | /// Opaque handle to a Duckling language 397 | /// 398 | /// Returns 399 | /// ------- 400 | /// Locale: 401 | /// Opaque handle to the default language locale 402 | #[pyfunction] 403 | fn default_locale_lang(lang: LanguageWrapper) -> PyResult { 404 | let haskell_locale = unsafe { wmakeDefaultLocale(lang.ptr) }; 405 | let result = LocaleWrapper { 406 | ptr: haskell_locale, 407 | }; 408 | Ok(result) 409 | } 410 | 411 | /// Parse an ISO3166 alpha2 country code into a locale 412 | /// 413 | /// Parameters 414 | /// ---------- 415 | /// locale: str 416 | /// Locale identifier to parse, it can be either a country code or a language 417 | /// with its country separated by underscore. 418 | /// default_locale: Locale 419 | /// Default locale to fallback on on case that the given code is not valid. 420 | /// 421 | /// Returns 422 | /// ------- 423 | /// Locale: 424 | /// Opaque handle to the default language locale 425 | #[pyfunction] 426 | fn parse_locale(locale: &str, default_locale: LocaleWrapper) -> PyResult { 427 | let locale_c_str = CString::new(locale).expect("CString::new failed"); 428 | let haskell_locale = unsafe { wparseLocale(locale_c_str.as_ptr(), default_locale.ptr) }; 429 | let result = LocaleWrapper { 430 | ptr: haskell_locale, 431 | }; 432 | Ok(result) 433 | } 434 | 435 | /// Parse a list of dimensions to use during parsing 436 | /// 437 | /// Parameters 438 | /// ---------- 439 | /// dims: List[str] 440 | /// A list containing valid parsing dimensions to use with Duckling. 
See 441 | /// :class:`DucklingDimensions` to see a list of valid dimensions to use. 442 | /// 443 | /// Returns 444 | /// ------- 445 | /// wrapped_dims: List[DimensionWrapper] 446 | /// A list of opaque handlers that describe the given dimensions in Duckling. 447 | #[pyfunction] 448 | fn parse_dimensions(dims: Vec) -> PyResult> { 449 | let n_elems = dims.len() as i32; 450 | 451 | // This is required in order to preserve ownership of the pointers 452 | let cstr_dims: Vec = dims 453 | .iter() 454 | .map(|s| CString::new(s.as_str()).expect("CString::new failed")) 455 | .collect(); 456 | 457 | let c_dims: Vec<*const c_char> = cstr_dims.iter().map(|s| s.as_ptr()).collect(); 458 | 459 | let haskell_list = unsafe { wparseDimensions(n_elems, c_dims.as_ptr()) }; 460 | let haskell_length = unsafe { dimensionListLength(haskell_list) }; 461 | let haskell_ptrs = unsafe { dimensionListPtrs(haskell_list) }; 462 | let ptr_slice = unsafe { slice::from_raw_parts(haskell_ptrs, haskell_length as usize) }; 463 | let mut result_vec: Vec = Vec::new(); 464 | for ptr in ptr_slice { 465 | let wrapper = DimensionWrapper { ptr: *ptr }; 466 | result_vec.push(wrapper); 467 | } 468 | Ok(result_vec) 469 | } 470 | 471 | /// Parse a text into a structured format 472 | /// 473 | /// Parameters 474 | /// ---------- 475 | /// text: str 476 | /// Text to parse. 477 | /// context: Context 478 | /// Reference time and locale information 479 | /// dimensions: List[Dimension] 480 | /// List of dimensions to parse 481 | /// with_latent: bool 482 | /// When set, includes less certain parses, e.g. "7" as an hour of the day 483 | /// 484 | /// Returns 485 | /// ------- 486 | /// result: str 487 | /// JSON-valid string that contains the parsed information. 
488 | #[pyfunction] 489 | fn parse_text( 490 | text: &str, 491 | context: Context, 492 | dimensions: Vec, 493 | with_latent: bool, 494 | ) -> PyResult { 495 | let c_text = CString::new(text).expect("CString::new failed"); 496 | let reference_time = context.reference_time; 497 | let locale = context.locale; 498 | let n_elems = dimensions.len() as i32; 499 | let c_dims: Vec<*mut HaskellValue> = dimensions.iter().map(|d| d.ptr).collect(); 500 | let dim_list = unsafe { dimensionListCreate(c_dims.as_ptr(), n_elems) }; 501 | let haskell_entities = unsafe { 502 | wparseText( 503 | c_text.as_ptr(), 504 | reference_time.ptr, 505 | locale.ptr, 506 | dim_list, 507 | with_latent as u8, 508 | ) 509 | }; 510 | let string_result = unsafe { 511 | CStr::from_ptr(haskell_entities) 512 | .to_string_lossy() 513 | .to_owned() 514 | .to_string() 515 | }; 516 | Ok(string_result) 517 | } 518 | 519 | /// This module is a python module implemented in Rust. 520 | #[pymodule] 521 | fn duckling(_py: Python, m: &PyModule) -> PyResult<()> { 522 | m.add("__version__", VERSION)?; 523 | m.add("GHC_VERSION", GHC_VERSION)?; 524 | m.add_wrapped(wrap_pyfunction!(load_time_zones))?; 525 | m.add_wrapped(wrap_pyfunction!(get_current_ref_time))?; 526 | m.add_wrapped(wrap_pyfunction!(parse_ref_time))?; 527 | m.add_wrapped(wrap_pyfunction!(parse_lang))?; 528 | m.add_wrapped(wrap_pyfunction!(default_locale_lang))?; 529 | m.add_wrapped(wrap_pyfunction!(parse_locale))?; 530 | m.add_wrapped(wrap_pyfunction!(parse_dimensions))?; 531 | m.add_wrapped(wrap_pyfunction!(parse_text))?; 532 | m.add_wrapped(wrap_pyfunction!(init))?; 533 | m.add_wrapped(wrap_pyfunction!(stop))?; 534 | m.add_class::()?; 535 | m.add_class::()?; 536 | m.add_class::()?; 537 | m.add_class::()?; 538 | m.add_class::()?; 539 | Ok(()) 540 | } 541 | --------------------------------------------------------------------------------