├── duckling
├── tests
│ ├── __init__.py
│ └── test_duckling.py
├── duckling.pyi
└── __init__.py
├── .github
├── stack
│ └── stack.yaml
├── scripts
│ ├── build_ffi.sh
│ ├── build_pyduckling.sh
│ ├── run_tests.sh
│ ├── build_mac_wheels.sh
│ └── build_linux_wheels.sh
└── workflows
│ ├── linux-wheels.yml
│ ├── mac-tests.yml
│ ├── mac-wheels.yml
│ └── rust.yml
├── .gitmodules
├── ext_lib
└── README.md
├── pyproject.toml
├── MANIFEST.in
├── RELEASE.md
├── packaging
├── 0001-Allow-binaries-larger-than-32MB.patch
└── build_wheels.py
├── LICENSE
├── Cargo.toml
├── setup.py
├── CHANGELOG.md
├── .gitignore
├── README.md
└── src
└── lib.rs
/duckling/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/.github/stack/stack.yaml:
--------------------------------------------------------------------------------
1 | packages: []
2 | resolver:
3 | compiler: ghc-8.6.5
4 |
--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "duckling-ffi"]
2 | path = duckling-ffi
3 | url = https://github.com/treble-ai/duckling-ffi
4 |
--------------------------------------------------------------------------------
/.github/scripts/build_ffi.sh:
--------------------------------------------------------------------------------
1 |
# Build the duckling-ffi shared library and stage it in ext_lib.
#
# Uses paths relative to the repository root and needs `stack` on PATH.
# If Stack is missing, install it first:
#   curl -sSL https://get.haskellstack.org/ | sh
#   export PATH="$HOME/.local/bin:$PATH"

# Abort on the first failing step (and trace commands), as the wheel build
# scripts do, so a failed `cd` or build is never followed by later steps
# running in the wrong directory or copying a stale library.
set -ex

cd duckling-ffi
stack build
cp libducklingffi.so ../ext_lib
9 |
--------------------------------------------------------------------------------
/ext_lib/README.md:
--------------------------------------------------------------------------------
1 |
2 | # Building binaries for duckling-ffi
3 | Please build the shared library (`libducklingffi.so/dylib/dll`) by compiling [duckling-ffi](https://github.com/treble-ai/duckling-ffi) and place it in this folder.
4 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["maturin"]
3 | build-backend = "maturin"
4 |
5 | [tool.maturin]
6 | manylinux = "off"
7 | sdist-include = ["duckling-ffi/*"]
8 |
9 | # [build-system]
10 | # requires = ["setuptools", "wheel", "setuptools-rust"]
11 |
--------------------------------------------------------------------------------
/.github/scripts/build_pyduckling.sh:
--------------------------------------------------------------------------------
1 |
# Pin the global Stack project to GHC 8.6.5, then build pyduckling with
# `maturin develop`.

# Abort on the first failing step (and trace commands), as the wheel build
# scripts do, instead of running maturin after a failed setup step.
set -ex

# export PATH="$HOME/.local/bin:$PATH"
mkdir -p "$HOME/.stack/global-project"
cp .github/stack/stack.yaml "$HOME/.stack/global-project"

# NOTE(review): presumably run from $HOME so that the resolver is written to
# the global project rather than to a project in the checkout - confirm.
pushd "$HOME"
stack config set resolver ghc-8.6.5
popd

maturin develop
11 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include Cargo.toml
2 | recursive-include src *
3 | recursive-include ext_lib *
4 | recursive-include duckling-ffi *
5 | recursive-include packaging *
6 | global-include *.rs
7 | global-exclude *.so
8 | global-exclude *.dll
9 | global-exclude *.dylib
10 | global-exclude *.pyc
11 | global-exclude *.pyd
12 | prune duckling-ffi/.stack-work
--------------------------------------------------------------------------------
/.github/scripts/run_tests.sh:
--------------------------------------------------------------------------------
1 |
# Run the pytest suite with the GHC runtime and ext_lib on the dynamic
# loader search path.

# Stop immediately if the GHC library directory cannot be resolved instead
# of running the tests with a broken search path.
set -e

GHC_PATH=$(stack exec -- ghc --print-libdir)
EXTRA_LIB_DIRS="$GHC_PATH/rts:$(pwd)/ext_lib"

# ${VAR:+$VAR:} emits the separator only when VAR is already non-empty.
# A leading ':' would add an empty entry, which the Linux loader interprets
# as the current working directory.
if [[ "$(uname)" != Darwin ]]; then
    export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$EXTRA_LIB_DIRS"
else
    export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH:+$DYLD_LIBRARY_PATH:}$EXTRA_LIB_DIRS"
fi

pytest -x -v --cov=duckling duckling/tests
12 |
--------------------------------------------------------------------------------
/.github/scripts/build_mac_wheels.sh:
--------------------------------------------------------------------------------
# Build the macOS wheel with packaging/build_wheels.py and, on the
# Python 3.8 job only, the source distribution.
set -ex

# Pin the global Stack project to GHC 8.6.5
mkdir -p "$HOME/.stack/global-project"
cp .github/stack/stack.yaml "$HOME/.stack/global-project"

pushd "$HOME"
stack config set resolver ghc-8.6.5
popd

# Adjust PATH in macOS because conda is not at front of it; this makes
# `python` below resolve to the `test` environment.
export PATH=/usr/local/miniconda/envs/test/bin:/usr/local/miniconda/condabin:$PATH

GHC_LIB=$(stack exec -- ghc --print-libdir)

# ${VAR:+$VAR:} emits the separator only when VAR is already non-empty, so
# an unset DYLD_LIBRARY_PATH does not yield a leading empty path entry.
export DYLD_LIBRARY_PATH="${DYLD_LIBRARY_PATH:+$DYLD_LIBRARY_PATH:}$GHC_LIB/rts:$(pwd)/ext_lib"

# Add every sub-directory of the GHC library directory to the search path.
for dir in "$GHC_LIB"/*/; do
    export DYLD_LIBRARY_PATH="$DYLD_LIBRARY_PATH:$dir"
done

python packaging/build_wheels.py

# The sdist is built by a single job of the matrix.
if [[ $PYTHON_VERSION == "3.8" ]]; then
    python setup.py sdist
fi
27 |
--------------------------------------------------------------------------------
/RELEASE.md:
--------------------------------------------------------------------------------
1 | To release a new version of pyduckling:
2 | 1. git fetch upstream && git checkout upstream/master
3 | 2. Close milestone on GitHub
4 | 3. git clean -xfdi
5 | 4. Update CHANGELOG.md with loghub
6 | 5. git add -A && git commit -m "Update Changelog"
7 | 6. Update release version in ``Cargo.toml`` (set release version, remove '-dev0')
8 | 7. git add -A && git commit -m "Release vX.X.X"
9 | 8. git tag -a vX.X.X -m "Release vX.X.X"
10 | 9. git push upstream master
11 | 10. git push upstream --tags
12 | 11. Wait for GitHub Actions to produce the wheels
13 | 12. Download the wheels locally for Linux and Mac
14 | 13. twine upload dist/*
15 | 14. Update development version in ``Cargo.toml`` (add '-dev0' and increment minor, see [1](#explanation))
16 | 15. git add -A && git commit -m "Back to work"
17 | 16. git push upstream master
18 |
19 |
20 | [1] We need to append '-dev0', as Cargo does not support the '.dev0'
21 | syntax.
22 |
--------------------------------------------------------------------------------
/packaging/0001-Allow-binaries-larger-than-32MB.patch:
--------------------------------------------------------------------------------
1 | From 5b84342f57009f8cc1be801825a0a5925f0fcebc Mon Sep 17 00:00:00 2001
2 | From: =?UTF-8?q?Edgar=20Andr=C3=A9s=20Margffoy=20Tuay?=
3 | Date: Mon, 31 Aug 2020 11:37:56 -0500
4 | Subject: [PATCH] Allow binaries larger than 32MB
5 |
6 | ---
7 | src/patchelf.cc | 2 +-
8 | 1 file changed, 1 insertion(+), 1 deletion(-)
9 |
10 | diff --git a/src/patchelf.cc b/src/patchelf.cc
11 | index d0063f9..19d1483 100644
12 | --- a/src/patchelf.cc
13 | +++ b/src/patchelf.cc
14 | @@ -328,7 +328,7 @@ static FileContents readFile(std::string fileName,
15 | size_t size = std::min(cutOff, (size_t) st.st_size);
16 |
17 | FileContents contents = std::make_shared>();
18 | - contents->reserve(size + 32 * 1024 * 1024);
19 | + contents->reserve(size + 64 * 1024 * 1024);
20 | contents->resize(size, 0);
21 |
22 | int fd = open(fileName.c_str(), O_RDONLY);
23 | --
24 | 2.20.1
25 |
26 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2020 treble.ai
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/Cargo.toml:
--------------------------------------------------------------------------------
1 | [package]
2 | name = "pyduckling-native"
3 | version = "0.1.1-dev0"
4 | authors = ["Edgar Andrés Margffoy Tuay "]
5 | description = "Rust-based Python wrapper for duckling library in Haskell."
6 | repository = "https://github.com/treble-ai/pyduckling"
7 | license = "MIT"
8 | keywords = ["haskell", "python", "parse", "duckling"]
9 | readme = "README.md"
10 | build = "build.rs"
11 | edition = "2018"
12 |
13 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
14 | [lib]
15 | name = "duckling"
16 | crate-type = ["cdylib"]
17 |
18 | [dependencies]
19 | libc = "0.2"
20 |
21 | [dependencies.pyo3]
22 | version = "0.10.1"
23 | features = ["extension-module"]
24 |
25 | [package.metadata.maturin]
26 | requires-dist = ["pendulum"]
27 | classifier = [
28 | "Development Status :: 4 - Beta",
29 | "Intended Audience :: Developers",
30 | "License :: OSI Approved :: MIT License",
31 | "Programming Language :: Python :: 3.5",
32 | "Programming Language :: Python :: 3.6",
33 | "Programming Language :: Python :: 3.7",
34 | "Programming Language :: Python :: 3.8"
35 | ]
36 |
--------------------------------------------------------------------------------
/duckling/duckling.pyi:
--------------------------------------------------------------------------------
1 |
2 | from typing import List
3 |
4 |
5 | class TimeZoneDatabase:
6 | ...
7 |
8 |
9 | class Dimension:
10 | ...
11 |
12 |
13 | class Language:
14 | @property
15 | def name(self) -> str: ...
16 |
17 |
18 | class Locale:
19 | @property
20 | def name(self) -> str: ...
21 |
22 |
23 | class DucklingTime:
24 | @property
25 | def iso8601(self) -> str: ...
26 |
27 |
28 | class Context:
29 | reference_time: DucklingTime = ...
30 | locale: Locale = ...
31 |
32 |
33 | def init(): ...
34 | def stop(): ...
35 | def load_time_zones(path: str) -> TimeZoneDatabase: ...
36 | def get_current_ref_time(tz_db: TimeZoneDatabase, tz: str) -> DucklingTime: ...
37 | def parse_ref_time(tz_db: TimeZoneDatabase, tz: str,
38 | timestamp: int) -> DucklingTime: ...
39 | def parse_lang(lang: str) -> Language: ...
40 | def default_locale_lang(lang: Language) -> Locale: ...
41 | def parse_locale(locale: str, default_locale: Locale) -> Locale: ...
42 | def parse_dimensions(dims: List[str]) -> List[Dimension]: ...
43 | def parse_text(text: str, context: Context, dimensions: List[Dimension],
44 | with_latent: bool = False) -> str: ...
45 |
--------------------------------------------------------------------------------
/.github/workflows/linux-wheels.yml:
--------------------------------------------------------------------------------
1 | name: Linux Wheels
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | linux:
14 | name: Linux (CentOS 7) Py${{ matrix.PYTHON_VERSION }}
15 | runs-on: ubuntu-latest
16 | container:
17 | image: quay.io/pypa/manylinux2014_x86_64:latest
18 | volumes:
19 | - my_docker_volume:/volume_mount
20 | env:
21 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }}
22 | RUNNER_OS: "ubuntu"
23 | HOME: "/root"
24 | # options: -u $(id -u):$(id -g)
25 | strategy:
26 | fail-fast: false
27 | matrix:
28 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8", "3.9"]
29 | steps:
30 | - name: Checkout branch
31 | uses: actions/checkout@v2
32 | with:
33 | submodules: true
34 | - name: Build wheel
35 | shell: bash -l {0}
36 | run: bash -l .github/scripts/build_linux_wheels.sh
37 | - name: Upload wheel artifact
38 | uses: actions/upload-artifact@v2
39 | with:
40 | name: linux_dist
41 | path: |
42 | dist/*.whl
43 | dist/*.tar.gz
44 |
--------------------------------------------------------------------------------
/.github/scripts/build_linux_wheels.sh:
--------------------------------------------------------------------------------
#!/bin/bash
# Build the Linux wheel for the interpreter selected by PYTHON_VERSION:
# installs the Rust and Haskell toolchains, builds a patched patchelf,
# compiles libducklingffi and runs packaging/build_wheels.py.
# The Python 3.9 job also produces the source distribution.
set -ex
shopt -s nullglob

# Unexpanded glob for the interpreter directory (dots removed from the
# version, e.g. 3.8 -> /opt/python/cp38*/bin). It is resolved to a real
# path further below, right before it is used.
PYBIN=/opt/python/cp${PYTHON_VERSION//./}*/bin
echo $PYBIN

# Install ZLib and sudo
yum install -y zlib-devel sudo
export HOME="/root"

# Install Rustup
curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain nightly -y
export PATH="$HOME/.cargo/bin:$PATH"

# Install Stack
curl -sSL https://get.haskellstack.org/ | sh
export PATH="$HOME/.local/bin:$PATH"

# Set stack resolver to 8.6.5
mkdir -p "$HOME/.stack/global-project"
cp .github/stack/stack.yaml "$HOME/.stack/global-project"
cp packaging/0001-Allow-binaries-larger-than-32MB.patch "$HOME"

pushd "$HOME"
stack config set resolver ghc-8.6.5
popd

# Compile patchelf and apply 64MB patch
pushd /root
git clone https://github.com/NixOS/patchelf
cd patchelf
git apply "$HOME/0001-Allow-binaries-larger-than-32MB.patch"

bash bootstrap.sh
./configure
make
make install
popd

# Compile libducklingffi
pushd duckling-ffi
stack build
cp libducklingffi.so ../ext_lib
popd

# Produce wheels and patch binaries for redistribution
# The unquoted expansion is intentional: it resolves the glob stored above.
PYBIN=$(echo $PYBIN)
GHC_LIB=$(stack exec -- ghc --print-libdir)

# ${VAR:+$VAR:} emits the separator only when VAR is already non-empty.
# A leading ':' would add an empty entry, which the Linux loader interprets
# as the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$GHC_LIB/rts:$(pwd)/ext_lib"

"${PYBIN}/pip" install -U setuptools wheel setuptools-rust auditwheel
"${PYBIN}/python" packaging/build_wheels.py

# The sdist is built by a single job of the matrix.
if [[ $PYTHON_VERSION == "3.9" ]]; then
    "${PYBIN}/python" setup.py sdist
fi
61 |
--------------------------------------------------------------------------------
/.github/workflows/mac-tests.yml:
--------------------------------------------------------------------------------
1 | name: Mac Tests
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | macos:
14 | name: MacOS Py${{ matrix.PYTHON_VERSION }}
15 | runs-on: macos-latest
16 | env:
17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }}
18 | RUNNER_OS: "macos"
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"]
23 | steps:
24 | - name: Checkout branch
25 | uses: actions/checkout@v2
26 | with:
27 | submodules: true
28 | - name: Install latest Rust nightly
29 | uses: actions-rs/toolchain@v1
30 | with:
31 | toolchain: nightly
32 | override: true
33 | components: rustfmt, clippy
34 | # - name: Install Haskell Stack
35 | # uses: mstksg/setup-stack@v1
36 | - name: Install Conda
37 | uses: goanpeca/setup-miniconda@v1
38 | with:
39 | activate-environment: test
40 | auto-update-conda: true
41 | auto-activate-base: false
42 | python-version: ${{ matrix.PYTHON_VERSION }}
43 | - name: Compile duckling-ffi
44 | shell: bash -l {0}
45 | run: bash -l .github/scripts/build_ffi.sh
46 | - name: Install build/test dependencies
47 | shell: bash -l {0}
48 | run: pip install maturin toml pytest pytest-cov coverage pendulum
49 | - name: Build pyduckling
50 | shell: bash -l {0}
51 | run: bash -l .github/scripts/build_pyduckling.sh
52 | - name: Run tests
53 | shell: bash -l {0}
54 | run: bash -l .github/scripts/run_tests.sh
55 |
--------------------------------------------------------------------------------
/.github/workflows/mac-wheels.yml:
--------------------------------------------------------------------------------
1 | name: Mac Wheels
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | macos:
14 | name: MacOS Py${{ matrix.PYTHON_VERSION }}
15 | runs-on: macos-latest
16 | env:
17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }}
18 | RUNNER_OS: "macos"
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"]
23 | steps:
24 | - name: Checkout branch
25 | uses: actions/checkout@v2
26 | with:
27 | submodules: true
28 | - name: Install latest Rust nightly
29 | uses: actions-rs/toolchain@v1
30 | with:
31 | toolchain: nightly
32 | override: true
33 | components: rustfmt, clippy
34 | # - name: Install Haskell Stack
35 | # uses: mstksg/setup-stack@v1
36 | - name: Install Conda
37 | uses: goanpeca/setup-miniconda@v1
38 | with:
39 | activate-environment: test
40 | auto-update-conda: true
41 | auto-activate-base: false
42 | python-version: ${{ matrix.PYTHON_VERSION }}
43 | - name: Compile duckling-ffi
44 | shell: bash -l {0}
45 | run: bash -l .github/scripts/build_ffi.sh
46 | - name: Install build dependencies
47 | shell: bash -l {0}
48 | run: pip install setuptools-rust wheel auditwheel delocate toml
49 | - name: Build wheel
50 | shell: bash -l {0}
51 | run: bash -l .github/scripts/build_mac_wheels.sh
52 | - name: Upload wheel artifact
53 | uses: actions/upload-artifact@v2
54 | with:
55 | name: mac_dist
56 | path: |
57 | dist/*.whl
58 | dist/*.tar.gz
59 |
--------------------------------------------------------------------------------
/.github/workflows/rust.yml:
--------------------------------------------------------------------------------
1 | name: Linux Tests
2 |
3 | on:
4 | push:
5 | branches: [ master ]
6 | pull_request:
7 | branches: [ master ]
8 |
9 | env:
10 | CARGO_TERM_COLOR: always
11 |
12 | jobs:
13 | linux:
14 | name: Linux Py${{ matrix.PYTHON_VERSION }}
15 | runs-on: ubuntu-latest
16 | env:
17 | PYTHON_VERSION: ${{ matrix.PYTHON_VERSION }}
18 | RUNNER_OS: "ubuntu"
19 | strategy:
20 | fail-fast: false
21 | matrix:
22 | PYTHON_VERSION: ["3.5", "3.6", "3.7", "3.8"]
23 | steps:
24 | - name: Checkout branch
25 | uses: actions/checkout@v2
26 | with:
27 | submodules: true
28 | - name: Install latest Rust nightly
29 | uses: actions-rs/toolchain@v1
30 | with:
31 | toolchain: nightly
32 | override: true
33 | components: rustfmt, clippy
34 | - name: Print Rust version
35 | shell: bash -l {0}
36 | run: rustc --version
37 | - name: Install Conda
38 | uses: goanpeca/setup-miniconda@v1
39 | with:
40 | activate-environment: test
41 | auto-update-conda: true
42 | auto-activate-base: false
43 | python-version: ${{ matrix.PYTHON_VERSION }}
44 | - name: Install Haskell Stack
45 | uses: mstksg/setup-stack@v1
46 | - name: Compile duckling-ffi
47 | shell: bash -l {0}
48 | run: bash -l .github/scripts/build_ffi.sh
49 | - name: Install build/test dependencies
50 | shell: bash -l {0}
51 | run: pip install maturin toml pytest pytest-cov coverage pendulum
52 | - name: Build pyduckling
53 | shell: bash -l {0}
54 | run: bash -l .github/scripts/build_pyduckling.sh
55 | - name: Run tests
56 | shell: bash -l {0}
57 | run: bash -l .github/scripts/run_tests.sh
58 |
--------------------------------------------------------------------------------
/duckling/__init__.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------------
3 | # Copyright (c) Treble.ai
4 | #
5 | # Licensed under the terms of the MIT License
6 | # (see LICENSE for details)
7 | # -----------------------------------------------------------------------------
8 |
9 | """Python bindings for Haskell's duckling library."""
10 |
11 |
12 | # Standard library imports
13 | import json
14 | from typing import List
15 |
16 | # Local imports
17 | from .duckling import (init, stop, load_time_zones,
18 | get_current_ref_time, parse_ref_time,
19 | parse_lang, default_locale_lang, parse_locale,
20 | parse_dimensions, parse_text, Context, Dimension,
21 | Locale, __version__, GHC_VERSION)
22 |
23 | __version__
24 | GHC_VERSION
25 | init
26 | stop
27 | load_time_zones
28 | parse_ref_time
29 | parse_locale
30 | parse_lang
31 | parse_text
32 | parse_dimensions
33 | default_locale_lang
34 | get_current_ref_time
35 | Context
36 | Locale
37 |
38 | # Start Haskell runtime
39 | init()
40 |
41 |
def parse(text: str, context: Context, dimensions: List[Dimension],
          with_latent: bool = False) -> dict:
    """
    Extract structured information from a text.

    Thin wrapper around :func:`parse_text` that decodes its JSON output.

    Parameters
    ----------
    text: str
        Input text to analyse.
    context: Context
        Reference time and locale information.
    dimensions: List[Dimension]
        Dimensions to look for in the text.
    with_latent: bool
        If True, less certain parses are included as well, e.g. "7" as an
        hour of the day.

    Returns
    -------
    result: dict
        Parsed information, decoded from the JSON string returned by
        :func:`parse_text`.
    """
    return json.loads(parse_text(text, context, dimensions, with_latent))
65 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------------
3 | # Copyright (c) Treble.ai
4 | #
5 | # Licensed under the terms of the MIT License
6 | # (see LICENSE for details)
7 | # -----------------------------------------------------------------------------
8 |
9 | """Setup script for PyDuckling."""
10 |
11 | # yapf: disable
12 |
13 | # Standard library imports
14 | import re
15 | import os
16 |
17 | # Third party imports
18 | import toml
19 | from setuptools import find_packages, setup
20 | from setuptools_rust import Binding, RustExtension
21 |
22 |
23 | HERE = os.path.abspath(os.path.dirname(__file__))
24 | AUTHOR_REGEX = re.compile(r'(.*) <(.*@.*[.].*)>')
25 |
26 |
def get_metadata():
    """
    Load the ``[package]`` table of ``Cargo.toml``.

    The manifest is read as a file, which avoids importing the module.

    Returns
    -------
    dict
        Contents of the ``package`` section of ``Cargo.toml``.
    """
    # TOML documents are UTF-8 by specification. Passing the encoding
    # explicitly keeps non-ASCII values (e.g. author names) readable when the
    # locale's preferred encoding is not UTF-8.
    with open(os.path.join(HERE, 'Cargo.toml'), 'r', encoding='utf-8') as f:
        data = toml.load(f)
    return data['package']
33 |
34 |
def get_description():
    """
    Get the long description of the package.

    Returns
    -------
    str
        Full contents of ``README.md``.
    """
    # Decode explicitly as UTF-8 so that the result does not depend on the
    # locale's preferred encoding.
    with open(os.path.join(HERE, 'README.md'), 'r', encoding='utf-8') as f:
        return f.read()
40 |
41 |
def get_author(metadata):
    """
    Split the first author entry into name and e-mail address.

    Parameters
    ----------
    metadata: dict
        Package metadata; ``metadata['authors']`` must be a non-empty list
        of strings.

    Returns
    -------
    tuple
        ``(name, email)``. ``email`` is ``None`` when the entry does not
        match the ``Name <user@host.tld>`` form.
    """
    author = metadata['authors'][0]
    match = AUTHOR_REGEX.match(author)
    if match is None:
        # Cargo allows author entries without an e-mail address; fall back
        # to the bare name instead of failing with AttributeError on None.
        return author.strip(), None
    return match.group(1), match.group(2)
48 |
49 |
50 | REQUIREMENTS = [
51 | 'pendulum'
52 | ]
53 |
54 | metadata = get_metadata()
55 | name, email = get_author(metadata)
56 |
57 | setup(
58 | name=metadata['name'],
59 | version=metadata['version'],
60 | license=metadata['license'],
61 | description=metadata['description'],
62 | long_description=get_description(),
63 | long_description_content_type='text/markdown',
64 | author=name,
65 | author_email=email,
66 | url=metadata['repository'],
67 | keywords=metadata['keywords'],
68 | packages=find_packages(exclude=['contrib', 'docs', 'tests*']),
69 | rust_extensions=[RustExtension("duckling.duckling", binding=Binding.PyO3)],
70 | package_data={
71 | 'duckling': ['*.dll', '*.dylib', '*.so']
72 | },
73 | zip_safe=False,
74 | install_requires=REQUIREMENTS,
75 | include_package_data=True,
76 | classifiers=[
77 | 'Development Status :: 4 - Beta',
78 | 'Intended Audience :: Developers',
79 | 'License :: OSI Approved :: MIT License',
80 | 'Programming Language :: Python :: 3.5',
81 | 'Programming Language :: Python :: 3.6',
82 | 'Programming Language :: Python :: 3.7',
83 | 'Programming Language :: Python :: 3.8'
84 | ],
85 | )
86 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | ## Version 0.1.0 (2020/09/03)
2 |
3 | ### Issues Closed
4 |
5 | * [Issue 10](https://github.com/treble-ai/pyduckling/issues/10) - Release v0.1.0
6 | * [Issue 9](https://github.com/treble-ai/pyduckling/issues/9) - Add RELEASE instructions ([PR 15](https://github.com/treble-ai/pyduckling/pull/15) by [@andfoy](https://github.com/andfoy))
7 | * [Issue 8](https://github.com/treble-ai/pyduckling/issues/8) - Improve README and add compilling instructions ([PR 12](https://github.com/treble-ai/pyduckling/pull/12) by [@andfoy](https://github.com/andfoy))
8 | * [Issue 4](https://github.com/treble-ai/pyduckling/issues/4) - Migrate all functions ([PR 5](https://github.com/treble-ai/pyduckling/pull/5) by [@andfoy](https://github.com/andfoy))
9 | * [Issue 2](https://github.com/treble-ai/pyduckling/issues/2) - Add tests ([PR 7](https://github.com/treble-ai/pyduckling/pull/7) by [@andfoy](https://github.com/andfoy))
10 | * [Issue 1](https://github.com/treble-ai/pyduckling/issues/1) - Setup CI using Github Actions ([PR 3](https://github.com/treble-ai/pyduckling/pull/3) by [@andfoy](https://github.com/andfoy))
11 |
12 | In this release 6 issues were closed.
13 |
14 | ### Pull Requests Merged
15 |
16 | * [PR 15](https://github.com/treble-ai/pyduckling/pull/15) - PR: Add release instructions, by [@andfoy](https://github.com/andfoy) ([9](https://github.com/treble-ai/pyduckling/issues/9))
17 | * [PR 14](https://github.com/treble-ai/pyduckling/pull/14) - PR: Rename PyPi distribution to pyduckling-native, by [@andfoy](https://github.com/andfoy)
18 | * [PR 13](https://github.com/treble-ai/pyduckling/pull/13) - PR: Patch and relocate wheels manually, add macOS support, by [@andfoy](https://github.com/andfoy)
19 | * [PR 12](https://github.com/treble-ai/pyduckling/pull/12) - PR: Improve README and add compilation instructions, by [@andfoy](https://github.com/andfoy) ([8](https://github.com/treble-ai/pyduckling/issues/8))
20 | * [PR 11](https://github.com/treble-ai/pyduckling/pull/11) - PR: Add typing stubs, by [@andfoy](https://github.com/andfoy)
21 | * [PR 7](https://github.com/treble-ai/pyduckling/pull/7) - PR: Add pyduckling tests, by [@andfoy](https://github.com/andfoy) ([2](https://github.com/treble-ai/pyduckling/issues/2))
22 | * [PR 6](https://github.com/treble-ai/pyduckling/pull/6) - Fix CIs, by [@andfoy](https://github.com/andfoy)
23 | * [PR 5](https://github.com/treble-ai/pyduckling/pull/5) - PR: Migrate all functions, by [@andfoy](https://github.com/andfoy) ([4](https://github.com/treble-ai/pyduckling/issues/4))
24 | * [PR 3](https://github.com/treble-ai/pyduckling/pull/3) - PR: Enable Github Actions, by [@andfoy](https://github.com/andfoy) ([1](https://github.com/treble-ai/pyduckling/issues/1))
25 |
26 | In this release 9 pull requests were closed.
27 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Generated by Cargo
2 | # will have compiled files and executables
3 | /target/
4 |
5 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
6 | # More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
7 | Cargo.lock
8 |
9 | # These are backup files generated by rustfmt
10 | **/*.rs.bk
11 |
12 | *.so
13 | *.o
14 | *.a
15 |
16 | # Python Rules
17 | # Byte-compiled / optimized / DLL files
18 | __pycache__/
19 | *.py[cod]
20 | *$py.class
21 |
22 | # C extensions
23 | *.so
24 |
25 | # Distribution / packaging
26 | .Python
27 | build/
28 | develop-eggs/
29 | dist/
30 | downloads/
31 | eggs/
32 | .eggs/
33 | lib/
34 | lib64/
35 | parts/
36 | sdist/
37 | var/
38 | wheels/
39 | pip-wheel-metadata/
40 | share/python-wheels/
41 | *.egg-info/
42 | .installed.cfg
43 | *.egg
44 | MANIFEST
45 |
46 | # PyInstaller
47 | # Usually these files are written by a python script from a template
48 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
49 | *.manifest
50 | *.spec
51 |
52 | # Installer logs
53 | pip-log.txt
54 | pip-delete-this-directory.txt
55 |
56 | # Unit test / coverage reports
57 | htmlcov/
58 | .tox/
59 | .nox/
60 | .coverage
61 | .coverage.*
62 | .cache
63 | nosetests.xml
64 | coverage.xml
65 | *.cover
66 | *.py,cover
67 | .hypothesis/
68 | .pytest_cache/
69 | cover/
70 |
71 | # Translations
72 | *.mo
73 | *.pot
74 |
75 | # Django stuff:
76 | *.log
77 | local_settings.py
78 | db.sqlite3
79 | db.sqlite3-journal
80 |
81 | # Flask stuff:
82 | instance/
83 | .webassets-cache
84 |
85 | # Scrapy stuff:
86 | .scrapy
87 |
88 | # Sphinx documentation
89 | docs/_build/
90 |
91 | # PyBuilder
92 | .pybuilder/
93 | target/
94 |
95 | # Jupyter Notebook
96 | .ipynb_checkpoints
97 |
98 | # IPython
99 | profile_default/
100 | ipython_config.py
101 |
102 | # pyenv
103 | # For a library or package, you might want to ignore these files since the code is
104 | # intended to run in multiple environments; otherwise, check them in:
105 | # .python-version
106 |
107 | # pipenv
108 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
109 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
110 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
111 | # install all needed dependencies.
112 | #Pipfile.lock
113 |
114 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
115 | __pypackages__/
116 |
117 | # Celery stuff
118 | celerybeat-schedule
119 | celerybeat.pid
120 |
121 | # SageMath parsed files
122 | *.sage.py
123 |
124 | # Environments
125 | .env
126 | .venv
127 | env/
128 | venv/
129 | ENV/
130 | env.bak/
131 | venv.bak/
132 |
133 | # Spyder project settings
134 | .spyderproject
135 | .spyproject
136 |
137 | # Rope project settings
138 | .ropeproject
139 |
140 | # mkdocs documentation
141 | /site
142 |
143 | # mypy
144 | .mypy_cache/
145 | .dmypy.json
146 | dmypy.json
147 |
148 | # Pyre type checker
149 | .pyre/
150 |
151 | # pytype static type analyzer
152 | .pytype/
153 |
154 | # Cython debug symbols
155 | cython_debug/
156 |
157 | # static files generated from Django application using `collectstatic`
158 | media
159 | static
160 |
161 | # VSCode settings
162 | .vscode/
163 |
164 | # Auditwheel output
165 | wheelhouse/
166 | .wheel-process/
167 |
--------------------------------------------------------------------------------
/duckling/tests/test_duckling.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------------
3 | # Copyright (c) Treble.ai
4 | #
5 | # Licensed under the terms of the MIT License
6 | # (see LICENSE for details)
7 | # -----------------------------------------------------------------------------
8 |
9 | """Tests for pyduckling library."""
10 |
11 | # Pytest imports
12 | import pytest
13 |
14 | # Third-party imports
15 | import pendulum
16 |
17 | # Local imports
18 | from duckling import (load_time_zones, get_current_ref_time, parse_ref_time,
19 | parse_lang, default_locale_lang, parse_locale,
20 | parse_dimensions, parse, Context)
21 |
22 |
@pytest.fixture
def time_zones():
    """Time zone database loaded from ``/usr/share/zoneinfo``."""
    return load_time_zones("/usr/share/zoneinfo")
27 |
28 |
def test_load_time_zones():
    """Loading ``/usr/share/zoneinfo`` must return a database object."""
    assert load_time_zones("/usr/share/zoneinfo") is not None
32 |
33 |
def test_get_current_ref_time(time_zones):
    """Check that the current reference time follows the wall clock."""
    def recover_datetime(ref_time):
        # UTC conversion is required to recover the actual datetime
        recovered = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive()
        return recovered.replace(microsecond=0)

    def assert_same_instant(expected, actual):
        # The clock is read twice (once by the test, once by the library),
        # so the two readings may fall on either side of a second boundary.
        # Allow one second of drift instead of requiring exact equality.
        assert abs((actual - expected).total_seconds()) <= 1

    # Remove timezone information
    bog_now = pendulum.now('America/Bogota').naive().replace(microsecond=0)
    ref_time = get_current_ref_time(time_zones, 'America/Bogota')
    assert_same_instant(bog_now, recover_datetime(ref_time))

    # Function should fallback to UTC if the timezone does not exist
    utc_now = pendulum.now('UTC').naive().replace(microsecond=0)
    ref_time = get_current_ref_time(time_zones, 'Continent/Country')
    assert_same_instant(utc_now, recover_datetime(ref_time))
49 |
50 |
def test_parse_ref_time(time_zones):
    """Reference times built from UNIX timestamps must round-trip."""
    bog_now = pendulum.now('America/Bogota').replace(microsecond=0)
    # Initialize any date
    dt = pendulum.datetime(1996, 2, 22, 9, 22, 3, 0, tz="Europe/Madrid")
    pst_now = pendulum.now('America/Los_Angeles').replace(microsecond=0)

    # (time zone name, source datetime, expected naive datetime)
    cases = [
        ('America/Bogota', bog_now, bog_now.naive()),
        ('Europe/Madrid', dt, dt.naive()),
        # Function should fallback to UTC if the timezone does not exist
        ('Continent/Country', pst_now, pst_now.in_tz('UTC').naive()),
    ]

    for tz_name, moment, expected in cases:
        ref_time = parse_ref_time(time_zones, tz_name, moment.int_timestamp)
        # UTC conversion is required to recover the actual datetime
        recovered = pendulum.parse(ref_time.iso8601).in_tz('UTC').naive()
        recovered = recovered.replace(microsecond=0)
        assert expected == recovered
78 |
79 |
def test_parse_lang():
    """Language parsing ignores case and falls back to EN when unknown."""
    expectations = [
        # Function call should be case-insensitive
        ('es', 'ES'),
        ('PT', 'PT'),
        # Function should default to EN when the language does not exist
        ('UU', 'EN'),
    ]
    for code, expected_name in expectations:
        assert parse_lang(code).name == expected_name
91 |
92 |
def test_default_locale_lang():
    """The default locale of Spanish is reported as ``ES_XX``."""
    spanish = parse_lang('ES')
    assert default_locale_lang(spanish).name == 'ES_XX'
97 |
98 |
def test_parse_locale():
    """Locale parsing accepts ``LANG_COUNTRY`` and otherwise uses the default."""
    fallback = default_locale_lang(parse_lang('ES'))

    # Parse Language + Country locale
    assert parse_locale('ES_CO', fallback).name == 'ES_CO'

    # Parse Country locale: the result carries the default locale's name
    assert parse_locale('CO', fallback).name == 'ES_XX'
110 |
111 |
def test_parse_dimensions():
    """Known dimension names are all parsed; unknown names are dropped."""
    valid_dimensions = ["amount-of-money", "credit-card-number", "distance",
                        "duration", "email", "number", "ordinal",
                        "phone-number", "quantity", "temperature",
                        "time", "time-grain", "url", "volume"]

    # All dimensions should be parsed
    assert len(parse_dimensions(valid_dimensions)) == len(valid_dimensions)

    unknown_names = ["dim1", "dim2", "dim3"]
    invalid_dimensions = ["amount-of-money", "dim1", "credit-card-number",
                          "dim2", "distance", "dim3"]

    # Valid-only dimensions should be parsed
    parsed = parse_dimensions(invalid_dimensions)
    assert len(parsed) == len(invalid_dimensions) - len(unknown_names)
128 |
129 |
def test_parse(time_zones):
    """Parse Spanish phrases for time, distance and volume dimensions."""
    bog_now = pendulum.now('America/Bogota').replace(microsecond=0)
    ref_time = parse_ref_time(
        time_zones, 'America/Bogota', bog_now.int_timestamp)
    locale = parse_locale('ES_CO', default_locale_lang(parse_lang('ES')))
    context = Context(ref_time, locale)

    def first_value(text, dimension_names):
        # Value payload of the first entity found in `text`
        dims = parse_dimensions(dimension_names)
        return parse(text, context, dims, False)[0]['value']

    # Test time periods
    time_info = first_value('En dos semanas', ['time', 'duration'])
    next_time = pendulum.parse(time_info['value'])
    assert next_time == bog_now.add(weeks=2).start_of('day')

    # Test distance units
    distance_info = first_value('3 km', ['distance'])
    assert distance_info['value'] == 3
    assert distance_info['unit'] == 'kilometre'

    # Test volume units
    volume_info = first_value('5 litros de leche', ['volume'])
    assert volume_info['value'] == 5
    assert volume_info['unit'] == 'litre'
167 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # PyDuckling
2 | [](https://raw.githubusercontent.com/treble-ai/pyduckling-native/master/LICENSE)
3 | [](https://pypi.org/project/pyduckling-native/)
4 | [](https://www.anaconda.com/download/)
5 | [](https://www.anaconda.com/download/)
6 | [](https://pepy.tech/project/pyduckling-native)
7 | [](https://github.com/treble-ai/pyduckling-native)
8 | 
9 | 
10 |
11 | *Copyright © 2020– Treble.ai*
12 |
13 | ## Overview
14 | This package provides native Python bindings for Facebook's [Duckling](https://github.com/facebook/duckling). It supports all the dimensions and languages available in the original library, and it neither requires spawning a Haskell server nor uses HTTP to call the Duckling API.
15 |
16 | **Note:** This package is completely Haskell-less
17 |
18 | ## Installing
19 | To install pyduckling, you can use both conda and pip package managers:
20 |
21 | ```bash
22 | # Using pip
23 | pip install pyduckling-native
24 |
25 | # Using conda
26 | conda install pyduckling -c treble-ai
27 | ```
28 |
29 | **Notes:** Right now, we only provide package distributions for Linux (x86_64). We will provide Windows and Mac distributions in the next release.
30 |
31 |
32 | ## Dependencies
33 | To compile pyduckling, you will require the latest nightly release of [Rust](https://rustup.rs/), alongside [Cargo](https://crates.io/). Also, it requires a Python distribution with its corresponding development headers. Finally, this project depends on the following Cargo crates:
34 |
35 | * [PyO3](https://github.com/PyO3/pyo3): Library used to produce Python bindings from Rust code.
36 | * [Maturin](https://github.com/PyO3/maturin): Build system to build and publish Rust-based Python packages
37 |
38 | Additionally, this package depends on [Duckling-FFI](https://github.com/treble-ai/duckling-ffi), used to compile the native interface to Duckling on Haskell. In order to compile Duckling-FFI, you will require the [Stack](https://haskell-lang.org/get-started) Haskell manager.
39 |
40 |
41 | ## Installing locally
42 | Besides Rust and Stack, you will require the latest version of maturin installed to compile this project locally:
43 |
44 | ```bash
45 | pip install maturin toml
46 | ```
47 |
48 | First, you will need to compile Duckling-FFI in order to produce the shared library ``libducklingffi``. To do so, you can use the git submodule found at the root of this repository:
49 |
50 | ```bash
51 | cd duckling-ffi
52 | stack build
53 | ```
54 |
55 | Then, you will need to move the resulting binary ``libducklingffi.so`` to the ``ext_lib`` folder:
56 |
57 | ```bash
58 | cp duckling-ffi/libducklingffi.so ext_lib
59 | ```
60 |
61 | After completing this procedure, it is possible to execute the following command to compile pyduckling:
62 |
63 | ```bash
64 | maturin develop
65 | ```
66 |
67 | In order to produce wheels, ``maturin build`` can be used instead. This project supports [PEP517](https://www.python.org/dev/peps/pep-0517/), thus pip can be used to install this package as well:
68 |
69 | ```bash
70 | pip install -U .
71 | ```
72 |
73 | ## Running tests
74 | We use pytest to run the tests as follows (after calling ``maturin develop``):
75 |
76 | ```bash
77 | pytest -v duckling/tests
78 | ```
79 |
80 | ## Package usage
81 | PyDuckling provides access to the parsing capabilities of Duckling used to extract structured data from text.
82 |
83 | ```python
84 | # Core imports
85 | from duckling import (load_time_zones, parse_ref_time,
86 | parse_lang, default_locale_lang, parse_locale,
87 | parse_dimensions, parse, Context)
88 |
89 | # Load reference time for time parsing
90 | time_zones = load_time_zones("/usr/share/zoneinfo")
91 | bog_now = pendulum.now('America/Bogota').replace(microsecond=0)
92 | ref_time = parse_ref_time(
93 | time_zones, 'America/Bogota', bog_now.int_timestamp)
94 |
95 | # Load language/locale information
96 | lang_es = parse_lang('ES')
97 | default_locale = default_locale_lang(lang_es)
98 | locale = parse_locale('ES_CO', default_locale)
99 |
100 | # Create parsing context with time and language information
101 | context = Context(ref_time, locale)
102 |
103 | # Define the dimensions to look up
104 | valid_dimensions = ["amount-of-money", "credit-card-number", "distance",
105 | "duration", "email", "number", "ordinal",
106 | "phone-number", "quantity", "temperature",
107 | "time", "time-grain", "url", "volume"]
108 |
109 | # Parse dimensions to use
110 | output_dims = parse_dimensions(valid_dimensions)
111 |
112 | # Parse a phrase
113 | result = parse('En dos semanas', context, output_dims, False)
114 | ```
115 |
116 | This wrapper allows access to all the dimensions and languages available on Duckling:
117 |
118 | | Dimension | Example input | Example value output |
119 | | --------- | ------------- | -------------------- |
120 | | `amount-of-money` | "42€" | `{"value":42,"type":"value","unit":"EUR"}` |
121 | | `credit-card-number` | "4111-1111-1111-1111" | `{"value":"4111111111111111","issuer":"visa"}` |
122 | | `distance` | "6 miles" | `{"value":6,"type":"value","unit":"mile"}` |
123 | | `duration` | "3 mins" | `{"value":3,"minute":3,"unit":"minute","normalized":{"value":180,"unit":"second"}}` |
124 | | `email` | "duckling-team@fb.com" | `{"value":"duckling-team@fb.com"}` |
125 | | `number` | "eighty eight" | `{"value":88,"type":"value"}` |
126 | | `ordinal` | "33rd" | `{"value":33,"type":"value"}` |
127 | | `phone-number` | "+1 (650) 123-4567" | `{"value":"(+1) 6501234567"}` |
128 | | `quantity` | "3 cups of sugar" | `{"value":3,"type":"value","product":"sugar","unit":"cup"}` |
129 | | `temperature` | "80F" | `{"value":80,"type":"value","unit":"fahrenheit"}` |
130 | | `time` | "today at 9am" | `{"values":[{"value":"2016-12-14T09:00:00.000-08:00","grain":"hour","type":"value"}],"value":"2016-12-14T09:00:00.000-08:00","grain":"hour","type":"value"}` |
131 | | `url` | "https://api.wit.ai/message?q=hi" | `{"value":"https://api.wit.ai/message?q=hi","domain":"api.wit.ai"}` |
132 | | `volume` | "4 gallons" | `{"value":4,"type":"value","unit":"gallon"}` |
133 |
134 |
135 | ## Changelog
136 | Please see our [CHANGELOG](https://github.com/treble-ai/pyduckling/blob/master/CHANGELOG.md) file to learn more about our new features and improvements.
137 |
138 |
139 | ## Contribution guidelines
140 | We follow PEP8 and PEP257 for pure Python code, and we use Rust to compile the extension module. We use MyPy type annotations for all functions and classes declared in this package. Feel free to send a PR or create an issue if you have any problem or question.
141 |
--------------------------------------------------------------------------------
/packaging/build_wheels.py:
--------------------------------------------------------------------------------
1 | # -*- coding: utf-8 -*-
2 | # -----------------------------------------------------------------------------
3 | # Copyright (c) Treble.ai
4 | #
5 | # Licensed under the terms of the MIT License
6 | # (see LICENSE.txt for details)
7 | # -----------------------------------------------------------------------------
8 |
9 | """Helper script to package wheels and relocate binaries."""
10 |
11 | # Standard library imports
12 | import os
13 | import io
14 | import sys
15 | import glob
16 | import shutil
17 | import zipfile
18 | import hashlib
19 | import platform
20 | import subprocess
21 | import os.path as osp
22 | from base64 import urlsafe_b64encode
23 |
24 | # Third party imports
25 | import toml
26 | from auditwheel.lddtree import lddtree
27 | from wheel.bdist_wheel import get_abi_tag
28 |
29 |
30 | HERE = osp.dirname(osp.abspath(__file__))
31 | PACKAGE_ROOT = osp.dirname(HERE)
32 | PLATFORM_ARCH = platform.machine()
33 | PYTHON_VERSION = sys.version_info
34 |
35 |
def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE):
    """Generate successive pieces read from `file`, `size` units at a time.

    Iteration stops as soon as a read returns an empty result (EOF).
    """
    chunk = file.read(size)
    while chunk:
        yield chunk
        chunk = file.read(size)
43 |
44 |
def rehash(path, blocksize=1 << 20):
    """Hash the file at `path` with SHA-256, reading `blocksize` bytes at a time.

    Returns a ``(digest, length)`` tuple where `digest` is the string
    ``'sha256='`` followed by the URL-safe base64 digest without ``=``
    padding, and `length` is the file size in bytes rendered as a string.
    """
    hasher = hashlib.sha256()
    total = 0
    with open(path, 'rb') as stream:
        for piece in read_chunks(stream, size=blocksize):
            hasher.update(piece)
            total += len(piece)
    encoded = urlsafe_b64encode(hasher.digest()).decode('latin1')
    return ('sha256=' + encoded.rstrip('='), str(total))  # type: ignore
58 |
59 |
def unzip_file(file, dest):
    """Extract every member of the zip archive `file` into directory `dest`."""
    with zipfile.ZipFile(file, mode='r') as archive:
        archive.extractall(path=dest)
64 |
65 |
def get_metadata():
    """Return the ``[package]`` table of the project's ``Cargo.toml``.

    The manifest is parsed as text, which avoids importing the module.
    The whole table is returned (not just the version); callers read the
    keys they need, e.g. ``get_metadata()['version']``.
    """
    with open(osp.join(PACKAGE_ROOT, 'Cargo.toml'), 'r') as f:
        manifest = toml.load(f)
    return manifest['package']
72 |
73 |
def is_program_installed(basename):
    """
    Return the path of program `basename` if it is found in PATH.

    Each PATH entry is joined with `basename` and the first existing regular
    file is returned. Return None when the program is not found, or when the
    PATH environment variable is not set at all.

    On macOS systems, a .app is considered installed if it exists; in that
    case `basename` itself is returned.
    """
    if (sys.platform == 'darwin' and basename.endswith('.app') and
            osp.exists(basename)):
        return basename

    search_path = os.environ.get("PATH")
    if search_path is None:
        # Without PATH there is nothing to search; indexing os.environ
        # directly would raise KeyError here.
        return None

    for directory in search_path.split(os.pathsep):
        abspath = osp.join(directory, basename)
        if osp.isfile(abspath):
            return abspath
    return None
89 |
90 |
def find_program(basename):
    """
    Locate `basename` in PATH and return the path found, or None.

    On Windows, when `basename` lacks an executable extension, the
    ``.exe``, ``.bat`` and ``.cmd`` variants are tried before the bare name.
    """
    candidates = [basename]
    if os.name == 'nt':
        # Windows platforms
        extensions = ('.exe', '.bat', '.cmd')
        if not basename.endswith(extensions):
            candidates = [basename + ext for ext in extensions] + [basename]
    located = (is_program_installed(name) for name in candidates)
    return next((path for path in located if path), None)
107 |
108 |
def patch_new_path(library_path, new_dir):
    """Build the destination path of a bundled copy of `library_path`.

    The first eight hex digits of the SHA-256 of the full original path are
    inserted after the first dot-separated component of the file name, e.g.
    ``libfoo.so.1`` becomes ``libfoo.<hash>.so.1``. The result is placed
    under `new_dir`.

    A file name without any extension yields ``name.<hash>`` (no trailing
    dot).
    """
    library = osp.basename(library_path)
    name, _, suffix = library.partition('.')
    hash_id = hashlib.sha256(library_path.encode('utf-8')).hexdigest()[:8]
    parts = [name, hash_id]
    if suffix:
        # Only append the remainder when there is one; joining an empty
        # suffix would leave a dangling '.' at the end of the name.
        parts.append(suffix)
    return osp.join(new_dir, '.'.join(parts))
116 |
117 |
def patch_mac():
    """Build the macOS wheel and run delocate on it.

    Steps: locate the ``delocate-wheel`` and ``delocate-listdeps`` programs,
    build the wheel with ``setup.py bdist_wheel``, run ``delocate-wheel`` on
    the resulting file and finally list its dependencies.

    Raises
    ------
    FileNotFoundError
        If either delocate program cannot be found in PATH.
    subprocess.CalledProcessError
        If any of the invoked commands fails.
    """
    # Find delocate location
    delocate_wheel = find_program('delocate-wheel')
    delocate_list = find_program('delocate-listdeps')
    # Both programs are invoked below, so both must be present.
    if delocate_wheel is None or delocate_list is None:
        raise FileNotFoundError('Delocate was not found in the system, '
                                'please install it via pip')
    # Produce wheel
    print('Producing wheel...')
    subprocess.check_output(
        [
            sys.executable,
            'setup.py',
            'bdist_wheel'
        ],
        cwd=PACKAGE_ROOT
    )

    package_info = get_metadata()
    version = package_info['version'].replace('-', '.')
    wheel_name = 'pyduckling_native-{0}-cp{1}{2}-{3}-macosx_10_15_{4}.whl'.format(
        version, PYTHON_VERSION.major, PYTHON_VERSION.minor,
        get_abi_tag(), PLATFORM_ARCH)
    dist = osp.join(PACKAGE_ROOT, 'dist', wheel_name)

    print('Calling delocate...')
    delocate_output = subprocess.check_output(
        [
            delocate_wheel,
            '-v',
            dist
        ],
        cwd=PACKAGE_ROOT
    )
    # The command runs in verbose mode; show what it reported.
    print(delocate_output.decode(errors='replace'))

    print('Resulting libraries')
    listing = subprocess.check_output(
        [
            delocate_list,
            '--all',
            dist
        ],
        cwd=PACKAGE_ROOT
    )
    # Print the dependency list announced by the message above.
    print(listing.decode(errors='replace'))
162 |
163 |
def patch_linux():
    """Build the Linux wheel and bundle its shared-library dependencies.

    Steps:

    1. Build the wheel with ``setup.py bdist_wheel`` and unzip it into a
       scratch directory (``.wheel-process``).
    2. Walk the dependency tree reported by ``lddtree`` for the extension
       module, skipping libraries whose path starts with ``/lib`` (except
       ``libpcre*``).
    3. Copy every remaining library into ``duckling.libs`` under a
       hash-tagged name and rewrite the ``DT_NEEDED`` entries and rpaths
       with ``patchelf``.
    4. Regenerate the wheel ``RECORD`` file and re-zip the wheel in place.

    Raises
    ------
    FileNotFoundError
        If ``patchelf`` is not in PATH, or if a dependency has no resolved
        path and therefore cannot be copied.
    subprocess.CalledProcessError
        If any of the invoked commands fails.
    """
    # Get patchelf location
    patchelf = find_program('patchelf')
    if patchelf is None:
        raise FileNotFoundError('Patchelf was not found in the system, please'
                                ' make sure that is available on the PATH.')

    # Produce wheel
    print('Producing wheel...')
    subprocess.check_output(
        [
            sys.executable,
            'setup.py',
            'bdist_wheel'
        ],
        cwd=PACKAGE_ROOT
    )

    package_info = get_metadata()
    version = package_info['version'].replace('-', '.')
    wheel_name = 'pyduckling_native-{0}-cp{1}{2}-{3}-linux_{4}.whl'.format(
        version, PYTHON_VERSION.major, PYTHON_VERSION.minor,
        get_abi_tag(), PLATFORM_ARCH)
    dist = osp.join(PACKAGE_ROOT, 'dist', wheel_name)
    output_dir = osp.join(PACKAGE_ROOT, '.wheel-process')

    print(glob.glob(osp.join(PACKAGE_ROOT, 'dist', '*.whl')))

    # Always start from an empty scratch directory
    if osp.exists(output_dir):
        shutil.rmtree(output_dir)

    os.makedirs(output_dir)

    print('Unzipping wheel...')
    unzip_file(dist, output_dir)

    print('Finding ELF dependencies...')
    main_binary = 'duckling.cpython-{0}-{1}-linux-gnu.so'.format(
        get_abi_tag().replace('cp', ''), PLATFORM_ARCH)
    output_library = osp.join(output_dir, 'duckling')
    binary_path = osp.join(output_library, main_binary)

    ld_tree = lddtree(binary_path)
    tree_libs = ld_tree['libs']

    # Breadth-first walk over (library, parent) pairs
    binary_queue = [(n, main_binary) for n in ld_tree['needed']]
    binary_paths = {main_binary: binary_path}
    binary_dependencies = {}

    while binary_queue:
        library, parent = binary_queue.pop(0)
        library_info = tree_libs[library]
        print(library)
        print(library_info)

        library_location = library_info['path']
        if not library_location:
            # A dependency without a resolved path cannot be copied; fail
            # with a clear message instead of an AttributeError below.
            raise FileNotFoundError(
                'Could not locate the shared library {0} required by '
                '{1}'.format(library, parent))

        if (library_location.startswith('/lib') and
                not library.startswith('libpcre')):
            # Omit glibc/gcc/system libraries
            continue

        binary_dependencies.setdefault(parent, []).append(library)

        if library in binary_paths:
            # Already scheduled for bundling; do not walk it twice
            continue

        binary_paths[library] = library_location
        binary_queue += [(n, library) for n in library_info['needed']]

    print('Copying dependencies to wheel directory')
    new_libraries_path = osp.join(output_dir, 'duckling.libs')
    os.makedirs(new_libraries_path)
    new_names = {main_binary: binary_path}

    for library in binary_paths:
        if library != main_binary:
            library_path = binary_paths[library]
            new_library_path = patch_new_path(library_path, new_libraries_path)
            print('{0} -> {1}'.format(library, new_library_path))
            shutil.copyfile(library_path, new_library_path)
            new_names[library] = new_library_path

    print('Updating dependency names by new files')
    for library in binary_paths:
        if library != main_binary:
            if library not in binary_dependencies:
                continue
            library_dependencies = binary_dependencies[library]
            new_library_name = new_names[library]
            for dep in library_dependencies:
                new_dep = osp.basename(new_names[dep])
                print('{0}: {1} -> {2}'.format(library, dep, new_dep))
                subprocess.check_output(
                    [
                        patchelf,
                        '--replace-needed',
                        dep,
                        new_dep,
                        new_library_name
                    ],
                    cwd=new_libraries_path)

            print('Updating library rpath')
            subprocess.check_output(
                [
                    patchelf,
                    '--set-rpath',
                    "$ORIGIN",
                    new_library_name
                ],
                cwd=new_libraries_path)

            rpath = subprocess.check_output(
                [
                    patchelf,
                    '--print-rpath',
                    new_library_name
                ],
                cwd=new_libraries_path)
            # Show the rpath that was just queried
            print(rpath.decode(errors='replace'))

    print("Update main library dependencies")
    # The extension may have no bundled dependency at all, in which case it
    # never became a key of `binary_dependencies`.
    library_dependencies = binary_dependencies.get(main_binary, [])
    for dep in library_dependencies:
        new_dep = osp.basename(new_names[dep])
        print('{0}: {1} -> {2}'.format(main_binary, dep, new_dep))
        subprocess.check_output(
            [
                patchelf,
                '--replace-needed',
                dep,
                new_dep,
                main_binary
            ],
            cwd=output_library)

    print('Update main library rpath')
    subprocess.check_output(
        [
            patchelf,
            '--set-rpath',
            "$ORIGIN:$ORIGIN/../duckling.libs",
            binary_path
        ],
        cwd=output_library
    )

    print('Update RECORD file in wheel')
    dist_info = osp.join(
        output_dir, 'pyduckling_native-{0}.dist-info'.format(version))
    record_file = osp.join(dist_info, 'RECORD')

    with open(record_file, 'w') as f:
        for root, _, files in os.walk(output_dir):
            for this_file in files:
                full_file = osp.join(root, this_file)
                rel_file = osp.relpath(full_file, output_dir)
                if full_file == record_file:
                    # RECORD lists itself without hash and size
                    f.write('{0},,\n'.format(rel_file))
                else:
                    digest, size = rehash(full_file)
                    f.write('{0},{1},{2}\n'.format(rel_file, digest, size))

    print('Compressing wheel')
    # make_archive appends '.zip'; swap the archive in for the old wheel
    shutil.make_archive(dist, 'zip', output_dir)
    os.remove(dist)
    shutil.move('{0}.zip'.format(dist), dist)
    shutil.rmtree(output_dir)
331 |
332 |
if __name__ == '__main__':
    # Dispatch on the running platform; platforms without an entry are
    # left untouched.
    patchers = {'linux': patch_linux, 'darwin': patch_mac}
    patcher = patchers.get(sys.platform)
    if patcher is not None:
        patcher()
338 |
--------------------------------------------------------------------------------
/src/lib.rs:
--------------------------------------------------------------------------------
1 | // ----------------------------------------------------------------------------
2 | // Copyright (c) Treble.ai
3 | //
4 | // Licensed under the terms of the MIT License
5 | // (see LICENSE.txt for details)
6 | // ----------------------------------------------------------------------------
7 |
8 | /// Python bindings for Haskell's Duckling library written in Rust
9 | // PyO3 imports
10 | // use pyo3::class::PyMappingProtocol;
11 | use pyo3::create_exception;
12 | use pyo3::exceptions;
13 | use pyo3::gc::{PyGCProtocol, PyVisit};
14 | use pyo3::prelude::*;
15 | use pyo3::wrap_pyfunction;
16 | use pyo3::PyTraverseError;
17 |
18 | use std::ffi::{CStr, CString};
19 | use std::os::raw::{c_char, c_int};
20 | use std::ptr;
21 | use std::slice;
22 | use std::sync::atomic::{AtomicBool, Ordering};
23 | use std::sync::Once;
24 |
25 | // Package version
26 | const VERSION: &'static str = env!("CARGO_PKG_VERSION");
27 | const GHC_VERSION: &'static str = env!("GHC_VERSION");
28 |
29 | // Haskell runtime status
30 | static START_ONCE: Once = Once::new();
31 | static STOP_ONCE: Once = Once::new();
32 | static STOPPED: AtomicBool = AtomicBool::new(false);
33 |
/// Opaque type standing in for a value owned by the Haskell side.
///
/// It is only ever handled through raw pointers passed to and returned from
/// the foreign functions; it cannot be constructed from Rust. A zero-sized
/// `#[repr(C)]` struct is used rather than an empty enum, which is the
/// recommended way to model a foreign opaque type.
#[repr(C)]
pub struct HaskellValue {
    _private: [u8; 0],
}
35 |
36 | extern "C" {
37 | // ----------------- Duckling API -----------------------------------
38 | pub fn wparseText(
39 | text: *const c_char,
40 | reference_time: *mut HaskellValue,
41 | locale: *mut HaskellValue,
42 | dimensions: *mut HaskellValue,
43 | with_latent: u8,
44 | ) -> *const c_char;
45 | pub fn wparseDimensions(n: i32, dimensions: *const *const c_char) -> *mut HaskellValue;
46 | pub fn wparseLocale(
47 | locale: *const c_char,
48 | default_locale: *mut HaskellValue,
49 | ) -> *mut HaskellValue;
50 | pub fn wmakeDefaultLocale(lang: *mut HaskellValue) -> *mut HaskellValue;
51 | pub fn wparseLang(lang: *const c_char) -> *mut HaskellValue;
52 | pub fn wparseRefTime(
53 | tzdb: *mut HaskellValue,
54 | tzStr: *const c_char,
55 | timestamp: i64,
56 | ) -> *mut HaskellValue;
57 | pub fn wcurrentReftime(tzdb: *mut HaskellValue, strPtr: *const c_char) -> *mut HaskellValue;
58 | pub fn wloadTimeZoneSeries(path: *const c_char) -> *mut HaskellValue;
59 | // ----------------- Duckling API -----------------------------------
60 | // Dimension list functions
61 | pub fn dimensionListCreate(
62 | ptrs: *const *mut HaskellValue,
63 | numElements: i32,
64 | ) -> *mut HaskellValue;
65 | pub fn dimensionListLength(dims: *mut HaskellValue) -> i32;
66 | pub fn dimensionListPtrs(dims: *mut HaskellValue) -> *mut *mut HaskellValue;
67 | pub fn dimensionListDestroy(dims: *mut HaskellValue);
68 | // Dimension functions
69 | pub fn dimensionDestroy(dim: *mut HaskellValue);
70 | // Time zone database functions
71 | pub fn tzdbDestroy(db: *mut HaskellValue);
72 | // Time reference wrapper functions
73 | pub fn duckTimeDestroy(time: *mut HaskellValue);
74 | pub fn duckTimeRepr(time: *mut HaskellValue) -> *const c_char;
75 | // Language wrapper functions
76 | pub fn langDestroy(lang: *mut HaskellValue);
77 | pub fn langRepr(lang: *mut HaskellValue) -> *const c_char;
78 | // Locale wrapper functions
79 | pub fn localeDestroy(locale: *mut HaskellValue);
80 | pub fn localeRepr(locale: *mut HaskellValue) -> *const c_char;
81 | // Haskell runtime start/stop
82 | pub fn hs_init(argc: c_int, argv: *const *const c_char);
83 | pub fn hs_exit();
84 | }
85 |
86 | create_exception!(pyduckling, RuntimeStoppedError, exceptions::Exception);
87 |
88 | /// Initialize the Haskell runtime. This function is safe to call more than once, and
89 | /// will do nothing on subsequent calls.
90 | ///
91 | /// The runtime will automatically be shutdown at program exit, or you can stop it
92 | /// earlier with `stop`.
93 | #[pyfunction]
94 | fn init() -> PyResult<()> {
95 | START_ONCE.call_once(|| {
96 | start_hs();
97 | unsafe {
98 | ::libc::atexit(stop_hs);
99 | }
100 | });
101 | Ok(())
102 | }
103 |
104 | /// Stop the Haskell runtime before the program exits. This function may only be called
105 | /// once during a program's execution.
106 | ///
107 | /// It is safe, but not useful, to call this before the runtime has started.
108 | ///
109 | /// Raises
110 | /// ------
111 | /// RuntimeStoppedError:
112 | /// If the runtime was already stopped.
113 | #[pyfunction]
114 | pub fn stop() -> PyResult<()> {
115 | if STOPPED.swap(true, Ordering::SeqCst) {
116 | let err = "Haskell: The GHC runtime may only be stopped once. See \
117 | https://downloads.haskell.org/%7Eghc/latest/docs/html/users_guide\
118 | /ffi-chap.html#id1";
119 | let exc = RuntimeStoppedError::py_err(err.to_string());
120 | return Err(exc);
121 | }
122 | stop_hs();
123 | Ok(())
124 | }
125 |
126 | fn start_hs() {
127 | let mut argv = Vec::<*const c_char>::with_capacity(1);
128 | argv.push(ptr::null_mut());
129 | unsafe {
130 | hs_init(0 as c_int, argv.as_ptr());
131 | }
132 | }
133 |
134 | extern "C" fn stop_hs() {
135 | STOP_ONCE.call_once(|| unsafe { hs_exit() });
136 | }
137 |
138 | /// Handle to the time zone database stored by Duckling
139 | #[pyclass(name=TimeZoneDatabase)]
140 | #[derive(Debug, Clone)]
141 | pub struct TimeZoneDatabaseWrapper {
142 | ptr: *mut HaskellValue,
143 | }
144 |
145 | #[pyproto]
146 | impl PyGCProtocol for TimeZoneDatabaseWrapper {
147 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
148 | Ok(())
149 | }
150 |
151 | fn __clear__(&mut self) {
152 | unsafe { tzdbDestroy(self.ptr) }
153 | }
154 | }
155 |
156 | // impl Drop for TimeZoneDatabaseWrapper {
157 | // fn drop(&mut self) {
158 | // println!("Calling GC");
159 | // unsafe {
160 | // tzdbDestroy(self.ptr);
161 | // }
162 | // }
163 | // }
164 |
165 | /// Handle to the time zone database stored by Duckling
166 | #[pyclass(name=DucklingTime)]
167 | #[derive(Debug, Clone)]
168 | pub struct DucklingTimeWrapper {
169 | ptr: *mut HaskellValue,
170 | }
171 |
172 | #[pymethods]
173 | impl DucklingTimeWrapper {
174 | #[getter]
175 | fn iso8601(&self) -> PyResult {
176 | let c_value = unsafe { duckTimeRepr(self.ptr) };
177 | let string_result = unsafe {
178 | CStr::from_ptr(c_value)
179 | .to_string_lossy()
180 | .to_owned()
181 | .to_string()
182 | };
183 | Ok(string_result)
184 | }
185 | }
186 |
187 | #[pyproto]
188 | impl PyGCProtocol for DucklingTimeWrapper {
189 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
190 | Ok(())
191 | }
192 |
193 | fn __clear__(&mut self) {
194 | unsafe { duckTimeDestroy(self.ptr) }
195 | }
196 | }
197 |
198 | /// Handle to a language code stored by Duckling
199 | #[pyclass(name=Language)]
200 | #[derive(Debug, Clone)]
201 | pub struct LanguageWrapper {
202 | ptr: *mut HaskellValue,
203 | }
204 |
205 | #[pymethods]
206 | impl LanguageWrapper {
207 | #[getter]
208 | fn name(&self) -> PyResult {
209 | let c_value = unsafe { langRepr(self.ptr) };
210 | let string_result = unsafe {
211 | CStr::from_ptr(c_value)
212 | .to_string_lossy()
213 | .to_owned()
214 | .to_string()
215 | };
216 | Ok(string_result)
217 | }
218 | }
219 |
220 | #[pyproto]
221 | impl PyGCProtocol for LanguageWrapper {
222 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
223 | Ok(())
224 | }
225 |
226 | fn __clear__(&mut self) {
227 | unsafe { langDestroy(self.ptr) }
228 | }
229 | }
230 |
231 | /// Handle to a locale code stored by Duckling
232 | #[pyclass(name=Locale)]
233 | #[derive(Debug, Clone)]
234 | pub struct LocaleWrapper {
235 | ptr: *mut HaskellValue,
236 | }
237 |
238 | #[pymethods]
239 | impl LocaleWrapper {
240 | #[getter]
241 | fn name(&self) -> PyResult {
242 | let c_value = unsafe { localeRepr(self.ptr) };
243 | let string_result = unsafe {
244 | CStr::from_ptr(c_value)
245 | .to_string_lossy()
246 | .to_owned()
247 | .to_string()
248 | };
249 | Ok(string_result)
250 | }
251 | }
252 |
253 | #[pyproto]
254 | impl PyGCProtocol for LocaleWrapper {
255 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
256 | Ok(())
257 | }
258 |
259 | fn __clear__(&mut self) {
260 | unsafe { localeDestroy(self.ptr) }
261 | }
262 | }
263 |
264 | /// Handle to a parsing dimension identifier
265 | #[pyclass(name=Dimension)]
266 | #[derive(Debug, Clone)]
267 | pub struct DimensionWrapper {
268 | ptr: *mut HaskellValue,
269 | }
270 |
271 | #[pyproto]
272 | impl PyGCProtocol for DimensionWrapper {
273 | fn __traverse__(&self, _visit: PyVisit) -> Result<(), PyTraverseError> {
274 | Ok(())
275 | }
276 |
277 | fn __clear__(&mut self) {
278 | unsafe { dimensionDestroy(self.ptr) }
279 | }
280 | }
281 |
282 | #[pyclass]
283 | #[derive(Debug, Clone)]
284 | pub struct Context {
285 | pub reference_time: DucklingTimeWrapper,
286 | pub locale: LocaleWrapper,
287 | }
288 |
289 | #[pymethods]
290 | impl Context {
291 | #[new]
292 | fn new(reference_time: DucklingTimeWrapper, locale: LocaleWrapper) -> Self {
293 | Context {
294 | reference_time: reference_time,
295 | locale: locale,
296 | }
297 | }
298 | }
299 |
300 | /// Load time zone information from local Olson files.
301 | ///
302 | /// Parameters
303 | /// ----------
304 | /// path: str
305 | /// Path to the olson data definitions. Many linux distros have
306 | /// Olson data in "/usr/share/zoneinfo/".
307 | ///
308 | /// Returns
309 | /// -------
310 | /// tz_info: TimeZoneDatabase
311 | /// Opaque handle to a map of time zone data information in Haskell.
312 | #[pyfunction]
313 | fn load_time_zones(path: &str) -> PyResult {
314 | // let c_str = WrappedString::new(path);
315 | let c_str = CString::new(path).expect("CString::new failed");
316 | let haskell_ptr = unsafe { wloadTimeZoneSeries(c_str.as_ptr()) };
317 | let result = TimeZoneDatabaseWrapper { ptr: haskell_ptr };
318 | Ok(result)
319 | }
320 |
321 | /// Get current reference time, given a Olson time zone
322 | ///
323 | /// Parameters
324 | /// ----------
325 | /// tz_db: TimeZoneDatabase
326 | /// Opaque handle to a map of time zone data information in Haskell
327 | /// tz: str
328 | /// Time zone name according to IANA
329 | ///
330 | /// Returns
331 | /// -------
332 | /// ref_time: DucklingTime
333 | /// Opaque handle to a time reference in Haskell
334 | #[pyfunction]
335 | fn get_current_ref_time(tz_db: TimeZoneDatabaseWrapper, tz: &str) -> PyResult {
336 | // let c_str = WrappedString::new(tz);
337 | let tz_c_str = CString::new(tz).expect("CString::new failed");
338 | let haskell_tz = unsafe { wcurrentReftime(tz_db.ptr, tz_c_str.as_ptr()) };
339 | let result = DucklingTimeWrapper { ptr: haskell_tz };
340 | Ok(result)
341 | }
342 |
343 | /// Parse a reference timestamp on a given Olson time zone
344 | ///
345 | /// Parameters
346 | /// ----------
347 | /// tz_db: TimeZoneDatabase
348 | /// Opaque handle to a map of time zone data information in Haskell
349 | /// tz: str
350 | /// Time zone name according to IANA
351 | /// timestamp: int
352 | /// UNIX integer timestamp
353 | ///
354 | /// Returns
355 | /// -------
356 | /// ref_time: DucklingTime
357 | /// Opaque handle to a time reference in Haskell
358 | #[pyfunction]
359 | fn parse_ref_time(
360 | tz_db: TimeZoneDatabaseWrapper,
361 | tz: &str,
362 | timestamp: i64,
363 | ) -> PyResult {
364 | let tz_c_str = CString::new(tz).expect("CString::new failed");
365 | let haskell_tz = unsafe { wparseRefTime(tz_db.ptr, tz_c_str.as_ptr(), timestamp) };
366 | let result = DucklingTimeWrapper { ptr: haskell_tz };
367 | Ok(result)
368 | }
369 |
370 | /// Parse an ISO-639-1 language code
371 | ///
372 | /// Parameters
373 | /// ----------
374 | /// lang: str
375 | /// ISO-639-1 code of the language to parse
376 | ///
377 | /// Returns
378 | /// -------
379 | /// Language:
380 | /// Opaque handle to a Haskell reference of the language. If the language
381 | /// does not exist, or if it is not supported by Duckling,
382 | /// it defaults to English (EN).
383 | #[pyfunction]
384 | fn parse_lang(lang: &str) -> PyResult {
385 | let lang_c_str = CString::new(lang).expect("CString::new failed");
386 | let haskell_lang = unsafe { wparseLang(lang_c_str.as_ptr()) };
387 | let result = LanguageWrapper { ptr: haskell_lang };
388 | Ok(result)
389 | }
390 |
391 | /// Retrieve the default locale for a given language
392 | ///
393 | /// Parameters
394 | /// ----------
395 | /// lang: Language
396 | /// Opaque handle to a Duckling language
397 | ///
398 | /// Returns
399 | /// -------
400 | /// Locale:
401 | /// Opaque handle to the default language locale
402 | #[pyfunction]
403 | fn default_locale_lang(lang: LanguageWrapper) -> PyResult {
404 | let haskell_locale = unsafe { wmakeDefaultLocale(lang.ptr) };
405 | let result = LocaleWrapper {
406 | ptr: haskell_locale,
407 | };
408 | Ok(result)
409 | }
410 |
411 | /// Parse an ISO3166 alpha2 country code into a locale
412 | ///
413 | /// Parameters
414 | /// ----------
415 | /// locale: str
416 | /// Locale identifier to parse, it can be either a country code or a language
417 | /// with its country separated by underscore.
418 | /// default_locale: Locale
419 | /// Default locale to fallback on on case that the given code is not valid.
420 | ///
421 | /// Returns
422 | /// -------
423 | /// Locale:
424 | /// Opaque handle to the default language locale
425 | #[pyfunction]
426 | fn parse_locale(locale: &str, default_locale: LocaleWrapper) -> PyResult {
427 | let locale_c_str = CString::new(locale).expect("CString::new failed");
428 | let haskell_locale = unsafe { wparseLocale(locale_c_str.as_ptr(), default_locale.ptr) };
429 | let result = LocaleWrapper {
430 | ptr: haskell_locale,
431 | };
432 | Ok(result)
433 | }
434 |
435 | /// Parse a list of dimensions to use during parsing
436 | ///
437 | /// Parameters
438 | /// ----------
439 | /// dims: List[str]
440 | /// A list containing valid parsing dimensions to use with Duckling. See
441 | /// :class:`DucklingDimensions` to see a list of valid dimensions to use.
442 | ///
443 | /// Returns
444 | /// -------
445 | /// wrapped_dims: List[DimensionWrapper]
446 | /// A list of opaque handlers that describe the given dimensions in Duckling.
447 | #[pyfunction]
448 | fn parse_dimensions(dims: Vec) -> PyResult> {
449 | let n_elems = dims.len() as i32;
450 |
451 | // This is required in order to preserve ownership of the pointers
452 | let cstr_dims: Vec = dims
453 | .iter()
454 | .map(|s| CString::new(s.as_str()).expect("CString::new failed"))
455 | .collect();
456 |
457 | let c_dims: Vec<*const c_char> = cstr_dims.iter().map(|s| s.as_ptr()).collect();
458 |
459 | let haskell_list = unsafe { wparseDimensions(n_elems, c_dims.as_ptr()) };
460 | let haskell_length = unsafe { dimensionListLength(haskell_list) };
461 | let haskell_ptrs = unsafe { dimensionListPtrs(haskell_list) };
462 | let ptr_slice = unsafe { slice::from_raw_parts(haskell_ptrs, haskell_length as usize) };
463 | let mut result_vec: Vec = Vec::new();
464 | for ptr in ptr_slice {
465 | let wrapper = DimensionWrapper { ptr: *ptr };
466 | result_vec.push(wrapper);
467 | }
468 | Ok(result_vec)
469 | }
470 |
471 | /// Parse a text into a structured format
472 | ///
473 | /// Parameters
474 | /// ----------
475 | /// text: str
476 | /// Text to parse.
477 | /// context: Context
478 | /// Reference time and locale information
479 | /// dimensions: List[Dimension]
480 | /// List of dimensions to parse
481 | /// with_latent: bool
482 | /// When set, includes less certain parses, e.g. "7" as an hour of the day
483 | ///
484 | /// Returns
485 | /// -------
486 | /// result: str
487 | /// JSON-valid string that contains the parsed information.
488 | #[pyfunction]
489 | fn parse_text(
490 | text: &str,
491 | context: Context,
492 | dimensions: Vec,
493 | with_latent: bool,
494 | ) -> PyResult {
495 | let c_text = CString::new(text).expect("CString::new failed");
496 | let reference_time = context.reference_time;
497 | let locale = context.locale;
498 | let n_elems = dimensions.len() as i32;
499 | let c_dims: Vec<*mut HaskellValue> = dimensions.iter().map(|d| d.ptr).collect();
500 | let dim_list = unsafe { dimensionListCreate(c_dims.as_ptr(), n_elems) };
501 | let haskell_entities = unsafe {
502 | wparseText(
503 | c_text.as_ptr(),
504 | reference_time.ptr,
505 | locale.ptr,
506 | dim_list,
507 | with_latent as u8,
508 | )
509 | };
510 | let string_result = unsafe {
511 | CStr::from_ptr(haskell_entities)
512 | .to_string_lossy()
513 | .to_owned()
514 | .to_string()
515 | };
516 | Ok(string_result)
517 | }
518 |
/// This module is a python module implemented in Rust.
///
/// Registers the version constants, the wrapped functions and the exported
/// classes on the module object.
#[pymodule]
fn duckling(_py: Python, m: &PyModule) -> PyResult<()> {
    // Module-level constants (`VERSION` and `GHC_VERSION` are defined elsewhere in this file).
    m.add("__version__", VERSION)?;
    m.add("GHC_VERSION", GHC_VERSION)?;
    // Functions exposed to Python; any registration failure is propagated with `?`.
    m.add_wrapped(wrap_pyfunction!(load_time_zones))?;
    m.add_wrapped(wrap_pyfunction!(get_current_ref_time))?;
    m.add_wrapped(wrap_pyfunction!(parse_ref_time))?;
    m.add_wrapped(wrap_pyfunction!(parse_lang))?;
    m.add_wrapped(wrap_pyfunction!(default_locale_lang))?;
    m.add_wrapped(wrap_pyfunction!(parse_locale))?;
    m.add_wrapped(wrap_pyfunction!(parse_dimensions))?;
    m.add_wrapped(wrap_pyfunction!(parse_text))?;
    // `init` and `stop` are defined outside this section of the file.
    m.add_wrapped(wrap_pyfunction!(init))?;
    m.add_wrapped(wrap_pyfunction!(stop))?;
    // Classes exposed to Python.
    // NOTE(review): the type arguments of these `add_class` calls are missing
    // in this listing; restore the intended wrapper types from the original source.
    m.add_class::()?;
    m.add_class::()?;
    m.add_class::()?;
    m.add_class::()?;
    m.add_class::()?;
    Ok(())
}
541 |
--------------------------------------------------------------------------------