├── .appveyor.yml ├── .gitignore ├── .travis.yml ├── .travis ├── before_install.sh ├── install.sh └── test.sh ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.rst ├── benches └── nlu_engine.rs ├── data └── tests │ ├── configs │ ├── beverage_config.json │ ├── game_config.json │ └── music_config.json │ ├── datasets │ ├── beverage_dataset.json │ ├── game_dataset.json │ └── music_dataset.json │ └── models │ ├── nlu_engine_beverage.zip │ ├── nlu_engine_beverage │ ├── builtin_entity_parser │ │ └── metadata.json │ ├── custom_entity_parser │ │ ├── metadata.json │ │ └── parser │ │ │ ├── metadata.json │ │ │ └── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ ├── deterministic_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── lookup_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ │ ├── intent_classifier │ │ │ ├── featurizer │ │ │ │ ├── cooccurrence_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ │ ├── featurizer.json │ │ │ │ ├── metadata.json │ │ │ │ └── tfidf_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ ├── intent_classifier.json │ │ │ └── metadata.json │ │ ├── intent_parser.json │ │ ├── metadata.json │ │ ├── slot_filler_0 │ │ │ ├── metadata.json │ │ │ ├── model55imurmx.crfsuite │ │ │ └── slot_filler.json │ │ └── slot_filler_1 │ │ │ ├── metadata.json │ │ │ ├── model8a9dqxnp.crfsuite │ │ │ └── slot_filler.json │ └── resources │ │ └── en │ │ ├── gazetteers │ │ └── top_10000_words_stemmed.txt │ │ ├── metadata.json │ │ ├── noise.txt │ │ ├── stemming │ │ └── stems.txt │ │ └── stop_words.txt │ ├── nlu_engine_game │ ├── builtin_entity_parser │ │ └── metadata.json │ ├── custom_entity_parser │ │ ├── metadata.json │ │ └── parser │ │ │ ├── metadata.json │ │ │ └── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ ├── lookup_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ │ ├── intent_classifier │ │ │ ├── featurizer │ │ │ │ ├── featurizer.json │ │ │ │ ├── metadata.json │ │ │ │ └── tfidf_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ ├── intent_classifier.json │ │ │ └── metadata.json │ │ ├── intent_parser.json │ │ ├── metadata.json │ │ └── slot_filler_0 │ │ │ ├── metadata.json │ │ │ ├── model.crfsuite │ │ │ └── slot_filler.json │ └── resources │ │ └── en │ │ ├── gazetteers │ │ └── top_10000_words_stemmed.txt │ │ ├── metadata.json │ │ ├── noise.txt │ │ ├── stemming │ │ └── stems.txt │ │ ├── stop_words.txt │ │ └── word_clusters │ │ └── brown_clusters.txt │ └── nlu_engine_music │ ├── builtin_entity_parser │ ├── gazetteer_entity_parser │ │ ├── metadata.json │ │ ├── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ │ └── parser_2 │ │ │ ├── metadata.json │ │ │ └── parser │ └── metadata.json │ ├── custom_entity_parser │ ├── metadata.json │ └── parser │ │ ├── metadata.json │ │ └── parser_1 │ │ ├── metadata.json │ │ └── parser │ ├── deterministic_intent_parser │ ├── intent_parser.json │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ ├── intent_classifier │ │ ├── featurizer │ │ │ ├── cooccurrence_vectorizer │ │ │ │ ├── metadata.json │ │ │ │ └── vectorizer.json │ │ │ ├── featurizer.json │ │ │ ├── metadata.json │ │ │ └── tfidf_vectorizer │ │ │ │ ├── metadata.json │ │ │ │ └── vectorizer.json │ │ ├── intent_classifier.json │ │ └── metadata.json │ ├── intent_parser.json │ ├── metadata.json │ └── 
slot_filler_0 │ │ ├── metadata.json │ │ ├── modeluzcfum35.crfsuite │ │ └── slot_filler.json │ └── resources │ └── fr │ ├── gazetteers │ └── top_10000_words_stemmed.txt │ ├── metadata.json │ ├── noise.txt │ ├── stemming │ └── stems.txt │ └── stop_words.txt ├── examples └── interactive_parsing_cli.rs ├── ffi ├── Cargo.toml ├── cbindgen.toml └── src │ └── lib.rs ├── platforms ├── c │ ├── libsnips_nlu.h │ └── module.modulemap ├── kotlin │ ├── .gitignore │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── settings.gradle │ └── src │ │ ├── main │ │ ├── android │ │ │ └── AndroidManifest.xml │ │ └── kotlin │ │ │ ├── ai │ │ │ └── snips │ │ │ │ └── nlu │ │ │ │ └── NluEngine.kt │ │ │ └── com │ │ │ └── sun │ │ │ └── jna │ │ │ └── JnaUtils.kt │ │ └── test │ │ └── kotlin │ │ └── ai │ │ └── snips │ │ └── nlu │ │ └── NluEngineTest.kt ├── python │ ├── .gitignore │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.rst │ ├── ffi │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── requirements.txt │ ├── setup.py │ ├── snips_nlu_rust │ │ ├── __init__.py │ │ ├── __version__ │ │ ├── dylib │ │ │ └── .gitignore │ │ ├── nlu_engine.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_nlu_engine_wrapper.py │ │ │ └── utils.py │ │ └── utils.py │ └── tox.ini └── swift │ ├── .gitignore │ ├── SnipsNlu.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist │ └── SnipsNlu │ ├── Dependencies │ ├── .gitignore │ └── build.sh │ ├── SnipsNlu.xcodeproj │ ├── project.pbxproj │ ├── project.xcworkspace │ │ └── contents.xcworkspacedata │ └── xcshareddata │ │ └── xcschemes │ │ ├── SnipsNlu-iOS.xcscheme │ │ └── SnipsNlu-macOS.xcscheme │ ├── SnipsNlu │ ├── Info.plist │ ├── NluEngine.swift │ └── SnipsNlu.h │ └── SnipsNluTests │ ├── Info.plist │ └── NluEngineTests.swift ├── post_release.sh ├── src ├── entity_parser │ ├── builtin_entity_parser.rs │ ├── custom_entity_parser.rs │ ├── mod.rs │ └── utils.rs ├── errors.rs ├── injection │ ├── errors.rs │ ├── injection.rs │ └── mod.rs ├── intent_classifier │ ├── featurizer.rs │ ├── log_reg_intent_classifier.rs │ ├── logreg.rs │ └── mod.rs ├── intent_parser │ ├── deterministic_intent_parser.rs │ ├── lookup_intent_parser.rs │ ├── mod.rs │ └── probabilistic_intent_parser.rs ├── language.rs ├── lib.rs ├── models │ ├── intent_classifier.rs │ ├── intent_parser.rs │ ├── mod.rs │ ├── nlu_engine.rs │ ├── processing_unit_metadata.rs │ └── slot_filler.rs ├── nlu_engine.rs ├── resources │ ├── gazetteer.rs │ ├── loading.rs │ ├── mod.rs │ ├── stemmer.rs │ └── word_clusterer.rs ├── slot_filler │ ├── crf_slot_filler.rs │ ├── crf_utils.rs │ ├── feature_processor.rs │ ├── features.rs │ ├── features_utils.rs │ ├── macros.rs │ └── mod.rs ├── slot_utils.rs ├── testutils.rs └── utils.rs ├── update_ontology_version.sh └── update_version.sh /.appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - TARGET: x86_64-pc-windows-msvc 4 | 5 | branches: 6 | only: 7 | - /main\/.*/ 8 | - /release\/.*/ 9 | 10 | install: 11 | - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe 12 | - rustup-init.exe -y --default-host %TARGET% 13 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 14 | - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin 15 | - rustc -V 16 | - cargo -V 17 | - ps: (Get-Content platforms/python/ffi/Cargo.toml) | ForEach-Object { $_ -replace 
"^snips-nlu-ffi = .*$", "snips-nlu-ffi = { path = `"../../../ffi`" }" } | Set-Content platforms/python/ffi/Cargo.toml 18 | 19 | build: false 20 | 21 | test_script: 22 | - cargo build --all --tests --benches 23 | - cargo test --all 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Cargo 2 | 3 | target/ 4 | Cargo.lock 5 | **/*.rs.bk 6 | 7 | ## Idea 8 | 9 | .idea/ 10 | *.iml 11 | 12 | ## VIM 13 | 14 | # Swap 15 | [._]*.s[a-v][a-z] 16 | [._]*.sw[a-p] 17 | [._]s[a-v][a-z] 18 | [._]sw[a-p] 19 | 20 | # Session 21 | Session.vim 22 | 23 | # Temporary 24 | .netrwhist 25 | *~ 26 | # Auto-generated tag files 27 | tags 28 | 29 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | include: 3 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 4 | os: osx 5 | osx_image: xcode10.2 6 | language: generic 7 | sudo: true 8 | env: 9 | - TOXENV=py27 10 | - PYTHON_TESTS=true 11 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 12 | os: osx 13 | osx_image: xcode10.2 14 | language: generic 15 | sudo: true 16 | env: 17 | - TOXENV=py36 18 | - PYTHON_TESTS=true 19 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 20 | os: osx 21 | osx_image: xcode10.2 22 | language: generic 23 | sudo: true 24 | env: 25 | - MACOS_SWIFT_TESTS=true 26 | - IOS_SWIFT_TESTS=true 27 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 28 | os: linux 29 | language: python 30 | python: 2.7 31 | env: 32 | - TOXENV=py27 33 | - PYTHON_TESTS=true 34 | - os: linux 35 | language: python 36 | python: 3.6 37 | env: 38 | - TOXENV=py36 39 | - PYTHON_TESTS=true 40 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 41 | os: linux 42 | language: java 43 | jdk: openjdk8 44 | env: 45 | - KOTLIN_TESTS=true 46 | - os: linux 47 | language: rust 48 | rust: stable 49 | env: 50 | - RUST_TESTS=true 51 | 52 | before_install: . 
./.travis/before_install.sh 53 | 54 | install: ./.travis/install.sh 55 | 56 | script: ./.travis/test.sh 57 | -------------------------------------------------------------------------------- /.travis/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ev 3 | 4 | # Install Rust 5 | if [[ -z ${TRAVIS_RUST_VERSION+w} ]]; then 6 | curl https://sh.rustup.rs -sSf | bash -s -- -y 7 | fi 8 | 9 | if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then 10 | if [[ ${PYTHON_TESTS} == true ]]; then 11 | # install pyenv 12 | git clone https://github.com/pyenv/pyenv $HOME/.pyenv 13 | git --git-dir=$HOME/.pyenv/.git --work-tree=$HOME/.pyenv checkout v1.2.11 # Fix for https://github.com/pyenv/pyenv/issues/1066 14 | PYENV_ROOT="$HOME/.pyenv" 15 | PATH="$PYENV_ROOT/bin:$PATH" 16 | eval "$(pyenv init -)" 17 | 18 | # CFLAGS stuff is needed because of https://github.com/pyenv/pyenv/issues/1219 19 | case "${TOXENV}" in 20 | "py27") 21 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 2.7.14 22 | pyenv global 2.7.14 23 | ;; 24 | "py36") 25 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 3.6.1 26 | pyenv global 3.6.1 27 | ;; 28 | "py37") 29 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 3.7.2 30 | pyenv global 3.7.2 31 | ;; 32 | esac 33 | pyenv rehash 34 | 35 | # A manual check that the correct version of Python is running. 36 | python --version 37 | fi 38 | 39 | if [[ "${IOS_SWIFT_TESTS}" == "true" ]]; then 40 | PATH="$HOME/.cargo/bin:$PATH" 41 | rustup target install x86_64-apple-ios 42 | fi 43 | fi 44 | -------------------------------------------------------------------------------- /.travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | echo "Replacing snips-nlu-ffi url for local version" 5 | perl -p -i -e \ 6 | "s/^snips-nlu-ffi = .*\$/snips-nlu-ffi = { path = \"..\/..\/..\/ffi\" \}/g" \ 7 | platforms/python/ffi/Cargo.toml 8 | -------------------------------------------------------------------------------- /.travis/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | export PATH="$HOME/.cargo/bin:$PATH" 5 | 6 | if [[ "${RUST_TESTS}" == "true" ]]; then 7 | echo "Running rust tests..." 8 | cargo test --all 9 | cargo check --benches 10 | fi 11 | 12 | if [[ "${PYTHON_TESTS}" == "true" ]]; then 13 | echo "Running python tests..." 14 | cd platforms/python 15 | pip install tox 16 | tox 17 | cd - 18 | fi 19 | 20 | if [[ "${KOTLIN_TESTS}" == "true" ]]; then 21 | echo "Running kotlin tests..." 22 | cargo build -p snips-nlu-ffi 23 | cd platforms/kotlin 24 | ./gradlew -Pdebug -PrustTargetPath=../../target test --info 25 | cd - 26 | fi 27 | 28 | if [[ "${MACOS_SWIFT_TESTS}" == "true" ]]; then 29 | echo "Running macOS swift tests..." 30 | cargo build -p snips-nlu-ffi 31 | cd platforms/swift 32 | mkdir -p build/DerivedData 33 | set -o pipefail && xcodebuild \ 34 | -IDECustomDerivedDataLocation=build/DerivedData \ 35 | -workspace SnipsNlu.xcworkspace \ 36 | -scheme SnipsNlu-macOS \ 37 | TARGET_BUILD_TYPE=debug \ 38 | SNIPS_USE_LOCAL=1 \ 39 | clean \ 40 | test \ 41 | | xcpretty 42 | cd - 43 | fi 44 | 45 | if [[ "${IOS_SWIFT_TESTS}" == "true" ]]; then 46 | echo "Running iOS swift tests..." 
47 | TARGET_SYSROOT=$(xcrun --sdk iphonesimulator --show-sdk-path) \ 48 | cargo build -p snips-nlu-ffi --target x86_64-apple-ios 49 | cd platforms/swift 50 | mkdir -p build/DerivedData 51 | set -o pipefail && xcodebuild \ 52 | -IDECustomDerivedDataLocation=build/DerivedData \ 53 | -workspace SnipsNlu.xcworkspace \ 54 | -scheme SnipsNlu-iOS \ 55 | -destination 'platform=iOS Simulator,name=iPhone 8,OS=latest' \ 56 | TARGET_BUILD_TYPE=debug \ 57 | SNIPS_USE_LOCAL=1 \ 58 | clean \ 59 | test \ 60 | | xcpretty 61 | cd - 62 | fi 63 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-lib" 3 | version = "0.65.6" 4 | authors = [ 5 | "Adrien Ball ", 6 | "Clement Doumouro ", 7 | "Thibaut Lorrain ", 8 | "Kevin Lefevre " 9 | ] 10 | repository = "https://github.com/snipsco/snips-nlu-rs" 11 | description = "Rust implementation of Snips NLU" 12 | edition = "2018" 13 | 14 | [workspace] 15 | members = [ 16 | "ffi", 17 | "platforms/python/ffi", 18 | ] 19 | 20 | [dependencies] 21 | crfsuite = { git = "https://github.com/snipsco/crfsuite-rs", tag = "0.3.3" } 22 | snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.67.1" } 23 | snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.9.1" } 24 | snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.4.3" } 25 | failure = "0.1" 26 | base64 = "0.10" 27 | itertools = { version = "0.8", default-features = false } 28 | log = "0.4" 29 | lru-cache = "0.1" 30 | serde = { version = "1.0", features = ["derive"] } 31 | serde_json = "1.0" 32 | tempfile = "3" 33 | ndarray = "0.12" 34 | regex = "1.0" 35 | csv = "1.0" 36 | zip = { version = "0.5", default-features = false, features = ["deflate"] } 37 | 38 | [dev-dependencies] 39 | bencher = { git = "https://github.com/snipsco/bencher", rev = "63910ace" } 40 | clap = "2.32" 41 | dinghy-test = "0.4" 42 | env_logger = "0.6" 43 | maplit = "1.0" 44 | fs_extra = "1.1" 45 | 46 | [[bench]] 47 | name = "nlu_engine" 48 | harness = false 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ## License 2 | 3 | Licensed under either of 4 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 5 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 6 | at your option. 7 | 8 | ### Contribution 9 | 10 | Unless you explicitly state otherwise, any contribution intentionally submitted 11 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall 12 | be dual licensed as above, without any additional terms or conditions. 
13 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Snips NLU Rust 2 | ============== 3 | 4 | .. image:: https://travis-ci.org/snipsco/snips-nlu-rs.svg?branch=master 5 | :target: https://travis-ci.org/snipsco/snips-nlu-rs 6 | 7 | .. image:: https://ci.appveyor.com/api/projects/status/rsf27a9txeomic8o/branch/master?svg=true 8 | :target: https://ci.appveyor.com/project/snipsco/snips-nlu-rs 9 | 10 | Installation 11 | ------------ 12 | 13 | Add it to your ``Cargo.toml``: 14 | 15 | .. code-block:: toml 16 | 17 | [dependencies] 18 | snips-nlu-lib = { git = "https://github.com/snipsco/snips-nlu-rs", branch = "master" } 19 | 20 | Add ``extern crate snips_nlu_lib`` to your crate root and you are good to go! 21 | 22 | 23 | Intent Parsing with Snips NLU 24 | ----------------------------- 25 | 26 | The purpose of the main crate of this repository, ``snips-nlu-lib``, is to perform an information 27 | extraction task called *intent parsing*. 28 | 29 | To illustrate the main purpose of this lib, consider the following sentence: 30 | 31 | .. code-block:: text 32 | 33 | "What will be the weather in paris at 9pm?" 34 | 35 | Properly trained, the Snips NLU engine will be able to extract structured data such as: 36 | 37 | .. code-block:: json 38 | 39 | { 40 | "intent": { 41 | "intentName": "searchWeatherForecast", 42 | "confidenceScore": 0.95 43 | }, 44 | "slots": [ 45 | { 46 | "value": "paris", 47 | "entity": "locality", 48 | "slotName": "forecast_locality" 49 | }, 50 | { 51 | "value": { 52 | "kind": "InstantTime", 53 | "value": "2018-02-08 20:00:00 +00:00" 54 | }, 55 | "entity": "snips/datetime", 56 | "slotName": "forecast_start_datetime" 57 | } 58 | ] 59 | } 60 | 61 | 62 | In order to achieve such a result, the NLU engine needs to be fed with a trained model (JSON file). 63 | This repository only contains the inference part; to produce trained models, please check 64 | the `Snips NLU python library `_.
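For a quick preview of the Rust API, here is a minimal sketch mirroring the calls used in ``benches/nlu_engine.rs`` below (``SnipsNluEngine::from_path`` and ``parse``), assuming ``from_path`` accepts any path-like argument; the result is simply ``Debug``-printed:

.. code-block:: rust

    use snips_nlu_lib::SnipsNluEngine;

    fn main() {
        // Load one of the sample engines shipped under data/tests/models
        let engine = SnipsNluEngine::from_path("data/tests/models/nlu_engine_beverage")
            .expect("failed to load the NLU engine");
        // Same call as in benches/nlu_engine.rs; the two trailing `None`s leave
        // the optional filtering arguments unset
        let result = engine.parse("Make me two cups of coffee please", None, None);
        println!("{:#?}", result);
    }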
65 | 66 | 67 | Example and API Usage 68 | --------------------- 69 | 70 | The `interactive parsing CLI `_ is a good example 71 | of how to use ``snips-nlu-rs``. 72 | 73 | Here is how you can run the CLI example: 74 | 75 | .. code-block:: bash 76 | 77 | $ git clone https://github.com/snipsco/snips-nlu-rs 78 | $ cd snips-nlu-rs 79 | $ cargo run --example interactive_parsing_cli data/tests/models/nlu_engine_beverage 80 | 81 | Here we used a sample trained engine, which consists of two intents: ``MakeCoffee`` and ``MakeTea``. 82 | Thus, it will be able to parse queries like ``"Make me two cups of coffee please"`` or ``"I'd like a hot tea"``. 83 | 84 | As mentioned in the previous section, you can train your own NLU engine with the 85 | `Snips NLU python library `_. 86 | 87 | 88 | License 89 | ------- 90 | 91 | Licensed under either of 92 | * Apache License, Version 2.0 (`LICENSE-APACHE `_ or http://www.apache.org/licenses/LICENSE-2.0) 93 | * MIT license (`LICENSE-MIT `_ or http://opensource.org/licenses/MIT) 94 | at your option. 95 | 96 | Contribution 97 | ------------ 98 | 99 | Unless you explicitly state otherwise, any contribution intentionally submitted 100 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall 101 | be dual licensed as above, without any additional terms or conditions. 102 | -------------------------------------------------------------------------------- /benches/nlu_engine.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::never_loop)] 2 | 3 | #[macro_use] 4 | extern crate bencher; 5 | extern crate dinghy_test; 6 | extern crate snips_nlu_lib; 7 | 8 | use std::env; 9 | 10 | use bencher::Bencher; 11 | use snips_nlu_lib::*; 12 | 13 | const ENGINE_DIR_ENV: &str = "SNIPS_NLU_BENCH_ENGINE_DIR"; 14 | const SENTENCE_ENV: &str = "SNIPS_NLU_BENCH_SENTENCE"; 15 | 16 | fn file_path(filename: &str) -> ::std::path::PathBuf { 17 | dinghy_test::try_test_file_path("data") 18 | .unwrap_or_else(|| "../data".into()) 19 | .join(filename) 20 | } 21 | 22 | fn load_nlu_engine() -> SnipsNluEngine { 23 | let engine_path = if let Ok(engine_directory) = env::var(ENGINE_DIR_ENV) { 24 | file_path(&engine_directory) 25 | } else { 26 | file_path("untracked") 27 | }; 28 | 29 | SnipsNluEngine::from_path(engine_path).unwrap() 30 | } 31 | 32 | fn nlu_loading(b: &mut Bencher) { 33 | b.iter(|| { 34 | let _ = load_nlu_engine(); 35 | }); 36 | } 37 | 38 | fn nlu_parsing(b: &mut Bencher) { 39 | let nlu_engine = load_nlu_engine(); 40 | let sentence = env::var(SENTENCE_ENV) 41 | .map_err(|_| format!("{} env var not defined", SENTENCE_ENV)) 42 | .unwrap(); 43 | 44 | b.iter(|| { 45 | let _ = nlu_engine.parse(&sentence, None, None); 46 | }); 47 | } 48 | 49 | benchmark_group!(load, nlu_loading); 50 | benchmark_group!(run, nlu_parsing); 51 | 52 | benchmark_main!(load, run); 53 | -------------------------------------------------------------------------------- /data/tests/configs/beverage_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "lookup_intent_parser", 6 | "ignore_stop_words": true 7 | }, 8 | { 9 | "unit_name": "deterministic_intent_parser", 10 | "max_pattern_length": 1000, 11 | "max_queries": 100, 12 | "ignore_stop_words": true 13 | }, 14 | { 15 | "unit_name": "probabilistic_intent_parser", 16 | "intent_classifier_config": { 17 | "data_augmentation_config": { 18 | "noise_factor": 5, 19 |
"unknown_word_prob": 0, 20 | "max_unknown_words": 0, 21 | "add_builtin_entities_examples": false, 22 | "unknown_words_replacement_string": null, 23 | "min_utterances": 20 24 | }, 25 | "unit_name": "log_reg_intent_classifier", 26 | "featurizer_config": { 27 | "cooccurrence_vectorizer_config": { 28 | "window_size": 5, 29 | "unit_name": "cooccurrence_vectorizer", 30 | "filter_stop_words": true, 31 | "unknown_words_replacement_string": null, 32 | "keep_order": true 33 | }, 34 | "unit_name": "featurizer", 35 | "added_cooccurrence_feature_ratio": 0.25, 36 | "tfidf_vectorizer_config": { 37 | "unit_name": "tfidf_vectorizer", 38 | "use_stemming": false, 39 | "word_clusters_name": null 40 | }, 41 | "pvalue_threshold": 0.4 42 | }, 43 | "random_seed": null 44 | }, 45 | "slot_filler_config": { 46 | "unit_name": "crf_slot_filler", 47 | "random_seed": null, 48 | "tagging_scheme": 1, 49 | "data_augmentation_config": { 50 | "capitalization_ratio": 0.2, 51 | "min_utterances": 200, 52 | "add_builtin_entities_examples": true 53 | }, 54 | "crf_args": { 55 | "c2": 0.1, 56 | "c1": 0.1, 57 | "algorithm": "lbfgs" 58 | }, 59 | "feature_factory_configs": [ 60 | { 61 | "args": { 62 | "common_words_gazetteer_name": "top_10000_words_stemmed", 63 | "use_stemming": true, 64 | "n": 1 65 | }, 66 | "factory_name": "ngram", 67 | "offsets": [ 68 | -2, 69 | -1, 70 | 0, 71 | 1, 72 | 2 73 | ] 74 | }, 75 | { 76 | "args": { 77 | "common_words_gazetteer_name": "top_10000_words_stemmed", 78 | "use_stemming": true, 79 | "n": 2 80 | }, 81 | "factory_name": "ngram", 82 | "offsets": [ 83 | -2, 84 | 1 85 | ] 86 | }, 87 | { 88 | "args": {}, 89 | "factory_name": "is_digit", 90 | "offsets": [ 91 | -1, 92 | 0, 93 | 1 94 | ] 95 | }, 96 | { 97 | "args": {}, 98 | "factory_name": "is_first", 99 | "offsets": [ 100 | -2, 101 | -1, 102 | 0 103 | ] 104 | }, 105 | { 106 | "args": {}, 107 | "factory_name": "is_last", 108 | "offsets": [ 109 | 0, 110 | 1, 111 | 2 112 | ] 113 | }, 114 | { 115 | "args": { 116 | "n": 1 117 | }, 118 | "factory_name": "shape_ngram", 119 | "offsets": [ 120 | 0 121 | ] 122 | }, 123 | { 124 | "args": { 125 | "n": 2 126 | }, 127 | "factory_name": "shape_ngram", 128 | "offsets": [ 129 | -1, 130 | 0 131 | ] 132 | }, 133 | { 134 | "args": { 135 | "n": 3 136 | }, 137 | "factory_name": "shape_ngram", 138 | "offsets": [ 139 | -1 140 | ] 141 | }, 142 | { 143 | "args": { 144 | "tagging_scheme_code": 2, 145 | "use_stemming": true 146 | }, 147 | "factory_name": "entity_match", 148 | "drop_out": 0.5, 149 | "offsets": [ 150 | -2, 151 | -1, 152 | 0 153 | ] 154 | }, 155 | { 156 | "args": { 157 | "tagging_scheme_code": 1 158 | }, 159 | "factory_name": "builtin_entity_match", 160 | "offsets": [ 161 | -2, 162 | -1, 163 | 0 164 | ] 165 | } 166 | ] 167 | } 168 | } 169 | ] 170 | } 171 | -------------------------------------------------------------------------------- /data/tests/configs/game_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "lookup_intent_parser", 6 | "ignore_stop_words": true 7 | }, 8 | { 9 | "unit_name": "deterministic_intent_parser", 10 | "max_pattern_length": 1000, 11 | "max_queries": 100, 12 | "ignore_stop_words": true 13 | }, 14 | { 15 | "unit_name": "probabilistic_intent_parser", 16 | "intent_classifier_config": { 17 | "data_augmentation_config": { 18 | "noise_factor": 5, 19 | "unknown_word_prob": 0, 20 | "max_unknown_words": 0, 21 | "add_builtin_entities_examples": false, 22 | 
"unknown_words_replacement_string": null, 23 | "min_utterances": 20 24 | }, 25 | "unit_name": "log_reg_intent_classifier", 26 | "featurizer_config": { 27 | "cooccurrence_vectorizer_config": { 28 | "window_size": 5, 29 | "unit_name": "cooccurrence_vectorizer", 30 | "filter_stop_words": true, 31 | "unknown_words_replacement_string": null, 32 | "keep_order": true 33 | }, 34 | "unit_name": "featurizer", 35 | "added_cooccurrence_feature_ratio": 0.25, 36 | "tfidf_vectorizer_config": { 37 | "unit_name": "tfidf_vectorizer", 38 | "use_stemming": false, 39 | "word_clusters_name": null 40 | }, 41 | "pvalue_threshold": 0.4 42 | }, 43 | "random_seed": null 44 | }, 45 | "slot_filler_config": { 46 | "unit_name": "crf_slot_filler", 47 | "random_seed": null, 48 | "tagging_scheme": 1, 49 | "data_augmentation_config": { 50 | "capitalization_ratio": 0.2, 51 | "min_utterances": 200, 52 | "add_builtin_entities_examples": true 53 | }, 54 | "crf_args": { 55 | "c2": 0.1, 56 | "c1": 0.1, 57 | "algorithm": "lbfgs" 58 | }, 59 | "feature_factory_configs": [ 60 | { 61 | "args": { 62 | "common_words_gazetteer_name": "top_10000_words_stemmed", 63 | "use_stemming": true, 64 | "n": 1 65 | }, 66 | "factory_name": "ngram", 67 | "offsets": [ 68 | -2, 69 | -1, 70 | 0, 71 | 1, 72 | 2 73 | ] 74 | }, 75 | { 76 | "args": { 77 | "common_words_gazetteer_name": "top_10000_words_stemmed", 78 | "use_stemming": true, 79 | "n": 2 80 | }, 81 | "factory_name": "ngram", 82 | "offsets": [ 83 | -2, 84 | 1 85 | ] 86 | }, 87 | { 88 | "args": {}, 89 | "factory_name": "is_digit", 90 | "offsets": [ 91 | -1, 92 | 0, 93 | 1 94 | ] 95 | }, 96 | { 97 | "args": {}, 98 | "factory_name": "is_first", 99 | "offsets": [ 100 | -2, 101 | -1, 102 | 0 103 | ] 104 | }, 105 | { 106 | "args": {}, 107 | "factory_name": "is_last", 108 | "offsets": [ 109 | 0, 110 | 1, 111 | 2 112 | ] 113 | }, 114 | { 115 | "args": { 116 | "n": 1 117 | }, 118 | "factory_name": "shape_ngram", 119 | "offsets": [ 120 | 0 121 | ] 122 | }, 123 | { 124 | "args": { 125 | "n": 2 126 | }, 127 | "factory_name": "shape_ngram", 128 | "offsets": [ 129 | -1, 130 | 0 131 | ] 132 | }, 133 | { 134 | "args": { 135 | "n": 3 136 | }, 137 | "factory_name": "shape_ngram", 138 | "offsets": [ 139 | -1 140 | ] 141 | }, 142 | { 143 | "args": { 144 | "tagging_scheme_code": 2, 145 | "use_stemming": true 146 | }, 147 | "factory_name": "entity_match", 148 | "drop_out": 0.5, 149 | "offsets": [ 150 | -2, 151 | -1, 152 | 0 153 | ] 154 | }, 155 | { 156 | "args": { 157 | "tagging_scheme_code": 1 158 | }, 159 | "factory_name": "builtin_entity_match", 160 | "offsets": [ 161 | -2, 162 | -1, 163 | 0 164 | ] 165 | } 166 | ] 167 | } 168 | } 169 | ] 170 | } 171 | -------------------------------------------------------------------------------- /data/tests/configs/music_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "deterministic_intent_parser", 6 | "max_pattern_length": 1000, 7 | "max_queries": 100, 8 | "ignore_stop_words": true 9 | }, 10 | { 11 | "unit_name": "probabilistic_intent_parser", 12 | "intent_classifier_config": { 13 | "data_augmentation_config": { 14 | "noise_factor": 5, 15 | "unknown_word_prob": 0.5, 16 | "max_unknown_words": 5, 17 | "add_builtin_entities_examples": true, 18 | "unknown_words_replacement_string": "unknownword", 19 | "min_utterances": 20 20 | }, 21 | "unit_name": "log_reg_intent_classifier", 22 | "featurizer_config": { 23 | "cooccurrence_vectorizer_config": { 24 
| "window_size": 5, 25 | "unit_name": "cooccurrence_vectorizer", 26 | "filter_stop_words": true, 27 | "unknown_words_replacement_string": "unknownword", 28 | "keep_order": true 29 | }, 30 | "unit_name": "featurizer", 31 | "added_cooccurrence_feature_ratio": 0.25, 32 | "tfidf_vectorizer_config": { 33 | "unit_name": "tfidf_vectorizer", 34 | "use_stemming": false, 35 | "word_clusters_name": null 36 | }, 37 | "pvalue_threshold": 0.4 38 | }, 39 | "random_seed": null 40 | }, 41 | "slot_filler_config": { 42 | "unit_name": "crf_slot_filler", 43 | "random_seed": null, 44 | "tagging_scheme": 1, 45 | "data_augmentation_config": { 46 | "capitalization_ratio": 0.2, 47 | "min_utterances": 200, 48 | "add_builtin_entities_examples": true 49 | }, 50 | "crf_args": { 51 | "c2": 0.1, 52 | "c1": 0.1, 53 | "algorithm": "lbfgs" 54 | }, 55 | "feature_factory_configs": [ 56 | { 57 | "args": { 58 | "common_words_gazetteer_name": "top_10000_words_stemmed", 59 | "use_stemming": true, 60 | "n": 1 61 | }, 62 | "factory_name": "ngram", 63 | "offsets": [ 64 | -2, 65 | -1, 66 | 0, 67 | 1, 68 | 2 69 | ] 70 | }, 71 | { 72 | "args": { 73 | "common_words_gazetteer_name": "top_10000_words_stemmed", 74 | "use_stemming": true, 75 | "n": 2 76 | }, 77 | "factory_name": "ngram", 78 | "offsets": [ 79 | -2, 80 | 1 81 | ] 82 | }, 83 | { 84 | "args": {}, 85 | "factory_name": "is_digit", 86 | "offsets": [ 87 | -1, 88 | 0, 89 | 1 90 | ] 91 | }, 92 | { 93 | "args": {}, 94 | "factory_name": "is_first", 95 | "offsets": [ 96 | -2, 97 | -1, 98 | 0 99 | ] 100 | }, 101 | { 102 | "args": {}, 103 | "factory_name": "is_last", 104 | "offsets": [ 105 | 0, 106 | 1, 107 | 2 108 | ] 109 | }, 110 | { 111 | "args": { 112 | "n": 1 113 | }, 114 | "factory_name": "shape_ngram", 115 | "offsets": [ 116 | 0 117 | ] 118 | }, 119 | { 120 | "args": { 121 | "n": 2 122 | }, 123 | "factory_name": "shape_ngram", 124 | "offsets": [ 125 | -1, 126 | 0 127 | ] 128 | }, 129 | { 130 | "args": { 131 | "n": 3 132 | }, 133 | "factory_name": "shape_ngram", 134 | "offsets": [ 135 | -1 136 | ] 137 | }, 138 | { 139 | "args": { 140 | "tagging_scheme_code": 2, 141 | "use_stemming": true 142 | }, 143 | "factory_name": "entity_match", 144 | "drop_out": 0.5, 145 | "offsets": [ 146 | -2, 147 | -1, 148 | 0 149 | ] 150 | }, 151 | { 152 | "args": { 153 | "tagging_scheme_code": 1 154 | }, 155 | "factory_name": "builtin_entity_match", 156 | "offsets": [ 157 | -2, 158 | -1, 159 | 0 160 | ] 161 | } 162 | ] 163 | } 164 | } 165 | ] 166 | } 167 | -------------------------------------------------------------------------------- /data/tests/datasets/beverage_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "entities": { 3 | "Temperature": { 4 | "automatically_extensible": true, 5 | "use_synonyms": true, 6 | "data": [], 7 | "matching_strictness": 1.0 8 | }, 9 | "snips/number": {} 10 | }, 11 | "intents": { 12 | "MakeCoffee": { 13 | "utterances": [ 14 | { 15 | "data": [ 16 | { 17 | "text": "make me " 18 | }, 19 | { 20 | "text": "one", 21 | "entity": "snips/number", 22 | "slot_name": "number_of_cups" 23 | }, 24 | { 25 | "text": " cup of coffee please" 26 | } 27 | ] 28 | }, 29 | { 30 | "data": [ 31 | { 32 | "text": "brew " 33 | }, 34 | { 35 | "text": "three hundred and four", 36 | "entity": "snips/number", 37 | "slot_name": "number_of_cups" 38 | }, 39 | { 40 | "text": " cups of coffee" 41 | } 42 | ] 43 | }, 44 | { 45 | "data": [ 46 | { 47 | "text": "can you prepare " 48 | }, 49 | { 50 | "text": "2001", 51 | "entity": "snips/number", 52 | 
"slot_name": "number_of_cups" 53 | }, 54 | { 55 | "text": " cup of coffee" 56 | } 57 | ] 58 | } 59 | ] 60 | }, 61 | "MakeTea": { 62 | "utterances": [ 63 | { 64 | "data": [ 65 | { 66 | "text": "make me a " 67 | }, 68 | { 69 | "text": "hot", 70 | "entity": "Temperature", 71 | "slot_name": "beverage_temperature" 72 | }, 73 | { 74 | "text": " cup of tea" 75 | } 76 | ] 77 | }, 78 | { 79 | "data": [ 80 | { 81 | "text": "make me " 82 | }, 83 | { 84 | "text": "five", 85 | "entity": "snips/number", 86 | "slot_name": "number_of_cups" 87 | }, 88 | { 89 | "text": " tea cups" 90 | } 91 | ] 92 | }, 93 | { 94 | "data": [ 95 | { 96 | "text": "i want " 97 | }, 98 | { 99 | "text": "2001", 100 | "entity": "snips/number", 101 | "slot_name": "number_of_cups" 102 | }, 103 | { 104 | "text": " cups of " 105 | }, 106 | { 107 | "text": "boiling hot", 108 | "entity": "Temperature", 109 | "slot_name": "beverage_temperature" 110 | }, 111 | { 112 | "text": " tea pls" 113 | } 114 | ] 115 | }, 116 | { 117 | "data": [ 118 | { 119 | "text": "can you prepare " 120 | }, 121 | { 122 | "text": "twenty one", 123 | "entity": "snips/number", 124 | "slot_name": "number_of_cups" 125 | }, 126 | { 127 | "text": " cup of " 128 | }, 129 | { 130 | "text": "cold", 131 | "entity": "Temperature", 132 | "slot_name": "beverage_temperature" 133 | }, 134 | { 135 | "text": " tea ?" 136 | } 137 | ] 138 | } 139 | ] 140 | } 141 | }, 142 | "language": "en" 143 | } -------------------------------------------------------------------------------- /data/tests/datasets/game_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "entities": { 3 | "game": { 4 | "automatically_extensible": true, 5 | "data": [ 6 | { 7 | "synonyms": [ 8 | "space invader" 9 | ], 10 | "value": "Space Invader Limited Edition" 11 | }, 12 | { 13 | "synonyms": [ 14 | "invader attack" 15 | ], 16 | "value": "Invader Attack 3" 17 | }, 18 | { 19 | "synonyms": [ 20 | "invader war" 21 | ], 22 | "value": "Invader War Demo" 23 | }, 24 | { 25 | "synonyms": [ 26 | "star invader" 27 | ], 28 | "value": "Star Invader II" 29 | } 30 | ], 31 | "matching_strictness": 0.5, 32 | "use_synonyms": true 33 | } 34 | }, 35 | "intents": { 36 | "PlayGame": { 37 | "utterances": [ 38 | { 39 | "data": [ 40 | { 41 | "text": "I want to play to " 42 | }, 43 | { 44 | "entity": "game", 45 | "slot_name": "game", 46 | "text": "space invader" 47 | } 48 | ] 49 | }, 50 | { 51 | "data": [ 52 | { 53 | "text": "please launch the " 54 | }, 55 | { 56 | "entity": "game", 57 | "slot_name": "game", 58 | "text": "invader attack" 59 | }, 60 | { 61 | "text": " game" 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | }, 68 | "language": "en" 69 | } 70 | -------------------------------------------------------------------------------- /data/tests/datasets/music_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "intents": { 3 | "adri:PlayMusic": { 4 | "utterances": [ 5 | { 6 | "data": [ 7 | { 8 | "text": "mets un son de " 9 | }, 10 | { 11 | "text": "l'imperatrice", 12 | "entity": "snips/musicArtist", 13 | "slot_name": "musicArtist" 14 | }, 15 | { 16 | "text": " " 17 | } 18 | ] 19 | }, 20 | { 21 | "data": [ 22 | { 23 | "text": "je veux ecouter une chanson de " 24 | }, 25 | { 26 | "text": "linkin park", 27 | "entity": "snips/musicArtist", 28 | "slot_name": "musicArtist" 29 | }, 30 | { 31 | "text": " please" 32 | } 33 | ] 34 | }, 35 | { 36 | "data": [ 37 | { 38 | "text": "je souhaiterais écouter l'album " 39 | }, 40 | { 41 | "text": "random 
access memories", 42 | "entity": "snips/musicAlbum", 43 | "slot_name": "musicAlbum" 44 | } 45 | ] 46 | }, 47 | { 48 | "data": [ 49 | { 50 | "text": "mets l'album " 51 | }, 52 | { 53 | "text": "gravé dans la roche", 54 | "entity": "snips/musicAlbum", 55 | "slot_name": "musicAlbum" 56 | }, 57 | { 58 | "text": " de " 59 | }, 60 | { 61 | "text": "sniper", 62 | "entity": "snips/musicArtist", 63 | "slot_name": "musicArtist" 64 | }, 65 | { 66 | "text": " stp" 67 | } 68 | ] 69 | }, 70 | { 71 | "data": [ 72 | { 73 | "text": "lance l'album " 74 | }, 75 | { 76 | "text": "conspiracy of one", 77 | "entity": "snips/musicAlbum", 78 | "slot_name": "musicAlbum" 79 | }, 80 | { 81 | "text": " veux tu ?" 82 | } 83 | ] 84 | }, 85 | { 86 | "data": [ 87 | { 88 | "text": "mets-moi du " 89 | }, 90 | { 91 | "text": "jacques brel", 92 | "entity": "snips/musicArtist", 93 | "slot_name": "musicArtist" 94 | } 95 | ] 96 | }, 97 | { 98 | "data": [ 99 | { 100 | "text": "je veux ecouter l'album " 101 | }, 102 | { 103 | "text": "discovery", 104 | "entity": "snips/musicAlbum", 105 | "slot_name": "musicAlbum" 106 | }, 107 | { 108 | "text": " des " 109 | }, 110 | { 111 | "text": "daft punk", 112 | "entity": "snips/musicArtist", 113 | "slot_name": "musicArtist" 114 | } 115 | ] 116 | }, 117 | { 118 | "data": [ 119 | { 120 | "text": "peux-tu mettre les " 121 | }, 122 | { 123 | "text": "rolling stones", 124 | "entity": "snips/musicArtist", 125 | "slot_name": "musicArtist" 126 | }, 127 | { 128 | "text": " stp" 129 | } 130 | ] 131 | }, 132 | { 133 | "data": [ 134 | { 135 | "text": "je voudrais ecouter " 136 | }, 137 | { 138 | "text": "michael jackson", 139 | "entity": "snips/musicArtist", 140 | "slot_name": "musicArtist" 141 | } 142 | ] 143 | }, 144 | { 145 | "data": [ 146 | { 147 | "text": "je voudrais ecouter ma playlist " 148 | }, 149 | { 150 | "text": "jazz", 151 | "entity": "playlist", 152 | "slot_name": "playlist" 153 | } 154 | ] 155 | } 156 | ], 157 | "version": "0.8.0", 158 | "language": "fr" 159 | } 160 | }, 161 | "entities": { 162 | "snips/musicArtist": { 163 | "name": "snips/musicArtist", 164 | "data": [], 165 | "use_synonyms": false, 166 | "automatically_extensible": false 167 | }, 168 | "snips/musicAlbum": { 169 | "name": "snips/musicAlbum", 170 | "data": [], 171 | "use_synonyms": false, 172 | "automatically_extensible": false 173 | }, 174 | "playlist": { 175 | "name": "playlist", 176 | "data": [], 177 | "use_synonyms": false, 178 | "automatically_extensible": false 179 | } 180 | }, 181 | "language": "fr" 182 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage.zip -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "gazetteer_parser": null 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } 
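As an aside, here is a rough sketch of how a metadata file like the one just above could be read back with ``serde``/``serde_json`` (both already declared in ``Cargo.toml``). The struct name and ``main`` function are hypothetical illustrations, not the crate's actual model types (those live under ``src/models/``):

    use serde::Deserialize;

    // Hypothetical mirror of the custom_entity_parser/metadata.json shown above;
    // the field names match the JSON keys of this test model.
    #[derive(Debug, Deserialize)]
    struct CustomEntityParserMetadata {
        language: String,
        parser_directory: String,
        parser_usage: usize,
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let raw = std::fs::read_to_string(
            "data/tests/models/nlu_engine_beverage/custom_entity_parser/metadata.json",
        )?;
        let metadata: CustomEntityParserMetadata = serde_json::from_str(&raw)?;
        println!("{:?}", metadata);
        Ok(())
    }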
-------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "Temperature", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":1.0,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/deterministic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "max_pattern_length": 1000, 5 | "max_queries": 100, 6 | "unit_name": "deterministic_intent_parser" 7 | }, 8 | "group_names_to_slot_names": { 9 | "group0": "beverage_temperature", 10 | "group1": "number_of_cups" 11 | }, 12 | "language_code": "en", 13 | "patterns": { 14 | "MakeCoffee": [ 15 | "^\\s*make\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*coffee\\s*$", 16 | "^\\s*brew\\s*(?P<group1>%SNIPSNUMBER%)\\s*cups\\s*of\\s*coffee\\s*$", 17 | "^\\s*prepare\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*coffee\\s*$" 18 | ], 19 | "MakeTea": [ 20 | "^\\s*make\\s*(?P<group0>%TEMPERATURE%)\\s*cup\\s*of\\s*tea\\s*$", 21 | "^\\s*make\\s*(?P<group1>%SNIPSNUMBER%)\\s*tea\\s*cups\\s*$", 22 | "^\\s*i\\s*want\\s*(?P<group1>%SNIPSNUMBER%)\\s*cups\\s*of\\s*(?P<group0>%TEMPERATURE%)\\s*tea\\s*$", 23 | "^\\s*prepare\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*(?P<group0>%TEMPERATURE%)\\s*tea\\s*$" 24 | ] 25 | }, 26 | "slot_names_to_entities": { 27 | "MakeCoffee": { 28 | "number_of_cups": "snips/number" 29 | }, 30 | "MakeTea": { 31 | "beverage_temperature": "Temperature", 32 | "number_of_cups": "snips/number" 33 | } 34 | }, 35 | "stop_words_whitelist": {} 36 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/deterministic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "deterministic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/lookup_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "unit_name": "lookup_intent_parser" 5 | }, 6 | "entity_scopes": [ 7 | { 8 | "entity_scope": { 9 | "builtin": [ 10 | "snips/number" 11 | ], 12 | "custom": [] 13 | }, 14 | "intent_group": [ 15 | "MakeCoffee" 16 | ] 17 | }, 18 | { 19 | "entity_scope": { 20 | "builtin": [ 21 | "snips/number" 22 | ], 23 | "custom": [ 24 | "Temperature" 25 | ] 26 | }, 27 | "intent_group": [ 28 | "MakeTea" 29 | ] 30 | } 31 | ], 32
| "intents_names": [ 33 | "MakeCoffee", 34 | "MakeTea" 35 | ], 36 | "language_code": "en", 37 | "map": { 38 | "-1658454006": [ 39 | 1, 40 | [ 41 | 0 42 | ] 43 | ], 44 | "-1533083481": [ 45 | 0, 46 | [ 47 | 0 48 | ] 49 | ], 50 | "-1416877420": [ 51 | 0, 52 | [ 53 | 0 54 | ] 55 | ], 56 | "-1362288387": [ 57 | 1, 58 | [ 59 | 0, 60 | 1 61 | ] 62 | ], 63 | "-687749971": [ 64 | 0, 65 | [ 66 | 0 67 | ] 68 | ], 69 | "1085718744": [ 70 | 1, 71 | [ 72 | 1 73 | ] 74 | ], 75 | "1413162768": [ 76 | 1, 77 | [ 78 | 0, 79 | 1 80 | ] 81 | ] 82 | }, 83 | "slots_names": [ 84 | "number_of_cups", 85 | "beverage_temperature" 86 | ], 87 | "stop_words_whitelist": {} 88 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/lookup_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "lookup_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "cooccurrence_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/number" 4 | ], 5 | "config": { 6 | "filter_stop_words": true, 7 | "keep_order": true, 8 | "unit_name": "cooccurrence_vectorizer", 9 | "unknown_words_replacement_string": null, 10 | "window_size": 5 11 | }, 12 | "language_code": "en", 13 | "word_pairs": { 14 | "0": [ 15 | "SNIPSNUMBER", 16 | "coffee" 17 | ], 18 | "1": [ 19 | "SNIPSNUMBER", 20 | "tea" 21 | ], 22 | "2": [ 23 | "TEMPERATURE", 24 | "tea" 25 | ], 26 | "3": [ 27 | "of", 28 | "coffee" 29 | ], 30 | "4": [ 31 | "of", 32 | "tea" 33 | ] 34 | } 35 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "added_cooccurrence_feature_ratio": 0.25, 4 | "cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": null, 9 | "window_size": 5 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": "cooccurrence_vectorizer", 20 | "language_code": "en", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- 
/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/number" 4 | ], 5 | "config": { 6 | "unit_name": "tfidf_vectorizer", 7 | "use_stemming": false, 8 | "word_clusters_name": null 9 | }, 10 | "language_code": "en", 11 | "vectorizer": { 12 | "idf_diag": [ 13 | 4.157000421150114, 14 | 3.176171168138387, 15 | 4.002849741322855, 16 | 3.8693183486983322, 17 | 2.2110902720948, 18 | 3.463853240590168, 19 | 2.904237452654745, 20 | 4.339321977944068, 21 | 2.7707060600302227, 22 | 3.0583881324820035, 23 | 3.176171168138387, 24 | 3.463853240590168, 25 | 3.3097025607629096, 26 | 3.0583881324820035, 27 | 3.0043209112117277, 28 | 1.9057086225436182, 29 | 3.8693183486983322, 30 | 4.157000421150114, 31 | 3.463853240590168, 32 | 2.904237452654745, 33 | 2.5814640603916943, 34 | 4.157000421150114, 35 | 3.2407096892759584 36 | ], 37 | "vocab": { 38 | "?": 0, 39 | "a": 1, 40 | "boiling": 2, 41 | "brew": 3, 42 | "builtinentityfeaturesnipsnumber": 4, 43 | "can": 5, 44 | "coffee": 6, 45 | "cold": 7, 46 | "cup": 8, 47 | "cups": 9, 48 | "entityfeaturetemperature": 10, 49 | "hot": 11, 50 | "i": 12, 51 | "make": 13, 52 | "me": 14, 53 | "of": 15, 54 | "please": 16, 55 | "pls": 17, 56 | "prepare": 18, 57 | "tea": 19, 58 | "the": 20, 59 | "want": 21, 60 | "you": 22 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.06628334386039435, 5 | -0.8643735409565843, 6 | -0.10162233194496288, 7 | 0.3731953851089176, 8 | -0.4488682974331702, 9 | 0.17636658950526615, 10 | 0.7184102193303363, 11 | -0.06865280010172024, 12 | 0.29743179731574193, 13 | 0.0346771374230032, 14 | -0.1761175736061183, 15 | -0.13726751217877073, 16 | -0.8071682094773692, 17 | -0.05137180345130995, 18 | -0.11880606643573582, 19 | -1.0869381590717797, 20 | 0.32523561280093755, 21 | -0.049393800526571136, 22 | 0.17636658950526615, 23 | -0.37371467568972533, 24 | -1.552232223994024, 25 | -0.049393800526571136, 26 | -0.07258216842039956, 27 | 1.773882907274256, 28 | -0.7513363572925229, 29 | -0.5328718550242008, 30 | 1.773882907274256, 31 | -0.5328718550242008 32 | ], 33 | [ 34 | 0.09460701831794095, 35 | -0.6521590578791224, 36 | 0.2457151029413227, 37 | -0.1564569232219889, 38 | -0.501396327528731, 39 | -0.006657999049205669, 40 | -0.3254543267957415, 41 | 0.19982413835098595, 42 | 0.09848862892018242, 43 | 0.4824274378008, 44 | 0.4213894656886821, 45 | 0.30004773187161327, 46 | -0.7342364277722051, 47 | 0.6427012659535128, 48 | 0.5425430291591661, 49 | -1.2591009549072656, 50 | -0.18164901959554086, 51 | 0.10599990651714576, 52 | -0.006657999049205669, 53 | 0.8868013621218258, 54 | -1.5676891353952978, 55 | 0.10599990651714576, 56 | -0.2459922083905006, 57 | -0.8070607856886259, 58 | 1.5929339342221138, 59 | 1.2525103361872103, 60 | -0.8070607856886259, 61 
| 1.2525103361872103 62 | ], 63 | [ 64 | -0.07342804814279959, 65 | 0.5198593428656567, 66 | -0.1581096606820083, 67 | -0.26280946680431544, 68 | 0.00904118342688167, 69 | -0.2318387692350015, 70 | -0.4684730287438426, 71 | -0.1304212706183918, 72 | -0.44631551521307367, 73 | -0.5533957320987131, 74 | -0.29099767056562215, 75 | -0.21324635553933227, 76 | 0.5486473938992574, 77 | -0.5604628287636957, 78 | -0.4863443376102945, 79 | 0.7764987437479216, 80 | -0.17070720658830346, 81 | -0.09943826061076572, 82 | -0.2318387692350015, 83 | -0.5248553684542913, 84 | 1.3219543672776477, 85 | -0.09943826061076572, 86 | -0.025792600962746038, 87 | -1.1511881362239413, 88 | -0.9891682959063401, 89 | -0.8725891493847063, 90 | -1.1511881362239413, 91 | -0.8725891493847063 92 | ] 93 | ], 94 | "config": { 95 | "data_augmentation_config": { 96 | "add_builtin_entities_examples": false, 97 | "max_unknown_words": 0, 98 | "min_utterances": 20, 99 | "noise_factor": 5, 100 | "unknown_word_prob": 0, 101 | "unknown_words_replacement_string": null 102 | }, 103 | "featurizer_config": { 104 | "added_cooccurrence_feature_ratio": 0.25, 105 | "cooccurrence_vectorizer_config": { 106 | "filter_stop_words": true, 107 | "keep_order": true, 108 | "unit_name": "cooccurrence_vectorizer", 109 | "unknown_words_replacement_string": null, 110 | "window_size": 5 111 | }, 112 | "pvalue_threshold": 0.4, 113 | "tfidf_vectorizer_config": { 114 | "unit_name": "tfidf_vectorizer", 115 | "use_stemming": false, 116 | "word_clusters_name": null 117 | }, 118 | "unit_name": "featurizer" 119 | }, 120 | "noise_reweight_factor": 1.0, 121 | "unit_name": "log_reg_intent_classifier" 122 | }, 123 | "featurizer": "featurizer", 124 | "intent_list": [ 125 | "MakeCoffee", 126 | "MakeTea", 127 | null 128 | ], 129 | "intercept": [ 130 | -0.797273067375694, 131 | -0.7793999633924479, 132 | 0.36488637926624495 133 | ], 134 | "t_": 3221.0 135 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": false, 6 | "max_unknown_words": 0, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0, 10 | "unknown_words_replacement_string": null 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.25, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": null, 19 | "window_size": 5 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1.0, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": 
true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": "top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | "args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "tagging_scheme_code": 2, 129 | "use_stemming": true 130 | }, 131 | "drop_out": 0.5, 132 | "factory_name": "entity_match", 133 | "offsets": [ 134 | -2, 135 | -1, 136 | 0 137 | ] 138 | }, 139 | { 140 | "args": { 141 | "tagging_scheme_code": 1 142 | }, 143 | "factory_name": "builtin_entity_match", 144 | "offsets": [ 145 | -2, 146 | -1, 147 | 0 148 | ] 149 | } 150 | ], 151 | "tagging_scheme": 1, 152 | "unit_name": "crf_slot_filler" 153 | }, 154 | "unit_name": "probabilistic_intent_parser" 155 | }, 156 | "slot_fillers": [ 157 | { 158 | "intent": "MakeCoffee", 159 | "slot_filler_name": "slot_filler_0" 160 | }, 161 | { 162 | "intent": "MakeTea", 163 | "slot_filler_name": "slot_filler_1" 164 | } 165 | ] 166 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/model55imurmx.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/model55imurmx.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | 
"capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [], 104 | "tagging_scheme_code": 2, 105 | "use_stemming": true 106 | }, 107 | "drop_out": 0.5, 108 | "factory_name": "entity_match", 109 | "offsets": [ 110 | -2, 111 | -1, 112 | 0 113 | ] 114 | }, 115 | { 116 | "args": { 117 | "entity_labels": [ 118 | "snips/amountOfMoney", 119 | "snips/date", 120 | "snips/datePeriod", 121 | "snips/datetime", 122 | "snips/duration", 123 | "snips/number", 124 | "snips/ordinal", 125 | "snips/percentage", 126 | "snips/temperature", 127 | "snips/time", 128 | "snips/timePeriod" 129 | ], 130 | "language_code": "en", 131 | "tagging_scheme_code": 1 132 | }, 133 | "factory_name": "builtin_entity_match", 134 | "offsets": [ 135 | -2, 136 | -1, 137 | 0 138 | ] 139 | } 140 | ], 141 | "tagging_scheme": 1, 142 | "unit_name": "crf_slot_filler" 143 | }, 144 | "crf_model_file": "model55imurmx.crfsuite", 145 | "intent": "MakeCoffee", 146 | "language_code": "en", 147 | "slot_name_mapping": { 148 | "number_of_cups": "snips/number" 149 | } 150 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/model8a9dqxnp.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/model8a9dqxnp.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | 
"data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [ 104 | "Temperature" 105 | ], 106 | "tagging_scheme_code": 2, 107 | "use_stemming": true 108 | }, 109 | "drop_out": 0.5, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entity_labels": [ 120 | "snips/amountOfMoney", 121 | "snips/date", 122 | "snips/datePeriod", 123 | "snips/datetime", 124 | "snips/duration", 125 | "snips/number", 126 | "snips/ordinal", 127 | "snips/percentage", 128 | "snips/temperature", 129 | "snips/time", 130 | "snips/timePeriod" 131 | ], 132 | "language_code": "en", 133 | "tagging_scheme_code": 1 134 | }, 135 | "factory_name": "builtin_entity_match", 136 | "offsets": [ 137 | -2, 138 | -1, 139 | 0 140 | ] 141 | } 142 | ], 143 | "tagging_scheme": 1, 144 | "unit_name": "crf_slot_filler" 145 | }, 146 | "crf_model_file": "model8a9dqxnp.crfsuite", 147 | "intent": "MakeTea", 148 | "language_code": "en", 149 | "slot_name_mapping": { 150 | "beverage_temperature": "Temperature", 151 | "number_of_cups": "snips/number" 152 | } 153 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/resources/en/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for English", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "en", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_en", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.2", 17 | "word_clusters": [] 18 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/resources/en/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | about 5 | allright 6 | alright 7 | although 8 | an 9 | and 10 | any 11 | besides 12 | but 13 | can 14 | hello 15 | hey 16 | hi 17 | instead 18 | just 19 | lol 20 | man 21 | me 22 | my 23 | now 24 | ok 25 | only 26 | please 27 | pls 28 | so 29 | some 30 | such 31 | that 32 | the 33 | then 34 | these 35 | this 36 | those 37 | though 38 | to 39 | too 40 | very 41 | while 42 | yo 43 | you 44 | your 45 | yours 46 | yourself 47 | -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "gazetteer_parser": null 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "game", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.2","parser_filename":"parser","threshold":0.5,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/lookup_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "unit_name": "lookup_intent_parser" 5 | }, 6 | "entity_scopes": [ 7 | { 8 | "entity_scope": { 9 | "builtin": [], 10 | "custom": [ 11 | "game" 12 | ] 13 | }, 14 | "intent_group": [ 15 | "PlayGame" 16 | ] 17 | } 18 | ], 19 | "intents_names": [ 20 | "PlayGame" 21 | ], 22 | "language_code": "en", 23 | "map": { 24 | "483944904": [ 25 | 0, 26 | [ 27 | 0 28 | ] 29 | ], 30 | "1316274424": [ 31 | 0, 32 | [ 33 | 0 34 | ] 35 | ] 36 | }, 37 | "slots_names": [ 38 | "game" 39 | ], 40 | "stop_words_whitelist": {} 41 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/lookup_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "lookup_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"config": { 3 | "added_cooccurrence_feature_ratio": 0.0, 4 | "cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": null, 9 | "window_size": null 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": null, 20 | "language_code": "en", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [], 3 | "config": { 4 | "unit_name": "tfidf_vectorizer", 5 | "use_stemming": false, 6 | "word_clusters_name": null 7 | }, 8 | "language_code": "en", 9 | "vectorizer": { 10 | "idf_diag": [ 11 | 4.697178256928631, 12 | 3.849880396541428, 13 | 4.697178256928631, 14 | 4.697178256928631, 15 | 2.751268107873318, 16 | 3.3978952727983707, 17 | 3.0232018233569597, 18 | 4.4094961844768505, 19 | 3.2308411881352046, 20 | 2.751268107873318, 21 | 3.3978952727983707, 22 | 4.697178256928631, 23 | 2.80005827204275, 24 | 3.3978952727983707, 25 | 3.3978952727983707, 26 | 4.004031076368686, 27 | 4.004031076368686, 28 | 4.697178256928631, 29 | 2.962577201540525, 30 | 3.3978952727983707, 31 | 4.186352633162641 32 | ], 33 | "vocab": { 34 | "3": 0, 35 | "attack": 1, 36 | "demo": 2, 37 | "edition": 3, 38 | "entityfeaturegame": 4, 39 | "game": 5, 40 | "i": 6, 41 | "ii": 7, 42 | "in": 8, 43 | "invader": 9, 44 | "launch": 10, 45 | "limited": 11, 46 | "of": 12, 47 | "play": 13, 48 | "please": 14, 49 | "space": 15, 50 | "star": 16, 51 | "three": 17, 52 | "to": 18, 53 | "want": 19, 54 | "war": 20 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.20149987419774504, 5 | -0.4999326799959391, 6 | -0.2072185982938113, 7 | -0.20533716633716562, 8 | -1.212997119952126, 9 | -0.7430829768823239, 10 | -0.10687891515378825, 11 | -0.28544080700845964, 12 | 0.7737448377337215, 13 | -1.212997119952126, 14 | -0.7430829768823239, 15 | -0.20533716633716562, 16 | 1.019183684420755, 17 | -0.7550033666508472, 18 | -0.7430829768823239, 19 | -0.4433146539838388, 20 | -0.437280810875116, 21 | -0.20277376407358896, 22 | -0.5550491008658663, 23 | -0.7550033666508472, 24 | -0.38138780663082483 25 | ] 26 | ], 27 | "config": { 28 | 
"data_augmentation_config": { 29 | "add_builtin_entities_examples": false, 30 | "max_unknown_words": null, 31 | "min_utterances": 20, 32 | "noise_factor": 5, 33 | "unknown_word_prob": 0.0, 34 | "unknown_words_replacement_string": null 35 | }, 36 | "featurizer_config": { 37 | "added_cooccurrence_feature_ratio": 0.0, 38 | "cooccurrence_vectorizer_config": { 39 | "filter_stop_words": true, 40 | "keep_order": true, 41 | "unit_name": "cooccurrence_vectorizer", 42 | "unknown_words_replacement_string": null, 43 | "window_size": null 44 | }, 45 | "pvalue_threshold": 0.4, 46 | "tfidf_vectorizer_config": { 47 | "unit_name": "tfidf_vectorizer", 48 | "use_stemming": false, 49 | "word_clusters_name": null 50 | }, 51 | "unit_name": "featurizer" 52 | }, 53 | "noise_reweight_factor": 1, 54 | "unit_name": "log_reg_intent_classifier" 55 | }, 56 | "featurizer": "featurizer", 57 | "intent_list": [ 58 | "PlayGame", 59 | null 60 | ], 61 | "intercept": [ 62 | 0.2646563858371308 63 | ], 64 | "t_": 2641.0 65 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": false, 6 | "max_unknown_words": null, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0.0, 10 | "unknown_words_replacement_string": null 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.0, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": null, 19 | "window_size": null 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": "top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | 
"args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "entity_filter": { 129 | "automatically_extensible": false 130 | }, 131 | "tagging_scheme_code": 2, 132 | "use_stemming": true 133 | }, 134 | "factory_name": "entity_match", 135 | "offsets": [ 136 | -2, 137 | -1, 138 | 0 139 | ] 140 | }, 141 | { 142 | "args": { 143 | "entity_filter": { 144 | "automatically_extensible": true 145 | }, 146 | "tagging_scheme_code": 2, 147 | "use_stemming": true 148 | }, 149 | "drop_out": 0.5, 150 | "factory_name": "entity_match", 151 | "offsets": [ 152 | -2, 153 | -1, 154 | 0 155 | ] 156 | }, 157 | { 158 | "args": { 159 | "tagging_scheme_code": 1 160 | }, 161 | "factory_name": "builtin_entity_match", 162 | "offsets": [ 163 | -2, 164 | -1, 165 | 0 166 | ] 167 | }, 168 | { 169 | "args": { 170 | "cluster_name": "brown_clusters", 171 | "use_stemming": false 172 | }, 173 | "factory_name": "word_cluster", 174 | "offsets": [ 175 | -2, 176 | -1, 177 | 0, 178 | 1 179 | ] 180 | } 181 | ], 182 | "tagging_scheme": 1, 183 | "unit_name": "crf_slot_filler" 184 | }, 185 | "unit_name": "probabilistic_intent_parser" 186 | }, 187 | "slot_fillers": [ 188 | { 189 | "intent": "PlayGame", 190 | "slot_filler_name": "slot_filler_0" 191 | } 192 | ] 193 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/model.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/model.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 
2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [], 104 | "entity_filter": { 105 | "automatically_extensible": false 106 | }, 107 | "tagging_scheme_code": 2, 108 | "use_stemming": true 109 | }, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entities": [ 120 | "game" 121 | ], 122 | "entity_filter": { 123 | "automatically_extensible": true 124 | }, 125 | "tagging_scheme_code": 2, 126 | "use_stemming": true 127 | }, 128 | "drop_out": 0.5, 129 | "factory_name": "entity_match", 130 | "offsets": [ 131 | -2, 132 | -1, 133 | 0 134 | ] 135 | }, 136 | { 137 | "args": { 138 | "entity_labels": [ 139 | "snips/amountOfMoney", 140 | "snips/date", 141 | "snips/datePeriod", 142 | "snips/datetime", 143 | "snips/duration", 144 | "snips/number", 145 | "snips/ordinal", 146 | "snips/percentage", 147 | "snips/temperature", 148 | "snips/time", 149 | "snips/timePeriod" 150 | ], 151 | "language_code": "en", 152 | "tagging_scheme_code": 1 153 | }, 154 | "factory_name": "builtin_entity_match", 155 | "offsets": [ 156 | -2, 157 | -1, 158 | 0 159 | ] 160 | }, 161 | { 162 | "args": { 163 | "cluster_name": "brown_clusters", 164 | "use_stemming": false 165 | }, 166 | "factory_name": "word_cluster", 167 | "offsets": [ 168 | -2, 169 | -1, 170 | 0, 171 | 1 172 | ] 173 | } 174 | ], 175 | "tagging_scheme": 1, 176 | "unit_name": "crf_slot_filler" 177 | }, 178 | "crf_model_file": "model.crfsuite", 179 | "intent": "PlayGame", 180 | "language_code": "en", 181 | "slot_name_mapping": { 182 | "game": "game" 183 | } 184 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/resources/en/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for English", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "en", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_en", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.2", 17 | "word_clusters": [ 18 | "brown_clusters" 19 | ] 20 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/resources/en/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | about 5 | allright 6 | alright 7 | although 8 | an 9 | and 10 | any 11 | besides 12 | but 13 | can 14 | hello 15 | hey 16 | hi 17 | instead 18 | just 19 | lol 20 | man 21 | me 22 | my 23 | now 24 | ok 25 | only 26 | please 27 | pls 28 | so 29 | some 30 | such 31 | that 32 | the 33 | then 34 | these 35 | this 36 | those 37 | though 38 | to 39 | too 40 | very 41 | while 42 | yo 43 | you 44 | your 45 | yours 46 | yourself 47 | -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "snips/musicAlbum", 5 | "entity_parser": "parser_1" 6 | }, 7 | { 8 | "entity_identifier": "snips/musicArtist", 9 | "entity_parser": "parser_2" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":0.5,"stop_words":["mais","elles","steve","dans","vos","un","david","s","van","richard","me","à","avec","mark","sommes","nous","sur","y","sa","tu","des","ses","leur","te","pour","avons","ne","qui","qu","band","ce","and","ai","l","es","joe","tes","the","j","vous","elle","mike","avez","ont","ma","pas","ete","le","son","suis","que","c","même","of","je","thomas","meme","ta","james","eux","votre","peter","william","paul","il","m","de","sont","ou","n","les","robert","eu","moi","la","t","par","une","du","orchestra","mes","chris","martin","nos","êtes","black","lui","d","etes","lee","michael","est","a","ils","au","se","en","symphony","aux","in","ton","as","ces","été","notre","toi","mon","john","on","george","et"],"edge_cases":["The Avons","Ai","LA Symphony","Steve Lee","Black M","Mike D","Mike Lee","J","Mike","The Black","D&D","David & David","Es","The The","William Black","Mark James","EU","Joe","SA","Symphony in C","Steve James","David A. 
Martin","James Thomas","Meme","James","Richard Robert","John Martin","Joe Ma","Du Du A","George Martin","M","The David","N","Me&John","John Lee","AU","RoBERT","Paul C","De Van","David James","George Michael","M&S","Y&T","D/C","-M-","Chris Martin","Black","D","Richard Band","Richard Thomas","Peter Thomas","ME","Michael Lee","A","Chris Lee","David Lee","Les Elles","T & N","John David","Robert Thomas","moi","The Orchestra","John Thomas","Michael Mark","TOI","Robert Black","George","Michael Au","Chris D.","AI","Peter Peter","Chris and Thomas","On","A Band","Martin Lee","David","S / S / S","Joe Thomas","Robert John","James Michael","Avec","David Thomas","S","Me Me Me","LE","Steve Richard","Peter Martin","Mark Lui","The Band","Chris Thomas","Paul Martin","David J","John William","Qui","Mike Martin","UN"]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "fr", 3 | "gazetteer_parser": "gazetteer_entity_parser" 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "fr", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "playlist", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":1.0,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/deterministic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "max_pattern_length": 1000, 5 | "max_queries": 100, 6 | "unit_name": "deterministic_intent_parser" 7 | }, 8 | "group_names_to_slot_names": { 9 | "group0": "musicAlbum", 10 | "group1": "musicArtist", 11 | 
"group2": "playlist" 12 | }, 13 | "language_code": "fr", 14 | "patterns": { 15 | "adri:PlayMusic": [ 16 | "^\\s*mets\\s*son\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 17 | "^\\s*je\\s*veux\\s*ecouter\\s*chanson\\s*(?P%SNIPSMUSICARTIST%)\\s*please\\s*$", 18 | "^\\s*je\\s*souhaiterais\\s*\u00e9couter\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*$", 19 | "^\\s*mets\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 20 | "^\\s*lance\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*veux\\s*$", 21 | "^\\s*mets\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 22 | "^\\s*je\\s*veux\\s*ecouter\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 23 | "^\\s*peux\\s*mettre\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 24 | "^\\s*je\\s*voudrais\\s*ecouter\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 25 | "^\\s*je\\s*voudrais\\s*ecouter\\s*playlist\\s*(?P%PLAYLIST%)\\s*$" 26 | ] 27 | }, 28 | "slot_names_to_entities": { 29 | "adri:PlayMusic": { 30 | "musicAlbum": "snips/musicAlbum", 31 | "musicArtist": "snips/musicArtist", 32 | "playlist": "playlist" 33 | } 34 | }, 35 | "stop_words_whitelist": {} 36 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/deterministic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "deterministic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/nlu_engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_parser": "builtin_entity_parser", 3 | "config": { 4 | "intent_parsers_configs": [ 5 | { 6 | "ignore_stop_words": true, 7 | "max_pattern_length": 1000, 8 | "max_queries": 100, 9 | "unit_name": "deterministic_intent_parser" 10 | }, 11 | { 12 | "intent_classifier_config": { 13 | "data_augmentation_config": { 14 | "add_builtin_entities_examples": true, 15 | "max_unknown_words": 5, 16 | "min_utterances": 20, 17 | "noise_factor": 5, 18 | "unknown_word_prob": 0.5, 19 | "unknown_words_replacement_string": "unknownword" 20 | }, 21 | "featurizer_config": { 22 | "added_cooccurrence_feature_ratio": 0.25, 23 | "cooccurrence_vectorizer_config": { 24 | "filter_stop_words": true, 25 | "keep_order": true, 26 | "unit_name": "cooccurrence_vectorizer", 27 | "unknown_words_replacement_string": "unknownword", 28 | "window_size": 5 29 | }, 30 | "pvalue_threshold": 0.4, 31 | "tfidf_vectorizer_config": { 32 | "unit_name": "tfidf_vectorizer", 33 | "use_stemming": false, 34 | "word_clusters_name": null 35 | }, 36 | "unit_name": "featurizer" 37 | }, 38 | "noise_reweight_factor": 1.0, 39 | "unit_name": "log_reg_intent_classifier" 40 | }, 41 | "slot_filler_config": { 42 | "crf_args": { 43 | "algorithm": "lbfgs", 44 | "c1": 0.1, 45 | "c2": 0.1 46 | }, 47 | "data_augmentation_config": { 48 | "add_builtin_entities_examples": true, 49 | "capitalization_ratio": 0.2, 50 | "min_utterances": 200 51 | }, 52 | "feature_factory_configs": [ 53 | { 54 | "args": { 55 | "common_words_gazetteer_name": "top_10000_words_stemmed", 56 | "n": 1, 57 | "use_stemming": true 58 | }, 59 | "factory_name": "ngram", 60 | "offsets": [ 61 | -2, 62 | -1, 63 | 0, 64 | 1, 65 | 2 66 | ] 67 | }, 68 | { 69 | "args": { 70 | "common_words_gazetteer_name": "top_10000_words_stemmed", 71 | "n": 2, 72 | "use_stemming": true 73 | }, 74 | "factory_name": "ngram", 75 | "offsets": [ 76 | -2, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_digit", 83 | "offsets": [ 84 
| -1, 85 | 0, 86 | 1 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_first", 92 | "offsets": [ 93 | -2, 94 | -1, 95 | 0 96 | ] 97 | }, 98 | { 99 | "args": {}, 100 | "factory_name": "is_last", 101 | "offsets": [ 102 | 0, 103 | 1, 104 | 2 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 1 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | 0 114 | ] 115 | }, 116 | { 117 | "args": { 118 | "n": 2 119 | }, 120 | "factory_name": "shape_ngram", 121 | "offsets": [ 122 | -1, 123 | 0 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "n": 3 129 | }, 130 | "factory_name": "shape_ngram", 131 | "offsets": [ 132 | -1 133 | ] 134 | }, 135 | { 136 | "args": { 137 | "tagging_scheme_code": 2, 138 | "use_stemming": true 139 | }, 140 | "drop_out": 0.5, 141 | "factory_name": "entity_match", 142 | "offsets": [ 143 | -2, 144 | -1, 145 | 0 146 | ] 147 | }, 148 | { 149 | "args": { 150 | "tagging_scheme_code": 1 151 | }, 152 | "factory_name": "builtin_entity_match", 153 | "offsets": [ 154 | -2, 155 | -1, 156 | 0 157 | ] 158 | } 159 | ], 160 | "tagging_scheme": 1, 161 | "unit_name": "crf_slot_filler" 162 | }, 163 | "unit_name": "probabilistic_intent_parser" 164 | } 165 | ], 166 | "unit_name": "nlu_engine" 167 | }, 168 | "custom_entity_parser": "custom_entity_parser", 169 | "dataset_metadata": { 170 | "entities": { 171 | "playlist": { 172 | "automatically_extensible": false 173 | } 174 | }, 175 | "language_code": "fr", 176 | "slot_name_mappings": { 177 | "adri:PlayMusic": { 178 | "musicAlbum": "snips/musicAlbum", 179 | "musicArtist": "snips/musicArtist", 180 | "playlist": "playlist" 181 | } 182 | } 183 | }, 184 | "intent_parsers": [ 185 | "deterministic_intent_parser", 186 | "probabilistic_intent_parser" 187 | ], 188 | "model_version": "0.20.0", 189 | "training_package_version": "0.20.0", 190 | "unit_name": "nlu_engine" 191 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "cooccurrence_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/musicAlbum", 4 | "snips/musicArtist" 5 | ], 6 | "config": { 7 | "filter_stop_words": true, 8 | "keep_order": true, 9 | "unit_name": "cooccurrence_vectorizer", 10 | "unknown_words_replacement_string": "unknownword", 11 | "window_size": 5 12 | }, 13 | "language_code": "fr", 14 | "word_pairs": { 15 | "0": [ 16 | "album", 17 | "SNIPSMUSICALBUM" 18 | ], 19 | "1": [ 20 | "ecouter", 21 | "SNIPSMUSICALBUM" 22 | ], 23 | "2": [ 24 | "ecouter", 25 | "SNIPSMUSICARTIST" 26 | ], 27 | "3": [ 28 | "je", 29 | "SNIPSMUSICALBUM" 30 | ], 31 | "4": [ 32 | "je", 33 | "ecouter" 34 | ], 35 | "5": [ 36 | "mets", 37 | "SNIPSMUSICARTIST" 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "added_cooccurrence_feature_ratio": 0.25, 4 | 
"cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": "unknownword", 9 | "window_size": 5 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": "cooccurrence_vectorizer", 20 | "language_code": "fr", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/musicAlbum", 4 | "snips/musicArtist" 5 | ], 6 | "config": { 7 | "unit_name": "tfidf_vectorizer", 8 | "use_stemming": false, 9 | "word_clusters_name": null 10 | }, 11 | "language_code": "fr", 12 | "vectorizer": { 13 | "idf_diag": [ 14 | 4.697178256928631, 15 | 3.4932054526026954, 16 | 2.2404424841073274, 17 | 2.361803341111595, 18 | 4.697178256928631, 19 | 3.5985659682605218, 20 | 2.3300546427970144, 21 | 3.3978952727983707, 22 | 4.697178256928631, 23 | 4.697178256928631, 24 | 3.4932054526026954, 25 | 2.8513515664303006, 26 | 4.697178256928631, 27 | 3.849880396541428, 28 | 4.4094961844768505, 29 | 4.4094961844768505, 30 | 4.697178256928631, 31 | 4.697178256928631, 32 | 4.186352633162641, 33 | 4.697178256928631, 34 | 4.186352633162641, 35 | 1.0772916743016465, 36 | 3.716349003916905, 37 | 4.186352633162641 38 | ], 39 | "vocab": { 40 | "?": 0, 41 | "album": 1, 42 | "builtinentityfeaturesnipsmusicalbum": 2, 43 | "builtinentityfeaturesnipsmusicartist": 3, 44 | "chanson": 4, 45 | "dans": 5, 46 | "de": 6, 47 | "ecouter": 7, 48 | "entityfeatureplaylist": 8, 49 | "jazz": 9, 50 | "l": 10, 51 | "la": 11, 52 | "lance": 12, 53 | "mets": 13, 54 | "mettre": 14, 55 | "moi": 15, 56 | "playlist": 16, 57 | "please": 17, 58 | "son": 18, 59 | "souhaiterais": 19, 60 | "stp": 20, 61 | "unknownword": 21, 62 | "veux": 22, 63 | "voudrais": 23 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.5166360015252428, 5 | -0.37123136560399383, 6 | -0.5990894741148578, 7 | -1.118727734094978, 8 | -0.030597456416977947, 9 | 1.0322279612160994, 10 | 1.016885422821252, 11 | -0.7621951278255702, 12 | -0.18816721865208444, 13 | -0.18816721865208444, 14 | -0.3827265434396571, 15 | 1.0195576774593815, 16 | -0.5166360015252428, 17 | 
-1.1284893473963162, 18 | -1.3002036067433036, 19 | -0.5832964749395109, 20 | -0.18816721865208444, 21 | -0.030597456416977947, 22 | -0.15204859193852968, 23 | -0.1956950722711481, 24 | -1.3811965585429193, 25 | 3.2429520118942476, 26 | -0.2631328048160813, 27 | -0.7114748378163408, 28 | -1.8419736587848352, 29 | -0.636452201857874, 30 | -1.0594513135610186, 31 | -0.6320896470933497, 32 | -1.6115371082209173, 33 | -2.444543746213874 34 | ] 35 | ], 36 | "config": { 37 | "data_augmentation_config": { 38 | "add_builtin_entities_examples": true, 39 | "max_unknown_words": 5, 40 | "min_utterances": 20, 41 | "noise_factor": 5, 42 | "unknown_word_prob": 0.5, 43 | "unknown_words_replacement_string": "unknownword" 44 | }, 45 | "featurizer_config": { 46 | "added_cooccurrence_feature_ratio": 0.25, 47 | "cooccurrence_vectorizer_config": { 48 | "filter_stop_words": true, 49 | "keep_order": true, 50 | "unit_name": "cooccurrence_vectorizer", 51 | "unknown_words_replacement_string": "unknownword", 52 | "window_size": 5 53 | }, 54 | "pvalue_threshold": 0.4, 55 | "tfidf_vectorizer_config": { 56 | "unit_name": "tfidf_vectorizer", 57 | "use_stemming": false, 58 | "word_clusters_name": null 59 | }, 60 | "unit_name": "featurizer" 61 | }, 62 | "noise_reweight_factor": 1.0, 63 | "unit_name": "log_reg_intent_classifier" 64 | }, 65 | "featurizer": "featurizer", 66 | "intent_list": [ 67 | "adri:PlayMusic", 68 | null 69 | ], 70 | "intercept": [ 71 | 0.028391354876375573 72 | ], 73 | "t_": 961.0 74 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": true, 6 | "max_unknown_words": 5, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0.5, 10 | "unknown_words_replacement_string": "unknownword" 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.25, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": "unknownword", 19 | "window_size": 5 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1.0, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": 
"top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | "args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "tagging_scheme_code": 2, 129 | "use_stemming": true 130 | }, 131 | "drop_out": 0.5, 132 | "factory_name": "entity_match", 133 | "offsets": [ 134 | -2, 135 | -1, 136 | 0 137 | ] 138 | }, 139 | { 140 | "args": { 141 | "tagging_scheme_code": 1 142 | }, 143 | "factory_name": "builtin_entity_match", 144 | "offsets": [ 145 | -2, 146 | -1, 147 | 0 148 | ] 149 | } 150 | ], 151 | "tagging_scheme": 1, 152 | "unit_name": "crf_slot_filler" 153 | }, 154 | "unit_name": "probabilistic_intent_parser" 155 | }, 156 | "slot_fillers": [ 157 | { 158 | "intent": "adri:PlayMusic", 159 | "slot_filler_name": "slot_filler_0" 160 | } 161 | ] 162 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/modeluzcfum35.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/modeluzcfum35.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "fr", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "fr", 34 | "n": 2, 35 | 
"use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "fr", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "fr", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "fr", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [ 104 | "playlist" 105 | ], 106 | "tagging_scheme_code": 2, 107 | "use_stemming": true 108 | }, 109 | "drop_out": 0.5, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entity_labels": [ 120 | "snips/amountOfMoney", 121 | "snips/datetime", 122 | "snips/duration", 123 | "snips/musicAlbum", 124 | "snips/musicArtist", 125 | "snips/number", 126 | "snips/ordinal", 127 | "snips/percentage", 128 | "snips/temperature" 129 | ], 130 | "language_code": "fr", 131 | "tagging_scheme_code": 1 132 | }, 133 | "factory_name": "builtin_entity_match", 134 | "offsets": [ 135 | -2, 136 | -1, 137 | 0 138 | ] 139 | } 140 | ], 141 | "tagging_scheme": 1, 142 | "unit_name": "crf_slot_filler" 143 | }, 144 | "crf_model_file": "modeluzcfum35.crfsuite", 145 | "intent": "adri:PlayMusic", 146 | "language_code": "fr", 147 | "slot_name_mapping": { 148 | "musicAlbum": "snips/musicAlbum", 149 | "musicArtist": "snips/musicArtist", 150 | "playlist": "playlist" 151 | } 152 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/resources/fr/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for French", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "fr", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_fr", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.4", 17 | "word_clusters": [] 18 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/resources/fr/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | au 5 | aussi 6 | aux 7 | bonjour 8 | bonsoir 9 | c 10 | ce 11 | ceci 12 | cela 13 | ces 14 | cet 15 | cette 16 | d 17 | de 18 | des 19 | du 20 | et 21 | hey 22 | j 23 | juste 24 | l 25 | la 26 | le 27 | les 28 | lol 29 | m 30 | ma 31 | maintenant 32 | mais 33 | me 34 | merci 35 | mes 36 | moi 37 | mon 38 | ok 39 | puis 40 | s 41 | salut 42 | se 43 | stp 44 | svp 45 | toi 46 | tres 47 | tu 48 | un 49 | une 50 | vous 51 | y 52 | yo 53 | -------------------------------------------------------------------------------- /examples/interactive_parsing_cli.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | extern crate env_logger; 3 | extern crate serde_json; 4 | extern crate snips_nlu_lib; 5 | 6 | use clap::{App, Arg}; 7 | use snips_nlu_lib::SnipsNluEngine; 8 | use std::io; 9 | use std::io::Write; 10 | 11 | fn main() { 12 | env_logger::Builder::from_default_env() 13 | .default_format_timestamp_nanos(true) 14 | .init(); 15 | 16 | let matches = App::new("snips-nlu-parse") 17 | .about("Snips NLU interactive CLI for parsing intents") 18 | .arg( 19 | Arg::with_name("NLU_ENGINE_DIR") 20 | .required(true) 21 | .takes_value(true) 22 | .index(1) 23 | .help("path to the trained nlu engine directory"), 24 | ) 25 | .arg( 26 | Arg::with_name("intents_alternatives") 27 | .short("i") 28 | .long("--intents-alternatives") 29 | .takes_value(true) 30 | .help("number of alternative parsing results to return in the output"), 31 | ) 32 | .arg( 33 | Arg::with_name("slots_alternatives") 34 | .short("s") 35 | .long("--slots-alternatives") 36 | .takes_value(true) 37 | .help("number of alternative slot values to return along with each extracted slot"), 38 | ) 39 | .get_matches(); 40 | let engine_dir = matches.value_of("NLU_ENGINE_DIR").unwrap(); 41 | let intents_alternatives = matches 42 | .value_of("intents_alternatives") 43 | .map(|v| v.to_string().parse::<usize>().unwrap()) 44 | .unwrap_or(0); 45 | let slots_alternatives = matches 46 | .value_of("slots_alternatives") 47 | .map(|v| v.to_string().parse::<usize>().unwrap()) 48 | .unwrap_or(0); 49 | 50 | println!("\nLoading the nlu engine..."); 51 | let engine = SnipsNluEngine::from_path(engine_dir).unwrap(); 52 | 53 | loop { 54 | print!("> "); 55 | io::stdout().flush().unwrap(); 56 | let mut query = String::new(); 57 | io::stdin().read_line(&mut query).unwrap(); 58 | let result = engine 59 | .parse_with_alternatives( 60 | query.trim(), 61 | None, 62 | None, 63 | intents_alternatives, 64 | slots_alternatives, 65 | ) 66 | .unwrap(); 67 | let result_json = serde_json::to_string_pretty(&result).unwrap(); 68 | println!("{}", result_json); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-ffi" 3 | version = "0.65.6" 4 | edition = "2018" 5 | authors = [ 6 | "Adrien Ball <adrien.ball@snips.ai>", 7 | "Clement Doumouro <clement.doumouro@snips.ai>", 8 | "Kevin Lefevre <kevin.lefevre@snips.ai>", 9 | "Thibaut Lorrain <thibaut.lorrain@snips.ai>" 10 | ] 11 | 12 | [dependencies] 13 | ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } 14 | snips-nlu-lib = { path = ".."
} 15 | snips-nlu-ontology-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.67.1" } 16 | failure = "0.1" 17 | lazy_static = "1.0" 18 | libc = "0.2" 19 | serde_json = "1.0" 20 | 21 | [lib] 22 | crate-type = ["rlib", "staticlib", "cdylib"] 23 | -------------------------------------------------------------------------------- /ffi/cbindgen.toml: -------------------------------------------------------------------------------- 1 | language = "c" 2 | 3 | include_guard = "LIBSNIPS_NLU_H_" 4 | 5 | header = "#define SNIPS_NLU_VERSION \"0.65.6\"" 6 | 7 | [parse] 8 | parse_deps = true 9 | include = [ 10 | "snips_nlu_ffi", 11 | "ffi_utils", 12 | "snips_nlu_ontology_ffi", 13 | "snips_nlu_ontology_ffi_macros", 14 | ] 15 | 16 | [parse.expand] 17 | crates = [ 18 | "snips-nlu-ffi", 19 | ] 20 | 21 | [export] 22 | # These types are hidden behind a void pointer, let's include them 23 | include = [ 24 | "CActionSessionInit", 25 | "CNumberValue", 26 | "COrdinalValue", 27 | "CPercentageValue", 28 | "CInstantTimeValue", 29 | "CTimeIntervalValue", 30 | "CAmountOfMoneyValue", 31 | "CTemperatureValue", 32 | "CDurationValue", 33 | ] 34 | -------------------------------------------------------------------------------- /platforms/c/module.modulemap: -------------------------------------------------------------------------------- 1 | module Clibsnips_nlu { 2 | header "./libsnips_nlu.h" 3 | link "snips_nlu_ffi" 4 | export * 5 | } 6 | -------------------------------------------------------------------------------- /platforms/kotlin/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle 3 | out/ 4 | -------------------------------------------------------------------------------- /platforms/kotlin/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | ext.kotlin_version = '1.3.11' 3 | repositories { 4 | jcenter() 5 | } 6 | dependencies { 7 | classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" 8 | classpath 'net.java.dev.jna:jna:4.5.0' 9 | } 10 | } 11 | 12 | apply plugin: 'kotlin' 13 | 14 | version = "0.65.6" 15 | group = "ai.snips" 16 | 17 | repositories { 18 | jcenter() 19 | maven { 20 | url "https://nexus-repository.snips.ai/repository/snips-maven-releases/" 21 | } 22 | maven { 23 | url "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/" 24 | } 25 | } 26 | 27 | configurations { 28 | aarArchives 29 | jarArchives 30 | } 31 | 32 | dependencies { 33 | compile "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version" 34 | compile 'net.java.dev.jna:jna:4.5.0' 35 | compile "ai.snips:snips-nlu-ontology:0.67.1" 36 | testCompile 'junit:junit:4.12' 37 | testCompile 'com.google.truth:truth:0.36' 38 | } 39 | 40 | def buildType = project.hasProperty("debug") ? "debug" : "release" 41 | println "Using build type $buildType" 42 | 43 | def rustTargetPath = project.hasProperty("rustTargetPath") ? 
project.rustTargetPath : "../../target" 44 | println "Using rust target path $rustTargetPath" 45 | 46 | def soForJar = [ 47 | ["$rustTargetPath/$buildType/libsnips_nlu_ffi.so", "linux-x86-64", "linuxNative" ], 48 | ["$rustTargetPath/$buildType/libsnips_nlu_ffi.dylib", "darwin", "macOsNative" ], 49 | ["$rustTargetPath/arm-unknown-linux-gnueabihf/$buildType/libsnips_nlu_ffi.so", "linux-arm", "linuxArmCross"] 50 | ] 51 | 52 | def jarClassifier = { 53 | def existing = soForJar.findAll{ file(it[0]).exists() } 54 | if(existing.size == 1) existing[0][1] 55 | else if(existing.size == 0) "naked" 56 | else "multiarch" 57 | } 58 | 59 | jar { 60 | classifier "${ -> jarClassifier() }" 61 | } 62 | 63 | def jniLibsDir = new File(buildDir, "jniLibs") 64 | 65 | soForJar.forEach { 66 | def taskName = "copySo${it[2].capitalize()}ForJar" 67 | def soFile = file(it[0]) 68 | def destDir = new File(jniLibsDir, it[1]) 69 | 70 | task(taskName, type: Copy) { 71 | from soFile 72 | into destDir 73 | } 74 | processResources.dependsOn(taskName) 75 | 76 | } 77 | 78 | sourceSets { 79 | main { 80 | resources { 81 | srcDir jniLibsDir 82 | } 83 | } 84 | } 85 | 86 | def aarDir = new File(buildDir, "aar") 87 | 88 | task("aar", type: Zip) { 89 | destinationDir new File(buildDir, "libs") 90 | baseName = "${project.name}-android" 91 | version = project.version 92 | extension = "aar" 93 | from aarDir 94 | } 95 | 96 | task("classesJarForAar", type: Zip) { 97 | destinationDir aarDir 98 | archiveName "classes.jar" 99 | from new File(buildDir, "classes/java/main") 100 | } 101 | 102 | classesJarForAar.dependsOn(classes) 103 | aar.dependsOn(classesJarForAar) 104 | 105 | task("manifestForAar", type: Copy) { 106 | from new File("src/main/android/AndroidManifest.xml") 107 | destinationDir aarDir 108 | } 109 | 110 | aar.dependsOn(manifestForAar) 111 | 112 | def soForAar = [ 113 | ["$rustTargetPath/arm-linux-androideabi/$buildType/libsnips_nlu.so", "armeabi"], 114 | ["$rustTargetPath/armv7-linux-androideabi/$buildType/libsnips_nlu.so", "armeabi-v7a"], 115 | ["$rustTargetPath/aarch64-linux-android/$buildType/libsnips_nlu.so", "arm64-v8a"], 116 | ["$rustTargetPath/i686-linux-android/$buildType/libsnips_nlu.so", "x86"], 117 | ["$rustTargetPath/x86_64-linux-android/$buildType/libsnips_nlu.so", "x86_64"] 118 | ] 119 | 120 | soForAar.forEach { 121 | def taskName = "copySo${it[1].capitalize()}ForAar" 122 | def soFile = file(it[0]) 123 | def destDir = new File(aarDir, "jni/${it[1]}") 124 | 125 | task(taskName, type: Copy) { 126 | from soFile 127 | into destDir 128 | } 129 | aar.dependsOn(taskName) 130 | 131 | } 132 | 133 | sourceCompatibility = "1.7" 134 | targetCompatibility = "1.7" 135 | 136 | artifacts { 137 | aarArchives aar.archivePath 138 | jarArchives jar.archivePath 139 | } 140 | 141 | apply plugin: 'maven' 142 | 143 | def _nexusUsername = project.hasProperty("nexusUsername") ? nexusUsername : "" 144 | def _nexusPassword = project.hasProperty("nexusPassword") ?
nexusPassword : "" 145 | 146 | uploadJarArchives { 147 | repositories { 148 | mavenDeployer { 149 | repository(url: "https://nexus-repository.snips.ai/repository/snips-maven-releases/") { 150 | authentication(userName: _nexusUsername, password: _nexusPassword) 151 | } 152 | snapshotRepository(url: "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/") { 153 | authentication(userName: _nexusUsername, password: _nexusPassword) 154 | } 155 | } 156 | } 157 | } 158 | 159 | uploadJarArchives.dependsOn(jar) 160 | uploadArchives.dependsOn(uploadJarArchives) 161 | 162 | uploadAarArchives { 163 | repositories { 164 | mavenDeployer { 165 | repository(url: "https://nexus-repository.snips.ai/repository/snips-maven-releases/") { 166 | authentication(userName: _nexusUsername, password: _nexusPassword) 167 | } 168 | snapshotRepository(url: "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/") { 169 | authentication(userName: _nexusUsername, password: _nexusPassword) 170 | } 171 | 172 | repository(url: "file://localhost/tmp/myRepo/") 173 | pom.name = "snips-nlu-android" 174 | 175 | pom.whenConfigured { pom -> 176 | pom.dependencies.find { dep -> dep.groupId == 'net.java.dev.jna' && dep.artifactId == 'jna' }.scope = "provided" 177 | } 178 | } 179 | } 180 | } 181 | 182 | uploadAarArchives.dependsOn(aar) 183 | uploadArchives.dependsOn(uploadAarArchives) 184 | -------------------------------------------------------------------------------- /platforms/kotlin/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/platforms/kotlin/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /platforms/kotlin/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Aug 30 18:30:10 CEST 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.1-all.zip 7 | -------------------------------------------------------------------------------- /platforms/kotlin/gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 
34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 
| (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /platforms/kotlin/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 
55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /platforms/kotlin/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "snips-nlu" 2 | -------------------------------------------------------------------------------- /platforms/kotlin/src/main/android/AndroidManifest.xml: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/kotlin/src/main/kotlin/com/sun/jna/JnaUtils.kt: -------------------------------------------------------------------------------- 1 | package com.sun.jna 2 | 3 | // NativeString is package private... 4 | fun String.toJnaPointer(encoding: String) = NativeString(this, encoding).pointer 5 | -------------------------------------------------------------------------------- /platforms/python/.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | venv3/ 3 | venv34/ 4 | venv36/ 5 | venv37/ 6 | build/ 7 | dist/ 8 | *.pyc 9 | *.py.bak 10 | *.egg-info/ 11 | .idea 12 | .tox/ 13 | -------------------------------------------------------------------------------- /platforms/python/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Snips 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | -------------------------------------------------------------------------------- /platforms/python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include snips_nlu_rust/__version__ 2 | include LICENSE README.rst 3 | recursive-include snips_nlu_rust/dylib/ * 4 | recursive-include ffi/ * 5 | recursive-exclude ffi/target/ * 6 | global-exclude __pycache__ *.py[cod] 7 | -------------------------------------------------------------------------------- /platforms/python/README.rst: -------------------------------------------------------------------------------- 1 | Snips NLU Rust Wrapper 2 | ====================== 3 | 4 | Installation 5 | ------------ 6 | 7 | It is recommended to install the package with a virtualenv: 8 | 9 | .. code-block:: bash 10 | 11 | virtualenv -p python3.6 venv 12 | . venv/bin/activate 13 | 14 | The package is available on pypi, and can be installed with `pip`: 15 | 16 | .. code-block:: bash 17 | 18 | pip install snips-nlu-rust 19 | -------------------------------------------------------------------------------- /platforms/python/ffi/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Rust template 3 | # Generated by Cargo 4 | # will have compiled files and executables 5 | /target/ 6 | 7 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 8 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 9 | Cargo.lock 10 | 11 | # These are backup files generated by rustfmt 12 | **/*.rs.bk 13 | 14 | -------------------------------------------------------------------------------- /platforms/python/ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-python-ffi" 3 | version = "0.65.6" 4 | authors = ["Adrien Ball "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "snips_nlu_python_ffi" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | libc = "0.2" 13 | ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } 14 | snips-nlu-ffi = { git = "https://github.com/snipsco/snips-nlu-rs", tag = "0.65.6" } 15 | -------------------------------------------------------------------------------- /platforms/python/ffi/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate ffi_utils; 2 | extern crate libc; 3 | extern crate snips_nlu_ffi; 4 | 5 | use ffi_utils::{CStringArray, SNIPS_RESULT}; 6 | use snips_nlu_ffi::CSnipsNluEngine; 7 | 8 | #[doc(hidden)] 9 | #[macro_export] 10 | macro_rules! 
export_c_symbol { 11 | ($alias:ident, fn $name:ident($( $arg:ident : $type:ty ),*) -> $ret:ty) => { 12 | #[no_mangle] 13 | pub extern "C" fn $alias($( $arg : $type),*) -> $ret { 14 | ::snips_nlu_ffi::$name($( $arg ),*) 15 | } 16 | }; 17 | ($alias:ident, fn $name:ident($( $arg:ident : $type:ty ),*)) => { 18 | export_c_symbol!($alias, fn $name($( $arg : $type),*) -> ()); 19 | } 20 | } 21 | 22 | export_c_symbol!(ffi_snips_nlu_engine_create_from_dir, fn snips_nlu_engine_create_from_dir(root_dir: *const libc::c_char, client: *mut *const CSnipsNluEngine) -> SNIPS_RESULT); 23 | export_c_symbol!(ffi_snips_nlu_engine_create_from_zip, fn snips_nlu_engine_create_from_zip(zip: *const libc::c_uchar, zip_size: libc::c_uint, client: *mut *const CSnipsNluEngine) -> SNIPS_RESULT); 24 | export_c_symbol!(ffi_snips_nlu_engine_run_parse_into_json, fn snips_nlu_engine_run_parse_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intents_whitelist: *const CStringArray, intents_blacklist: *const CStringArray, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 25 | export_c_symbol!(ffi_snips_nlu_engine_run_parse_with_alternatives_into_json, fn snips_nlu_engine_run_parse_with_alternatives_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intents_whitelist: *const CStringArray, intents_blacklist: *const CStringArray, intents_alternatives: libc::c_uint, slots_alternatives: libc::c_uint, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 26 | export_c_symbol!(ffi_snips_nlu_engine_run_get_slots_into_json, fn snips_nlu_engine_run_get_slots_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intent: *const libc::c_char, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 27 | export_c_symbol!(ffi_snips_nlu_engine_run_get_slots_with_alternatives_into_json, fn snips_nlu_engine_run_get_slots_with_alternatives_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intent: *const libc::c_char, slots_alternatives: libc::c_uint, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 28 | export_c_symbol!(ffi_snips_nlu_engine_run_get_intents_into_json, fn snips_nlu_engine_run_get_intents_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 29 | export_c_symbol!(ffi_snips_nlu_engine_get_last_error, fn snips_nlu_engine_get_last_error(error: *mut *const libc::c_char) -> SNIPS_RESULT); 30 | export_c_symbol!(ffi_snips_nlu_engine_destroy_string, fn snips_nlu_engine_destroy_string(string: *mut libc::c_char) -> SNIPS_RESULT); 31 | export_c_symbol!(ffi_snips_nlu_engine_destroy_client, fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) -> SNIPS_RESULT); 32 | export_c_symbol!(ffi_snips_nlu_engine_get_model_version, fn snips_nlu_engine_get_model_version(version: *mut *const libc::c_char) -> SNIPS_RESULT); 33 | -------------------------------------------------------------------------------- /platforms/python/requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools_rust==0.8.4 2 | wheel==0.30.0 3 | -------------------------------------------------------------------------------- /platforms/python/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import io 4 | import os 5 | import sys 6 | 7 | from setuptools import setup, find_packages 8 | from setuptools_rust import Binding, RustExtension 9 | 10 | packages = [p for p in 
find_packages() if "tests" not in p] 11 | 12 | PACKAGE_NAME = "snips_nlu_rust" 13 | ROOT_PATH = os.path.dirname(os.path.abspath(__file__)) 14 | PACKAGE_PATH = os.path.join(ROOT_PATH, PACKAGE_NAME) 15 | VERSION = "__version__" 16 | README = os.path.join(ROOT_PATH, "README.rst") 17 | 18 | RUST_EXTENSION_NAME = 'snips_nlu_rust.dylib.libsnips_nlu_rs' 19 | CARGO_ROOT_PATH = os.path.join(ROOT_PATH, 'ffi') 20 | CARGO_FILE_PATH = os.path.join(CARGO_ROOT_PATH, 'Cargo.toml') 21 | CARGO_TARGET_DIR = os.path.join(CARGO_ROOT_PATH, 'target') 22 | os.environ['CARGO_TARGET_DIR'] = CARGO_TARGET_DIR 23 | 24 | with io.open(os.path.join(PACKAGE_PATH, VERSION)) as f: 25 | version = f.readline() 26 | 27 | with io.open(README, "rt", encoding="utf8") as f: 28 | readme = f.read() 29 | 30 | setup(name=PACKAGE_NAME, 31 | version=version, 32 | description='Python wrapper of the Rust Snips NLU engine', 33 | long_description=readme, 34 | author='Thibaut Lorrain, Adrien Ball', 35 | author_email='thibaut.lorrain@snips.ai, adrien.ball@snips.ai', 36 | classifiers=[ 37 | "Programming Language :: Python :: 2", 38 | "Programming Language :: Python :: 2.7", 39 | "Programming Language :: Python :: 3", 40 | "Programming Language :: Python :: 3.4", 41 | "Programming Language :: Python :: 3.5", 42 | "Programming Language :: Python :: 3.6", 43 | "Programming Language :: Python :: 3.7", 44 | ], 45 | install_requires=[ 46 | "future>=0.16,<0.18", 47 | "pathlib>=1.0,<2.0; python_version < '3.4'", 48 | ], 49 | packages=packages, 50 | include_package_data=True, 51 | rust_extensions=[RustExtension(RUST_EXTENSION_NAME, CARGO_FILE_PATH, 52 | debug="develop" in sys.argv, 53 | binding=Binding.NoBinding)], 54 | zip_safe=False) 55 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from snips_nlu_rust.nlu_engine import NLUEngine 4 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/__version__: -------------------------------------------------------------------------------- 1 | 0.65.6 2 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/dylib/.gitignore: -------------------------------------------------------------------------------- 1 | *.dylib 2 | *.so 3 | *.dll -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/platforms/python/snips_nlu_rust/tests/__init__.py -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import io 4 | import os 5 | 6 | TEST_DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 7 | "..", "..", "..", "..", "data", "tests") 8 | 9 | GAME_ENGINE_DIR = os.path.join(TEST_DATA_PATH, "models", "nlu_engine_game") 10 | BEVERAGE_ENGINE_DIR = os.path.join(TEST_DATA_PATH, "models", 11 | "nlu_engine_beverage") 12 | BEVERAGE_ENGINE_ZIP_PATH = os.path.join(TEST_DATA_PATH, "models", 13 | "nlu_engine_beverage.zip") 14 | 15 | with 
io.open(BEVERAGE_ENGINE_ZIP_PATH, mode='rb') as f: 16 | BEVERAGE_ENGINE_ZIP_BYTES = bytearray(f.read()) 17 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/utils.py: -------------------------------------------------------------------------------- 1 | from _ctypes import Structure, POINTER, byref 2 | from contextlib import contextmanager 3 | from ctypes import cdll, c_char_p, c_int32, string_at 4 | from pathlib import Path 5 | 6 | dylib_dir = Path(__file__).parent / "dylib" 7 | dylib_path = list(dylib_dir.glob("libsnips_nlu*"))[0] 8 | lib = cdll.LoadLibrary(str(dylib_path)) 9 | 10 | 11 | @contextmanager 12 | def string_pointer(ptr): 13 | try: 14 | yield ptr 15 | finally: 16 | lib.ffi_snips_nlu_engine_destroy_string(ptr) 17 | 18 | 19 | class CStringArray(Structure): 20 | _fields_ = [ 21 | ("data", POINTER(c_char_p)), 22 | ("size", c_int32) 23 | ] 24 | 25 | 26 | def check_ffi_error(exit_code, error_context_msg): 27 | if exit_code != 0: 28 | with string_pointer(c_char_p()) as ptr: 29 | if lib.snips_nlu_engine_get_last_error(byref(ptr)) == 0: 30 | ffi_error_message = string_at(ptr).decode("utf8") 31 | else: 32 | ffi_error_message = "see stderr" 33 | raise ValueError("%s: %s" % (error_context_msg, ffi_error_message)) 34 | -------------------------------------------------------------------------------- /platforms/python/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35, py36, py37 3 | skipsdist = True 4 | 5 | [testenv] 6 | skip_install = true 7 | commands = 8 | pip install -r requirements.txt 9 | pip install -e . --verbose 10 | python -m unittest discover 11 | setenv= 12 | LANG=en_US.UTF-8 13 | PYTHONIOENCODING=UTF-8 14 | -------------------------------------------------------------------------------- /platforms/swift/.gitignore: -------------------------------------------------------------------------------- 1 | #### joe made this: http://goel.io/joe 2 | 3 | #### swift #### 4 | # Xcode 5 | # 6 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 7 | 8 | ## Build generated 9 | build/ 10 | DerivedData/ 11 | 12 | ## Various settings 13 | *.pbxuser 14 | !default.pbxuser 15 | *.mode1v3 16 | !default.mode1v3 17 | *.mode2v3 18 | !default.mode2v3 19 | *.perspectivev3 20 | !default.perspectivev3 21 | xcuserdata/ 22 | 23 | ## Other 24 | *.moved-aside 25 | *.xccheckout 26 | *.xcscmblueprint 27 | 28 | ## Obj-C/Swift specific 29 | *.hmap 30 | *.ipa 31 | *.dSYM.zip 32 | *.dSYM 33 | 34 | ## Playgrounds 35 | timeline.xctimeline 36 | playground.xcworkspace 37 | 38 | # Swift Package Manager 39 | # 40 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 41 | # Packages/ 42 | # Package.pins 43 | .build/ 44 | 45 | # CocoaPods 46 | # 47 | # We recommend against adding the Pods directory to your .gitignore. However 48 | # you should judge for yourself, the pros and cons are mentioned at: 49 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 50 | # 51 | # Pods/ 52 | 53 | # Carthage 54 | # 55 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 56 | # Carthage/Checkouts 57 | 58 | Carthage/Build 59 | 60 | # fastlane 61 | # 62 | # It is recommended to not store the screenshots in the git repo. 
Instead, use fastlane to re-generate the 63 | # screenshots whenever they are needed. 64 | # For more information about the recommended setup visit: 65 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 66 | 67 | fastlane/report.xml 68 | fastlane/Preview.html 69 | fastlane/screenshots 70 | fastlane/test_output 71 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>IDEDidComputeMac32BitWarning</key> 6 | <true/> 7 | </dict> 8 | </plist> 9 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/Dependencies/.gitignore: -------------------------------------------------------------------------------- 1 | ios/ 2 | macos/ 3 | tvos/ 4 | watchos/ 5 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/Dependencies/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | : ${PROJECT_DIR:?"${0##*/} must be invoked as part of an Xcode script phase"} 4 | 5 | set -e 6 | 7 | VERSION="0.65.6" 8 | SYSTEM=$(echo $1 | tr '[:upper:]' '[:lower:]') 9 | LIBRARY_NAME=libsnips_nlu_ffi 10 | LIBRARY_NAME_A=${LIBRARY_NAME}.a 11 | LIBRARY_NAME_H=libsnips_nlu.h 12 | OUT_DIR=${PROJECT_DIR}/Dependencies/${SYSTEM} 13 | 14 | if [ -z "$TARGET_BUILD_TYPE" ]; then 15 | TARGET_BUILD_TYPE=$(echo ${CONFIGURATION} | tr '[:upper:]' '[:lower:]') 16 | fi 17 | 18 | if [ "${SYSTEM}" != "ios" ] && [ "${SYSTEM}" != "macos" ]; then 19 | echo "Given system should be 'ios' or 'macos'." 20 | exit 1 21 | fi 22 | 23 | mkdir -p ${OUT_DIR} 24 | 25 | install_remote_core () { 26 | echo "Trying remote installation" 27 | 28 | local filename=snips-nlu-${SYSTEM}.${VERSION}.tgz 29 | local url=https://s3.amazonaws.com/snips/snips-nlu-dev/${filename} 30 | 31 | echo "Will download '${filename}' in '${OUT_DIR}'" 32 | if curl --output /dev/null --silent --head --fail "$url"; then 33 | $(cd ${OUT_DIR} && curl -s ${url} | tar zxv) 34 | else 35 | echo "Version ${VERSION} doesn't seem to have been released yet" 36 | echo "Could not find any file at '${url}'" 37 | echo "Please file issue on 'https://github.com/snipsco/snips-nlu-rs' if you believe this is an issue" 38 | return 1 39 | fi 40 | 41 | return 0 42 | } 43 | 44 | install_local_core () { 45 | echo "Trying local installation" 46 | 47 | # TODO: Find a better way to retrieve root_dir 48 | local root_dir=${PROJECT_DIR}/../../../ 49 | local target_dir=${root_dir}/target/ 50 | 51 | if [ ${SYSTEM} = macos ]; then 52 | echo "Using macOS local build" 53 | 54 | local library_path=${target_dir}/${TARGET_BUILD_TYPE}/${LIBRARY_NAME_A} 55 | if [ !
-e ${library_path} ]; then 56 | echo "Missing file '${library_path}'" 57 | return 1 58 | fi 59 | 60 | cp ${library_path} ${OUT_DIR} 61 | cp ${PROJECT_DIR}/../../c/${LIBRARY_NAME_H} ${OUT_DIR} 62 | cp ${PROJECT_DIR}/../../c/module.modulemap ${OUT_DIR} 63 | 64 | elif [ ${SYSTEM} = ios ]; then 65 | echo "Using iOS local build" 66 | local archs_array=( ${ARCHS} ) 67 | 68 | for arch in "${archs_array[@]}"; do 69 | if [ ${arch} = arm64 ]; then 70 | local arch=aarch64 71 | fi 72 | local library_path=${target_dir}/${arch}-apple-ios/${TARGET_BUILD_TYPE}/${LIBRARY_NAME_A} 73 | if [ ! -e ${library_path} ]; then 74 | echo "Can't find library for arch ${arch}" 75 | echo "Missing file '${library_path}'" 76 | return 1 77 | fi 78 | cp ${library_path} ${OUT_DIR}/${LIBRARY_NAME}-${arch}.a 79 | done 80 | 81 | lipo -create $(find ${OUT_DIR}/${LIBRARY_NAME}-*.a) \ 82 | -output ${OUT_DIR}/${LIBRARY_NAME_A} 83 | cp ${PROJECT_DIR}/../../c/${LIBRARY_NAME_H} ${OUT_DIR} 84 | cp ${PROJECT_DIR}/../../c/module.modulemap ${OUT_DIR} 85 | 86 | else 87 | echo "${SYSTEM} isn't supported" 88 | return 1 89 | fi 90 | 91 | return 0 92 | } 93 | 94 | core_is_present () { 95 | echo "Checking if core is present (and complete)" 96 | local files=( 97 | ${OUT_DIR}/module.modulemap 98 | ${OUT_DIR}/${LIBRARY_NAME_A} 99 | ${OUT_DIR}/${LIBRARY_NAME_H} 100 | ) 101 | 102 | for file in "${files[@]}"; do 103 | if [ ! -e $file ]; then 104 | echo "Core isn't complete" 105 | echo "Missing file '$file'" 106 | return 1 107 | fi 108 | done 109 | 110 | echo "Core is present" 111 | return 0 112 | } 113 | 114 | core_is_up_to_date () { 115 | echo "Checking if core is up-to-date" 116 | 117 | local header_path=${OUT_DIR}/${LIBRARY_NAME_H} 118 | 119 | if [ -z $(grep "SNIPS_NLU_VERSION" $header_path) ]; then 120 | echo "SNIPS_NLU_VERSION not present. Skipping up-to-date check..." 121 | return 0 122 | fi 123 | 124 | local core_version=$(grep "SNIPS_NLU_VERSION" $header_path | cut -d'"' -f2) 125 | 126 | if [ "$core_version" = ${VERSION} ]; then 127 | echo "Core is up-to-date" 128 | return 0 129 | fi 130 | 131 | echo "Core isn't up-to-date" 132 | echo "Found version ${core_version}, expected version ${VERSION}" 133 | return 1 134 | } 135 | 136 | echo "Will check if core is present and up-to-date" 137 | if core_is_present && core_is_up_to_date; then 138 | echo "Core seems present and up-to-date !" 139 | exit 0 140 | fi 141 | 142 | rm -f ${OUT_DIR}/* 143 | if [ "${SNIPS_USE_LOCAL}" == 1 ]; then 144 | echo "SNIPS_USE_LOCAL=1 Will try local installation only" 145 | install_local_core && exit 0 146 | elif [ "${SNIPS_USE_REMOTE}" == 1 ]; then 147 | echo "SNIPS_USE_REMOTE=1 Will try remote installation only" 148 | install_remote_core && exit 0 149 | else 150 | if ! 
install_local_core; then 151 | echo "Local installation failed" 152 | install_remote_core && exit 0 153 | fi 154 | fi 155 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/xcshareddata/xcschemes/SnipsNlu-iOS.xcscheme: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/xcshareddata/xcschemes/SnipsNlu-macOS.xcscheme: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>en</string> 7 | <key>CFBundleExecutable</key> 8 | <string>$(EXECUTABLE_NAME)</string> 9 | <key>CFBundleIdentifier</key> 10 | <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> 11 | <key>CFBundleInfoDictionaryVersion</key> 12 | <string>6.0</string> 13 | <key>CFBundleName</key> 14 | <string>$(PRODUCT_NAME)</string> 15 | <key>CFBundlePackageType</key> 16 | <string>FMWK</string> 17 | <key>CFBundleShortVersionString</key> 18 | <string>1.0</string> 19 | <key>CFBundleVersion</key> 20 | <string>$(CURRENT_PROJECT_VERSION)</string> 21 | <key>NSHumanReadableCopyright</key> 22 | <string>Copyright © 2017 Snips. All rights reserved.</string> 23 | <key>NSPrincipalClass</key> 24 | <string></string> 25 | </dict> 26 | </plist> 27 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu/SnipsNlu.h: -------------------------------------------------------------------------------- 1 | // 2 | // SnipsNlu.h 3 | // SnipsNlu 4 | // 5 | // Created by Kevin Lefevre on 23/06/2017. 6 | // Copyright © 2017 Snips. All rights reserved. 7 | // 8 | 9 | #import <Foundation/Foundation.h> 10 | 11 | //! Project version number for SnipsNlu. 12 | FOUNDATION_EXPORT double SnipsNluVersionNumber; 13 | 14 | //! Project version string for SnipsNlu.
15 | FOUNDATION_EXPORT const unsigned char SnipsNluVersionString[]; 16 | 17 | // In this header, you should import all the public headers of your framework using statements like #import <SnipsNlu/PublicHeader.h> 18 | 19 | 20 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNluTests/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>en</string> 7 | <key>CFBundleExecutable</key> 8 | <string>$(EXECUTABLE_NAME)</string> 9 | <key>CFBundleIdentifier</key> 10 | <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> 11 | <key>CFBundleInfoDictionaryVersion</key> 12 | <string>6.0</string> 13 | <key>CFBundleName</key> 14 | <string>$(PRODUCT_NAME)</string> 15 | <key>CFBundlePackageType</key> 16 | <string>BNDL</string> 17 | <key>CFBundleShortVersionString</key> 18 | <string>1.0</string> 19 | <key>CFBundleVersion</key> 20 | <string>1</string> 21 | </dict> 22 | </plist> 23 | -------------------------------------------------------------------------------- /post_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | VERSION=$1 4 | 5 | if [[ -z "$VERSION" ]] 6 | then 7 | echo "Usage: $0 <version>" 8 | exit 1 9 | fi 10 | 11 | set -ex 12 | 13 | ./update_version.sh ${VERSION} 14 | 15 | git commit . -m "Set post-release version to $VERSION" 16 | -------------------------------------------------------------------------------- /src/entity_parser/builtin_entity_parser.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::sync::Mutex; 3 | 4 | use log::info; 5 | use snips_nlu_ontology::{BuiltinEntity, BuiltinEntityKind}; 6 | use snips_nlu_parsers::BuiltinEntityParser as _BuiltinEntityParser; 7 | 8 | use super::utils::Cache; 9 | use crate::errors::*; 10 | 11 | pub trait BuiltinEntityParser: Send + Sync { 12 | fn extract_entities( 13 | &self, 14 | sentence: &str, 15 | filter_entity_kinds: Option<&[BuiltinEntityKind]>, 16 | use_cache: bool, 17 | max_alternative_resolved_values: usize, 18 | ) -> Result<Vec<BuiltinEntity>>; 19 | } 20 | 21 | pub struct CachingBuiltinEntityParser { 22 | parser: _BuiltinEntityParser, 23 | cache: Mutex<Cache<CacheKey, Vec<BuiltinEntity>>>, 24 | } 25 | 26 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 27 | struct CacheKey { 28 | input: String, 29 | kinds: Option<Vec<BuiltinEntityKind>>, 30 | max_alternative_resolved_values: usize, 31 | } 32 | 33 | impl BuiltinEntityParser for CachingBuiltinEntityParser { 34 | fn extract_entities( 35 | &self, 36 | sentence: &str, 37 | filter_entity_kinds: Option<&[BuiltinEntityKind]>, 38 | use_cache: bool, 39 | max_alternative_resolved_values: usize, 40 | ) -> Result<Vec<BuiltinEntity>> { 41 | let lowercased_sentence = sentence.to_lowercase(); 42 | if !use_cache { 43 | return self.parser.extract_entities( 44 | &lowercased_sentence, 45 | filter_entity_kinds, 46 | max_alternative_resolved_values, 47 | ); 48 | } 49 | let cache_key = CacheKey { 50 | input: lowercased_sentence, 51 | kinds: filter_entity_kinds.map(|entity_kinds| entity_kinds.to_vec()), 52 | max_alternative_resolved_values, 53 | }; 54 | 55 | self.cache 56 | .lock() 57 | .unwrap() 58 | .try_cache(&cache_key, |cache_key| { 59 | self.parser.extract_entities( 60 | &cache_key.input, 61 | filter_entity_kinds, 62 | max_alternative_resolved_values, 63 | ) 64 | }) 65 | } 66 | } 67 | 68 | impl CachingBuiltinEntityParser { 69 | pub fn from_path<P: AsRef<Path>>(path: P, cache_capacity: usize) -> Result<Self> { 70 | info!("Loading builtin entity parser ({:?}) ...", path.as_ref()); 71 | let parser = _BuiltinEntityParser::from_path(path)?; 72 | let cache = Mutex::new(Cache::new(cache_capacity)); 73 | info!("Builtin entity parser loaded"); 74 | Ok(Self { parser, cache }) 75 | } 76 | } 77 |
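A note on the caching pattern in `builtin_entity_parser.rs` above: `CachingBuiltinEntityParser` memoizes each parse behind a `Mutex<Cache<...>>`, keyed on the lowercased input plus the entity-kind filter and the number of alternatives, so repeated queries skip the expensive parser call. The following is a minimal stand-alone sketch of the same LRU memoization idiom, assuming the same `lru-cache` crate used by `Cache` below; `expensive_parse` is a hypothetical stand-in for the real parser call:

    use lru_cache::LruCache;
    use std::sync::Mutex;

    // Hypothetical stand-in for the costly builtin entity parser call.
    fn expensive_parse(input: &str) -> Result<Vec<String>, String> {
        Ok(vec![format!("entity found in '{}'", input)])
    }

    fn main() -> Result<(), String> {
        // Shared LRU cache guarded by a Mutex, as in CachingBuiltinEntityParser.
        let cache: Mutex<LruCache<String, Vec<String>>> = Mutex::new(LruCache::new(100));

        for query in &["Set a timer", "set a TIMER"] {
            // Lowercasing the key makes the second query a cache hit.
            let key = query.to_lowercase();
            let mut guard = cache.lock().unwrap();
            let value = if let Some(hit) = guard.get_mut(&key) {
                hit.clone() // cache hit: reuse the stored result
            } else {
                let computed = expensive_parse(&key)?; // cache miss: compute...
                guard.insert(key.clone(), computed.clone()); // ...and store
                computed
            };
            println!("{:?}", value);
        }
        Ok(())
    }

Unlike this sketch, the real parser also bypasses the cache entirely when `use_cache` is false.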
-------------------------------------------------------------------------------- /src/entity_parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod builtin_entity_parser; 2 | pub mod custom_entity_parser; 3 | mod utils; 4 | 5 | pub use self::builtin_entity_parser::*; 6 | pub use self::custom_entity_parser::*; 7 | -------------------------------------------------------------------------------- /src/entity_parser/utils.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use lru_cache::LruCache; 4 | 5 | use crate::errors::*; 6 | 7 | pub struct Cache<K, V>(LruCache<K, V>) 8 | where 9 | K: Eq + Hash + Clone, 10 | V: Clone; 11 | 12 | impl<K, V> Cache<K, V> 13 | where 14 | K: Eq + Hash + Clone, 15 | V: Clone, 16 | { 17 | pub fn new(capacity: usize) -> Self { 18 | Cache(LruCache::new(capacity)) 19 | } 20 | 21 | pub fn try_cache<F: Fn(&K) -> Result<V>>(&mut self, key: &K, producer: F) -> Result<V> { 22 | let cached_value = self.0.get_mut(key).cloned(); 23 | if let Some(value) = cached_value { 24 | return Ok(value); 25 | } 26 | let value = producer(key)?; 27 | self.0.insert(key.clone(), value.clone()); 28 | Ok(value) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use failure::Fail; 2 | 3 | #[derive(Debug, Fail)] 4 | pub enum SnipsNluError { 5 | #[fail(display = "Unable to read file '{}'", _0)] 6 | ModelLoad(String), 7 | #[fail(display = "Mismatched model version: model is {} but runner is {}", model, runner)] 8 | WrongModelVersion{ model: String, runner: &'static str}, 9 | #[fail(display = "Unknown intent: '{}'", _0)] 10 | UnknownIntent(String), 11 | #[fail(display = "Internal error: {}", _0)] 12 | InternalError(String), 13 | } 14 | 15 | pub type Result<T> = ::std::result::Result<T, ::failure::Error>; 16 | -------------------------------------------------------------------------------- /src/injection/errors.rs: -------------------------------------------------------------------------------- 1 | use failure::{Backtrace, Context, Fail}; 2 | use std::fmt; 3 | use std::fmt::Display; 4 | 5 | #[derive(Debug)] 6 | pub struct NluInjectionError { 7 | inner: Context<NluInjectionErrorKind>, 8 | } 9 | 10 | #[derive(Debug, Fail)] 11 | pub enum NluInjectionErrorKind { 12 | #[fail(display = "Entity is not injectable: {}", msg)] 13 | EntityNotInjectable { msg: String }, 14 | #[fail(display = "Internal injection error: {}", msg)] 15 | InternalInjectionError { msg: String }, 16 | } 17 | 18 | // Boilerplate 19 | impl Fail for NluInjectionError { 20 | fn cause(&self) -> Option<&dyn Fail> { 21 | self.inner.cause() 22 | } 23 | 24 | fn backtrace(&self) -> Option<&Backtrace> { 25 | self.inner.backtrace() 26 | } 27 | } 28 | 29 | impl Display for NluInjectionError { 30 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 31 | Display::fmt(&self.inner, f) 32 | } 33 | } 34 | 35 | impl From<NluInjectionErrorKind> for NluInjectionError { 36 | fn from(kind: NluInjectionErrorKind) -> NluInjectionError { 37 | NluInjectionError { 38 | inner: Context::new(kind), 39 | } 40 | } 41 | } 42 | 43 | impl From<Context<NluInjectionErrorKind>> for NluInjectionError { 44 | fn from(inner: Context<NluInjectionErrorKind>) -> NluInjectionError { 45 | NluInjectionError { inner } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/injection/mod.rs: -------------------------------------------------------------------------------- 1 | mod errors; 2 | mod injection; 3 | 4 | pub use
self::errors::{NluInjectionError, NluInjectionErrorKind}; 5 | pub use self::injection::{InjectedEntity, InjectedValue, NluInjector}; 6 | -------------------------------------------------------------------------------- /src/intent_classifier/logreg.rs: -------------------------------------------------------------------------------- 1 | use ndarray::prelude::*; 2 | use ndarray::{array, stack}; 3 | 4 | use crate::errors::*; 5 | 6 | /// The multiclass probability estimates are derived from binary (one-vs.-rest) 7 | /// estimates by simple normalization 8 | pub struct MulticlassLogisticRegression { 9 | /// matrix with shape (f, c) 10 | /// ------------------------ 11 | /// 12 | /// - f = number of features 13 | /// - c = number of classes 14 | weights: Array2<f32>, 15 | } 16 | 17 | impl MulticlassLogisticRegression { 18 | fn nb_features(&self) -> usize { 19 | // without intercept 20 | self.weights.dim().0 - 1 21 | } 22 | 23 | fn nb_classes(&self) -> usize { 24 | self.weights.dim().1 25 | } 26 | 27 | fn is_binary(&self) -> bool { 28 | self.nb_classes() == 1 29 | } 30 | } 31 | 32 | impl MulticlassLogisticRegression { 33 | pub fn new(intercept: Array1<f32>, weights: Array2<f32>) -> Result<Self> { 34 | let nb_classes = intercept.dim(); 35 | let reshaped_intercept = intercept.into_shape((1, nb_classes))?; 36 | let weights_with_intercept = stack![Axis(0), reshaped_intercept, weights]; 37 | Ok(Self { 38 | weights: weights_with_intercept, 39 | }) 40 | } 41 | 42 | pub fn run(&self, features: &ArrayView1<f32>) -> Result<Array1<f32>> { 43 | let reshaped_features = features.into_shape((1, self.nb_features()))?; 44 | let reshaped_features = stack![Axis(1), array![[1.]], reshaped_features]; 45 | let mut result = reshaped_features 46 | .dot(&self.weights) 47 | .into_shape(self.nb_classes())?; 48 | result.mapv_inplace(logit); 49 | if self.is_binary() { 50 | return Ok(arr1(&[1.0 - result[0], result[0]])); 51 | } 52 | Ok(result) 53 | } 54 | } 55 | 56 | fn logit(x: f32) -> f32 { 57 | 1. / (1.
+ (-x).exp()) 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::MulticlassLogisticRegression; 63 | use crate::testutils::assert_epsilon_eq_array1; 64 | use ndarray::array; 65 | 66 | #[test] 67 | fn test_multiclass_logistic_regression() { 68 | // Given 69 | let intercept = array![0.98, 0.32, -0.76]; 70 | let weights = array![ 71 | [2.5, -0.6, 0.5], 72 | [1.2, 1.2, -2.7], 73 | [1.5, 0.1, -3.2], 74 | [-0.9, 1.4, 1.8] 75 | ]; 76 | 77 | let features = array![0.4, -2.3, 1.9, 1.3]; 78 | let regression = MulticlassLogisticRegression::new(intercept, weights).unwrap(); 79 | 80 | // When 81 | let predictions = regression.run(&features.view()).unwrap(); 82 | 83 | // Then 84 | let expected_predictions = array![0.7109495, 0.3384968, 0.8710191]; 85 | assert_epsilon_eq_array1(&predictions, &expected_predictions, 1e-06); 86 | } 87 | 88 | #[test] 89 | fn test_multiclass_logistic_regression_when_binary() { 90 | // Given 91 | let intercept = array![0.98]; 92 | let weights = array![[2.5], [1.2], [1.5], [-0.9]]; 93 | 94 | let features = array![0.4, -2.3, 1.9, 1.3]; 95 | let regression = MulticlassLogisticRegression::new(intercept, weights).unwrap(); 96 | 97 | // When 98 | let predictions = regression.run(&features.view()).unwrap(); 99 | 100 | // Then 101 | let expected_predictions = array![0.2890504, 0.7109495]; 102 | assert_epsilon_eq_array1(&predictions, &expected_predictions, 1e-06); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/intent_classifier/mod.rs: -------------------------------------------------------------------------------- 1 | mod featurizer; 2 | mod log_reg_intent_classifier; 3 | mod logreg; 4 | 5 | use std::fs::File; 6 | use std::path::Path; 7 | use std::sync::Arc; 8 | 9 | use crate::errors::*; 10 | use failure::{format_err, ResultExt}; 11 | use snips_nlu_ontology::IntentClassifierResult; 12 | 13 | pub use self::featurizer::{CooccurrenceVectorizer, Featurizer, TfidfVectorizer}; 14 | pub use self::log_reg_intent_classifier::LogRegIntentClassifier; 15 | use crate::models::ProcessingUnitMetadata; 16 | use crate::resources::SharedResources; 17 | 18 | pub trait IntentClassifier: Send + Sync { 19 | fn get_intent( 20 | &self, 21 | input: &str, 22 | intents_whitelist: Option<&[&str]>, 23 | ) -> Result<IntentClassifierResult>; 24 | 25 | fn get_intents(&self, input: &str) -> Result<Vec<IntentClassifierResult>>; 26 | } 27 | 28 | pub fn build_intent_classifier<P: AsRef<Path>>( 29 | path: P, 30 | shared_resources: Arc<SharedResources>, 31 | ) -> Result<Box<dyn IntentClassifier>> { 32 | let metadata_path = path.as_ref().join("metadata.json"); 33 | let metadata_file = File::open(&metadata_path).with_context(|_| { 34 | format!( 35 | "Cannot open intent classifier metadata file '{:?}'", 36 | &metadata_path 37 | ) 38 | })?; 39 | let metadata: ProcessingUnitMetadata = serde_json::from_reader(metadata_file) 40 | .with_context(|_| "Cannot deserialize intent classifier json data")?; 41 | match metadata { 42 | ProcessingUnitMetadata::LogRegIntentClassifier => { 43 | Ok(Box::new(LogRegIntentClassifier::from_path(path, shared_resources)?)
as _) 44 | } 45 | _ => Err(format_err!("{:?} is not an intent classifier", metadata)), 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/intent_parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod deterministic_intent_parser; 2 | pub mod lookup_intent_parser; 3 | pub mod probabilistic_intent_parser; 4 | 5 | use std::path::Path; 6 | use std::sync::Arc; 7 | 8 | use failure::format_err; 9 | use snips_nlu_ontology::IntentClassifierResult; 10 | 11 | pub use self::deterministic_intent_parser::DeterministicIntentParser; 12 | pub use self::lookup_intent_parser::LookupIntentParser; 13 | pub use self::probabilistic_intent_parser::ProbabilisticIntentParser; 14 | use crate::errors::*; 15 | use crate::models::ProcessingUnitMetadata; 16 | use crate::resources::SharedResources; 17 | pub use crate::slot_utils::InternalSlot; 18 | use crate::utils::IntentName; 19 | 20 | #[derive(Debug, Clone, PartialEq)] 21 | pub struct InternalParsingResult { 22 | pub intent: IntentClassifierResult, 23 | pub slots: Vec<InternalSlot>, 24 | } 25 | 26 | impl InternalParsingResult { 27 | pub fn empty() -> InternalParsingResult { 28 | InternalParsingResult { 29 | intent: IntentClassifierResult { 30 | intent_name: None, 31 | confidence_score: 1.0, 32 | }, 33 | slots: vec![], 34 | } 35 | } 36 | } 37 | 38 | pub fn internal_parsing_result( 39 | intent_name: Option<IntentName>, 40 | intent_proba: f32, 41 | slots: Vec<InternalSlot>, 42 | ) -> InternalParsingResult { 43 | InternalParsingResult { 44 | intent: IntentClassifierResult { 45 | intent_name, 46 | confidence_score: intent_proba, 47 | }, 48 | slots, 49 | } 50 | } 51 | 52 | pub trait IntentParser: Send + Sync { 53 | fn parse( 54 | &self, 55 | input: &str, 56 | intents_whitelist: Option<&[&str]>, 57 | ) -> Result<InternalParsingResult>; 58 | 59 | fn get_intents(&self, input: &str) -> Result<Vec<IntentClassifierResult>>; 60 | 61 | fn get_slots(&self, input: &str, intent: &str) -> Result<Vec<InternalSlot>>; 62 | } 63 | 64 | pub fn build_intent_parser<P: AsRef<Path>>( 65 | metadata: ProcessingUnitMetadata, 66 | path: P, 67 | shared_resources: Arc<SharedResources>, 68 | ) -> Result<Box<dyn IntentParser>> { 69 | match metadata { 70 | ProcessingUnitMetadata::LookupIntentParser => { 71 | Ok(Box::new(LookupIntentParser::from_path(path, shared_resources)?)
as _) 72 | } 73 | ProcessingUnitMetadata::DeterministicIntentParser => Ok(Box::new( 74 | DeterministicIntentParser::from_path(path, shared_resources)?, 75 | ) as _), 76 | ProcessingUnitMetadata::ProbabilisticIntentParser => Ok(Box::new( 77 | ProbabilisticIntentParser::from_path(path, shared_resources)?, 78 | ) as _), 79 | _ => Err(format_err!("{:?} is not an intent parser", metadata)), 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/language.rs: -------------------------------------------------------------------------------- 1 | use snips_nlu_ontology::Language; 2 | use snips_nlu_utils::language::Language as NluUtilsLanguage; 3 | 4 | pub trait FromLanguage { 5 | fn from_language(l: Language) -> Self; 6 | } 7 | 8 | impl FromLanguage for NluUtilsLanguage { 9 | fn from_language(l: Language) -> Self { 10 | match l { 11 | Language::DE => NluUtilsLanguage::DE, 12 | Language::EN => NluUtilsLanguage::EN, 13 | Language::ES => NluUtilsLanguage::ES, 14 | Language::FR => NluUtilsLanguage::FR, 15 | Language::IT => NluUtilsLanguage::IT, 16 | Language::JA => NluUtilsLanguage::JA, 17 | Language::KO => NluUtilsLanguage::KO, 18 | Language::PT_PT => NluUtilsLanguage::PT_PT, 19 | Language::PT_BR => NluUtilsLanguage::PT_BR, 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow( 2 | clippy::unreadable_literal, 3 | clippy::excessive_precision, 4 | clippy::module_inception 5 | )] 6 | 7 | mod entity_parser; 8 | pub mod errors; 9 | pub mod injection; 10 | mod intent_classifier; 11 | mod intent_parser; 12 | mod language; 13 | pub mod models; 14 | mod nlu_engine; 15 | mod resources; 16 | mod slot_filler; 17 | mod slot_utils; 18 | #[cfg(test)] 19 | mod testutils; 20 | mod utils; 21 | 22 | pub const MODEL_VERSION: &str = "0.20.0"; 23 | 24 | pub extern crate snips_nlu_ontology as ontology; 25 | pub use crate::errors::*; 26 | pub use crate::intent_classifier::{IntentClassifier, LogRegIntentClassifier}; 27 | pub use crate::intent_parser::{ 28 | DeterministicIntentParser, IntentParser, LookupIntentParser, ProbabilisticIntentParser, 29 | }; 30 | pub use crate::models::*; 31 | pub use crate::nlu_engine::SnipsNluEngine; 32 | pub use crate::resources::loading::load_shared_resources; 33 | pub use crate::resources::SharedResources; 34 | pub use crate::slot_filler::{CRFSlotFiller, SlotFiller}; 35 | pub use snips_nlu_ontology::Language; 36 | -------------------------------------------------------------------------------- /src/models/intent_classifier.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::IntentName; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct IntentClassifierModel { 9 | pub featurizer: Option<String>, 10 | pub intercept: Option<Vec<f32>>, 11 | pub coeffs: Option<Vec<Vec<f32>>>, 12 | pub intent_list: Vec<Option<IntentName>>, 13 | } 14 | 15 | #[derive(Debug, Deserialize)] 16 | pub struct FeaturizerModel { 17 | pub language_code: String, 18 | pub tfidf_vectorizer: String, 19 | pub cooccurrence_vectorizer: Option<String>, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct TfidfVectorizerModel { 24 | pub language_code: String, 25 | pub builtin_entity_scope: Vec<String>, 26 | pub vectorizer: SklearnVectorizerModel, 27 | pub config: TfidfVectorizerConfiguration, 28 | } 29 | 30 | #[derive(Debug, Deserialize)] 31 | pub struct
TfidfVectorizerConfiguration { 32 | pub use_stemming: bool, 33 | pub word_clusters_name: Option<String>, 34 | } 35 | 36 | #[derive(Debug, Deserialize)] 37 | pub struct SklearnVectorizerModel { 38 | pub idf_diag: Vec<f32>, 39 | pub vocab: HashMap<String, usize>, 40 | } 41 | 42 | #[derive(Debug, Deserialize)] 43 | pub struct CooccurrenceVectorizerModel { 44 | pub language_code: String, 45 | pub builtin_entity_scope: Vec<String>, 46 | pub word_pairs: HashMap<usize, (String, String)>, 47 | pub config: CooccurrenceVectorizerConfiguration, 48 | } 49 | 50 | #[derive(Debug, Deserialize)] 51 | pub struct CooccurrenceVectorizerConfiguration { 52 | pub window_size: Option<usize>, 53 | pub filter_stop_words: bool, 54 | pub keep_order: bool, 55 | pub unknown_words_replacement_string: Option<String>, 56 | } 57 | -------------------------------------------------------------------------------- /src/models/intent_parser.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | pub type InputHash = i32; 8 | pub type IntentId = i32; 9 | pub type SlotId = i32; 10 | 11 | #[derive(Debug, Deserialize)] 12 | pub struct DeterministicParserModel { 13 | pub language_code: String, 14 | pub patterns: HashMap<IntentName, Vec<String>>, 15 | pub group_names_to_slot_names: HashMap<String, SlotName>, 16 | pub slot_names_to_entities: HashMap<IntentName, HashMap<SlotName, EntityName>>, 17 | #[serde(default)] 18 | pub stop_words_whitelist: HashMap<IntentName, Vec<String>>, 19 | pub config: DeterministicParserConfig, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct LookupParserModel { 24 | pub language_code: String, 25 | pub slots_names: Vec<SlotName>, 26 | pub intents_names: Vec<IntentName>, 27 | pub map: HashMap<InputHash, (IntentId, Vec<SlotId>)>, 28 | pub entity_scopes: Vec<GroupedEntityScope>, 29 | pub stop_words_whitelist: HashMap<IntentName, Vec<String>>, 30 | pub config: LookupParserConfig, 31 | } 32 | 33 | #[derive(Debug, Deserialize)] 34 | pub struct GroupedEntityScope { 35 | pub intent_group: Vec<IntentName>, 36 | pub entity_scope: EntityScope, 37 | } 38 | 39 | #[derive(Debug, Deserialize)] 40 | pub struct EntityScope { 41 | pub builtin: Vec<EntityName>, 42 | pub custom: Vec<EntityName>, 43 | } 44 | 45 | #[derive(Debug, Deserialize)] 46 | pub struct DeterministicParserConfig { 47 | #[serde(default)] 48 | pub ignore_stop_words: bool, 49 | } 50 | 51 | #[derive(Debug, Deserialize)] 52 | pub struct LookupParserConfig { 53 | #[serde(default)] 54 | pub ignore_stop_words: bool, 55 | } 56 | 57 | #[derive(Debug, Deserialize)] 58 | pub struct ProbabilisticParserModel { 59 | pub slot_fillers: Vec<SlotFillerMetadata>, 60 | } 61 | 62 | #[derive(Debug, Deserialize)] 63 | pub struct SlotFillerMetadata { 64 | pub intent: IntentName, 65 | pub slot_filler_name: String, 66 | } 67 | -------------------------------------------------------------------------------- /src/models/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod intent_classifier; 2 | pub mod intent_parser; 3 | pub mod nlu_engine; 4 | pub mod processing_unit_metadata; 5 | pub mod slot_filler; 6 | 7 | pub use self::intent_classifier::*; 8 | pub use self::intent_parser::*; 9 | pub use self::nlu_engine::*; 10 | pub use self::processing_unit_metadata::*; 11 | pub use self::slot_filler::*; 12 | -------------------------------------------------------------------------------- /src/models/nlu_engine.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct ModelVersion { 9 | pub
model_version: String, 10 | } 11 | 12 | #[derive(Debug, Deserialize)] 13 | pub struct NluEngineModel { 14 | pub dataset_metadata: DatasetMetadata, 15 | pub intent_parsers: Vec<String>, 16 | pub model_version: String, 17 | pub training_package_version: String, 18 | pub builtin_entity_parser: String, 19 | pub custom_entity_parser: String, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct DatasetMetadata { 24 | pub language_code: String, 25 | pub entities: HashMap<EntityName, Entity>, 26 | pub slot_name_mappings: HashMap<IntentName, HashMap<SlotName, EntityName>>, 27 | } 28 | 29 | #[derive(Debug, Deserialize, Clone)] 30 | pub struct Entity { 31 | pub automatically_extensible: bool, 32 | } 33 | -------------------------------------------------------------------------------- /src/models/processing_unit_metadata.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | 3 | #[derive(Debug, Deserialize, Copy, Clone, PartialEq, Eq)] 4 | #[serde(tag = "unit_name")] 5 | #[serde(rename_all = "snake_case")] 6 | pub enum ProcessingUnitMetadata { 7 | DeterministicIntentParser, 8 | LookupIntentParser, 9 | ProbabilisticIntentParser, 10 | CrfSlotFiller, 11 | LogRegIntentClassifier, 12 | } 13 | 14 | #[cfg(test)] 15 | mod tests { 16 | use super::*; 17 | use serde_json; 18 | 19 | #[test] 20 | fn test_deserialize() { 21 | let data = r#"{ 22 | "unit_name": "crf_slot_filler" 23 | }"#; 24 | let metadata: ProcessingUnitMetadata = serde_json::from_str(data).unwrap(); 25 | assert_eq!(ProcessingUnitMetadata::CrfSlotFiller, metadata); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/models/slot_filler.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct SlotFillerModel { 9 | pub language_code: String, 10 | pub intent: IntentName, 11 | pub slot_name_mapping: HashMap<SlotName, EntityName>, 12 | pub crf_model_file: Option<String>, 13 | pub config: SlotFillerConfiguration, 14 | } 15 | 16 | #[derive(Debug, Deserialize)] 17 | pub struct SlotFillerConfiguration { 18 | pub tagging_scheme: u8, 19 | pub feature_factory_configs: Vec<FeatureFactory>, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct FeatureFactory { 24 | pub factory_name: String, 25 | pub offsets: Vec<i32>, 26 | pub args: HashMap<String, serde_json::Value>, 27 | } 28 | -------------------------------------------------------------------------------- /src/resources/gazetteer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use snips_nlu_utils::string::hash_str_to_i32; 3 | use std::collections::HashSet; 4 | use std::io::{BufRead, BufReader, Read}; 5 | use std::iter::FromIterator; 6 | 7 | pub trait Gazetteer: Send + Sync { 8 | fn contains(&self, value: &str) -> bool; 9 | } 10 | 11 | pub struct HashSetGazetteer { 12 | values: HashSet<i32>, 13 | } 14 | 15 | impl HashSetGazetteer { 16 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 17 | let reader = BufReader::new(reader); 18 | let mut values = HashSet::new(); 19 | for line in reader.lines() { 20 | let word = line?; 21 | if !word.is_empty() { 22 | values.insert(hash_str_to_i32(&*word)); 23 | } 24 | } 25 | Ok(Self { values }) 26 | } 27 | } 28 | 29 | impl FromIterator<String> for HashSetGazetteer { 30 | fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self { 31 | Self { 32 | values: iter 33 | .into_iter() 34 | .map(|str_value| hash_str_to_i32(&*str_value)) 35 | .collect(), 36 | } 37 | } 38 | }
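// Editor's sketch (not part of the original file): how the two constructors
// above are typically exercised. `from_reader` expects one value per line,
// while `from_iter` consumes owned `String`s; both store `hash_str_to_i32`
// hashes, and `contains` (implemented just below) hashes its query the same
// way before lookup. The function name is hypothetical and only illustrative.
#[cfg(test)]
fn _hashset_gazetteer_usage_sketch() {
    let gazetteer = HashSetGazetteer::from_iter(vec!["dog".to_string(), "cat".to_string()]);
    assert!(gazetteer.contains("dog") && !gazetteer.contains("bird"));
    // &[u8] implements Read, so a byte slice stands in for a resource file here.
    let gazetteer = HashSetGazetteer::from_reader("dog\ncat".as_bytes()).unwrap();
    assert!(gazetteer.contains("cat"));
}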
39 | 40 | impl Gazetteer for HashSetGazetteer { 41 | fn contains(&self, value: &str) -> bool { 42 | self.values.contains(&hash_str_to_i32(value)) 43 | } 44 | } 45 | 46 | #[cfg(test)] 47 | mod tests { 48 | use super::{Gazetteer, HashSetGazetteer}; 49 | 50 | #[test] 51 | fn test_hashset_gazetteer() { 52 | // Given 53 | let gazetteer: &[u8] = r#" 54 | dog 55 | cat 56 | bear 57 | crocodile"# 58 | .as_ref(); 59 | 60 | // When 61 | let gazetteer = HashSetGazetteer::from_reader(gazetteer); 62 | 63 | // Then 64 | assert!(gazetteer.is_ok()); 65 | let gazetteer = gazetteer.unwrap(); 66 | assert!(gazetteer.contains("dog")); 67 | assert!(gazetteer.contains("crocodile")); 68 | assert!(!gazetteer.contains("bird")); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/resources/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod gazetteer; 2 | pub mod loading; 3 | pub mod stemmer; 4 | pub mod word_clusterer; 5 | 6 | use std::collections::{HashMap, HashSet}; 7 | use std::sync::Arc; 8 | 9 | use self::gazetteer::Gazetteer; 10 | use self::stemmer::Stemmer; 11 | use self::word_clusterer::WordClusterer; 12 | use super::entity_parser::{BuiltinEntityParser, CustomEntityParser}; 13 | 14 | pub struct SharedResources { 15 | pub builtin_entity_parser: Arc<dyn BuiltinEntityParser>, 16 | pub custom_entity_parser: Arc<dyn CustomEntityParser>, 17 | pub gazetteers: HashMap<String, Arc<dyn Gazetteer>>, 18 | pub stemmer: Option<Arc<dyn Stemmer>>, 19 | pub word_clusterers: HashMap<String, Arc<dyn WordClusterer>>, 20 | pub stop_words: HashSet<String>, 21 | } 22 | -------------------------------------------------------------------------------- /src/resources/stemmer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use snips_nlu_utils::string::{hash_str_to_i32, normalize}; 3 | use std::collections::HashMap; 4 | use std::io::Read; 5 | use std::iter::FromIterator; 6 | 7 | pub trait Stemmer: Send + Sync { 8 | fn stem(&self, value: &str) -> String; 9 | } 10 | 11 | pub struct HashMapStemmer { 12 | values: HashMap<i32, String>, 13 | } 14 | 15 | impl HashMapStemmer { 16 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 17 | let mut values = HashMap::new(); 18 | let mut csv_reader = csv::ReaderBuilder::new() 19 | .delimiter(b',') 20 | .quoting(false) 21 | .flexible(true) 22 | .has_headers(false) 23 | .from_reader(reader); 24 | 25 | for record in csv_reader.records() { 26 | let elements = record?; 27 | let stem = &elements[0]; 28 | for value in elements.iter().skip(1) { 29 | values.insert(hash_str_to_i32(value), stem.to_string()); 30 | } 31 | } 32 | Ok(Self { values }) 33 | } 34 | } 35 | 36 | impl FromIterator<(String, String)> for HashMapStemmer { 37 | fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { 38 | Self { 39 | values: iter 40 | .into_iter() 41 | .map(|(str_key, str_value)| (hash_str_to_i32(&*str_key), str_value)) 42 | .collect(), 43 | } 44 | } 45 | } 46 | 47 | impl Stemmer for HashMapStemmer { 48 | fn stem(&self, value: &str) -> String { 49 | self.values 50 | .get(&hash_str_to_i32(&*normalize(value))) 51 | .map(|v| v.to_string()) 52 | .unwrap_or_else(|| value.to_string()) 53 | } 54 | } 55 | 56 | #[cfg(test)] 57 | mod tests { 58 | use super::*; 59 | 60 | #[test] 61 | fn test_hashmap_stemmer() { 62 | // Given 63 | let stems: &[u8] = r#" 64 | investigate,investigated,investigation,"investigate 65 | do,done,don't,doing,did,does"# 66 | .as_ref(); 67 | 68 | // When 69 | let stemmer = HashMapStemmer::from_reader(stems); 70 | 71 | // Then 72 | assert!(stemmer.is_ok()); 73 | let stemmer = stemmer.unwrap(); 74 |
assert_eq!(stemmer.stem("don't"), "do".to_string()); 75 | assert_eq!(stemmer.stem("does"), "do".to_string()); 76 | assert_eq!(stemmer.stem("\"investigate"), "investigate".to_string()); 77 | assert_eq!(stemmer.stem("unknown"), "unknown".to_string()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/resources/word_clusterer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use itertools::Either; 3 | use snips_nlu_ontology::Language; 4 | use snips_nlu_utils::string::hash_str_to_i32; 5 | use std::collections::HashMap; 6 | use std::io::Read; 7 | use std::str::FromStr; 8 | 9 | pub trait WordClusterer: Send + Sync { 10 | fn get_cluster(&self, word: &str) -> Option<String>; 11 | } 12 | 13 | pub struct HashMapWordClusterer { 14 | /// This implementation supports both u16 and raw string representations of 15 | /// word clusters 16 | values: Either<HashMap<i32, u16>, HashMap<i32, String>>, 17 | } 18 | 19 | impl HashMapWordClusterer { 20 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 21 | let mut csv_reader = csv::ReaderBuilder::new() 22 | .delimiter(b'\t') 23 | .quoting(false) 24 | .has_headers(false) 25 | .from_reader(reader); 26 | // This flag is switched to false as soon as a record is found which cannot 27 | // be converted to a u16 28 | let mut u16_casting_ok = true; 29 | let mut u16_values = HashMap::new(); 30 | let mut str_values = HashMap::new(); 31 | for record in csv_reader.records() { 32 | let elements = record?; 33 | let hashed_key = hash_str_to_i32(elements[0].as_ref()); 34 | // Casting into u16 is attempted only when all previous clusters were converted 35 | // successfully 36 | if u16_casting_ok { 37 | match u16::from_str(elements[1].as_ref()) { 38 | Ok(u16_value) => { 39 | u16_values.insert(hashed_key, u16_value); 40 | } 41 | Err(_) => { 42 | // This word cluster cannot be converted into a u16; move all the 43 | // previously stored clusters into a raw string representation 44 | for (hash, value) in u16_values.iter() { 45 | str_values.insert(*hash, format!("{}", value)); 46 | } 47 | str_values.insert(hashed_key, elements[1].to_string()); 48 | u16_casting_ok = false; 49 | u16_values.clear(); 50 | } 51 | } 52 | } else { 53 | str_values.insert(hashed_key, elements[1].to_string()); 54 | } 55 | } 56 | Ok(Self { 57 | values: if u16_casting_ok { 58 | Either::Left(u16_values) 59 | } else { 60 | Either::Right(str_values) 61 | }, 62 | }) 63 | } 64 | } 65 | 66 | impl WordClusterer for HashMapWordClusterer { 67 | fn get_cluster(&self, word: &str) -> Option<String> { 68 | let hashed_key = hash_str_to_i32(word); 69 | match &self.values { 70 | Either::Left(u16_values) => u16_values.get(&hashed_key).map(|v| format!("{}", v)), 71 | Either::Right(str_values) => str_values.get(&hashed_key).cloned(), 72 | } 73 | } 74 | } 75 | 76 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 77 | pub struct WordClustererConfiguration { 78 | language: Language, 79 | clusters_name: String, 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use super::*; 85 | 86 | #[test] 87 | fn test_hashmap_word_clusterer_with_non_u16_values() { 88 | // Given 89 | let clusters: &[u8] = r#" 90 | hello 42 91 | world 123 92 | "yolo cluster_which_is_not_u16 93 | "# 94 | .as_ref(); 95 | 96 | // When 97 | let clusterer = HashMapWordClusterer::from_reader(clusters); 98 | 99 | // Then 100 | assert!(clusterer.is_ok()); 101 | let clusterer = clusterer.unwrap(); 102 | assert!(clusterer.values.is_right()); 103 | assert_eq!(clusterer.get_cluster("hello"),
Some("42".to_string())); 104 | assert_eq!(clusterer.get_cluster("world"), Some("123".to_string())); 105 | assert_eq!(clusterer.get_cluster("\"yolo"), Some("cluster_which_is_not_u16".to_string())); 106 | assert_eq!(clusterer.get_cluster("unknown"), None); 107 | } 108 | 109 | #[test] 110 | fn test_hashmap_word_clusterer_with_u16_values() { 111 | // Given 112 | let clusters: &[u8] = r#" 113 | hello 42 114 | world 123 115 | yolo 65500 116 | "# 117 | .as_ref(); 118 | 119 | // When 120 | let clusterer = HashMapWordClusterer::from_reader(clusters); 121 | 122 | // Then 123 | assert!(clusterer.is_ok()); 124 | let clusterer = clusterer.unwrap(); 125 | assert!(clusterer.values.is_left()); 126 | assert_eq!(clusterer.get_cluster("hello"), Some("42".to_string())); 127 | assert_eq!(clusterer.get_cluster("world"), Some("123".to_string())); 128 | assert_eq!(clusterer.get_cluster("yolo"), Some("65500".to_string())); 129 | assert_eq!(clusterer.get_cluster("unknown"), None); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/slot_filler/features_utils.rs: -------------------------------------------------------------------------------- 1 | use std::iter::FromIterator; 2 | use std::str; 3 | 4 | use snips_nlu_utils::token::Token; 5 | 6 | pub fn get_word_chunk( 7 | word: &str, 8 | chunk_size: usize, 9 | chunk_start: usize, 10 | reverse: bool, 11 | ) -> Option { 12 | if reverse && chunk_size > chunk_start { 13 | return None; 14 | } 15 | let start = if reverse { 16 | chunk_start - chunk_size 17 | } else { 18 | chunk_start 19 | }; 20 | if start + chunk_size > word.chars().count() { 21 | None 22 | } else { 23 | Some(word.chars().skip(start).take(chunk_size).collect()) 24 | } 25 | } 26 | 27 | pub fn initial_string_from_tokens(tokens: &[Token]) -> String { 28 | let mut current_index = 0; 29 | let mut chunks: Vec = Vec::with_capacity(2 * tokens.len() - 1); 30 | for token in tokens { 31 | if token.char_range.start > current_index { 32 | let nb_spaces = token.char_range.start - current_index; 33 | let spaces = String::from_iter(vec![' '; nb_spaces]); 34 | chunks.push(spaces); 35 | } 36 | chunks.push(token.value.clone()); 37 | current_index = token.char_range.end; 38 | } 39 | chunks.join("") 40 | } 41 | 42 | #[cfg(test)] 43 | mod tests { 44 | use super::*; 45 | 46 | #[test] 47 | fn test_get_word_chunk() { 48 | // Given 49 | let word = "hello_world"; 50 | let chunk_size = 6; 51 | let chunk_start = 5; 52 | let reverse = false; 53 | 54 | // When 55 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 56 | 57 | // Then 58 | let expected_chunk = Some("_world".to_string()); 59 | assert_eq!(word_chunk, expected_chunk); 60 | } 61 | 62 | #[test] 63 | fn test_get_word_chunk_reversed() { 64 | // Given 65 | let word = "hello_world"; 66 | let chunk_size = 8; 67 | let chunk_start = 8; 68 | let reverse = true; 69 | 70 | // When 71 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 72 | 73 | // Then 74 | let expected_chunk = Some("hello_wo".to_string()); 75 | assert_eq!(word_chunk, expected_chunk); 76 | } 77 | 78 | #[test] 79 | fn test_get_word_chunk_out_of_bound() { 80 | // Given 81 | let word = "hello_world"; 82 | let chunk_size = 4; 83 | let chunk_start = 8; 84 | let reverse = false; 85 | 86 | // When 87 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 88 | 89 | // Then 90 | assert_eq!(word_chunk, None); 91 | } 92 | 93 | #[test] 94 | fn test_initial_string_from_tokens() { 95 | // Given 96 | let tokens = vec![ 
97 | Token::new("hello".to_string(), 0..5, 0..5), 98 | Token::new("world".to_string(), 9..14, 9..14), 99 | Token::new("!!!".to_string(), 17..20, 17..20), 100 | ]; 101 | 102 | // When 103 | let result = initial_string_from_tokens(&tokens); 104 | 105 | // Then 106 | assert_eq!("hello world !!!", &result); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/slot_filler/macros.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! get_features { 3 | ([$(($feature_type:ident,$feature_name:ident)),*]) => { 4 | #[derive(Debug, Copy, Clone, PartialEq, Hash, Eq)] 5 | pub enum FeatureKind { 6 | $( $feature_type ),* 7 | } 8 | 9 | impl FeatureKind { 10 | pub fn identifier(&self) -> &'static str { 11 | match self { 12 | $( 13 | FeatureKind::$feature_type => stringify!($feature_name), 14 | )* 15 | } 16 | } 17 | } 18 | 19 | $( 20 | impl FeatureKindRepr for $feature_type { 21 | fn feature_kind(&self) -> FeatureKind { 22 | FeatureKind::$feature_type 23 | } 24 | } 25 | )* 26 | 27 | fn get_features( 28 | f: &FeatureFactory, 29 | shared_resources: Arc<SharedResources>, 30 | ) -> Result<Vec<FeatureOffsetter>> { 31 | let features = match f.factory_name.as_ref() { 32 | $( 33 | stringify!($feature_name) => $feature_type::build_features(&f.args, shared_resources), 34 | )* 35 | _ => bail!("Feature {} not implemented", f.factory_name), 36 | }; 37 | Ok(features? 38 | .into_iter() 39 | .map(|feature| FeatureOffsetter { feature, offsets: f.offsets.clone() }) 40 | .collect()) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/slot_filler/mod.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | mod macros; 3 | pub mod crf_slot_filler; 4 | mod crf_utils; 5 | mod feature_processor; 6 | mod features; 7 | mod features_utils; 8 | 9 | use std::fs::File; 10 | use std::path::Path; 11 | use std::sync::Arc; 12 | 13 | use failure::{format_err, ResultExt}; 14 | use snips_nlu_utils::token::Token; 15 | 16 | use crate::errors::*; 17 | use crate::models::ProcessingUnitMetadata; 18 | use crate::resources::SharedResources; 19 | use crate::slot_utils::InternalSlot; 20 | 21 | pub use self::crf_slot_filler::*; 22 | use self::crf_utils::TaggingScheme; 23 | 24 | pub trait SlotFiller: Send + Sync { 25 | fn get_tagging_scheme(&self) -> TaggingScheme; 26 | fn get_slots(&self, text: &str) -> Result<Vec<InternalSlot>>; 27 | fn get_sequence_probability(&self, tokens: &[Token], tags: Vec<String>) -> Result<f64>; 28 | } 29 | 30 | pub fn build_slot_filler<P: AsRef<Path>>( 31 | path: P, 32 | shared_resources: Arc<SharedResources>, 33 | ) -> Result<Box<dyn SlotFiller>> { 34 | let metadata_path = path.as_ref().join("metadata.json"); 35 | let metadata_file = File::open(&metadata_path).with_context(|_| { 36 | format!( 37 | "Cannot open slot filler metadata file '{:?}'", 38 | &metadata_path 39 | ) 40 | })?; 41 | let metadata: ProcessingUnitMetadata = serde_json::from_reader(metadata_file) 42 | .with_context(|_| "Cannot deserialize slot filler json data")?; 43 | match metadata { 44 | ProcessingUnitMetadata::CrfSlotFiller => { 45 | Ok(Box::new(CRFSlotFiller::from_path(path, shared_resources)?)
as _) 46 | } 47 | _ => Err(format_err!("{:?} is not a slot filler", metadata)), 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/testutils.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | use std::iter::FromIterator; 3 | use std::sync::Arc; 4 | 5 | use ndarray::prelude::*; 6 | use snips_nlu_ontology::{BuiltinEntity, BuiltinEntityKind}; 7 | 8 | use crate::entity_parser::{BuiltinEntityParser, CustomEntity, CustomEntityParser}; 9 | use crate::errors::*; 10 | use crate::resources::gazetteer::Gazetteer; 11 | use crate::resources::stemmer::Stemmer; 12 | use crate::resources::word_clusterer::WordClusterer; 13 | use crate::resources::SharedResources; 14 | 15 | pub fn assert_epsilon_eq_array1(a: &Array1<f32>, b: &Array1<f32>, epsilon: f32) { 16 | assert_eq!(a.dim(), b.dim()); 17 | for (index, elem_a) in a.indexed_iter() { 18 | assert!(epsilon_eq(*elem_a, b[index], epsilon)) 19 | } 20 | } 21 | 22 | pub fn epsilon_eq(a: f32, b: f32, epsilon: f32) -> bool { 23 | let diff = a - b; 24 | diff < epsilon && diff > -epsilon 25 | } 26 | 27 | pub struct SharedResourcesBuilder { 28 | builtin_entity_parser: Arc<dyn BuiltinEntityParser>, 29 | custom_entity_parser: Arc<dyn CustomEntityParser>, 30 | gazetteers: HashMap<String, Arc<dyn Gazetteer>>, 31 | stemmer: Option<Arc<dyn Stemmer>>, 32 | word_clusterers: HashMap<String, Arc<dyn WordClusterer>>, 33 | stop_words: HashSet<String>, 34 | } 35 | 36 | impl Default for SharedResourcesBuilder { 37 | fn default() -> Self { 38 | Self { 39 | builtin_entity_parser: Arc::<MockedBuiltinEntityParser>::default(), 40 | custom_entity_parser: Arc::<MockedCustomEntityParser>::default(), 41 | gazetteers: HashMap::default(), 42 | stemmer: None, 43 | word_clusterers: HashMap::default(), 44 | stop_words: HashSet::default(), 45 | } 46 | } 47 | } 48 | 49 | impl SharedResourcesBuilder { 50 | pub fn builtin_entity_parser<P: BuiltinEntityParser + 'static>(mut self, parser: P) -> Self { 51 | self.builtin_entity_parser = Arc::new(parser) as _; 52 | self 53 | } 54 | 55 | pub fn custom_entity_parser<P: CustomEntityParser + 'static>(mut self, parser: P) -> Self { 56 | self.custom_entity_parser = Arc::new(parser) as _; 57 | self 58 | } 59 | 60 | pub fn stop_words(mut self, stop_words: HashSet<String>) -> Self { 61 | self.stop_words = stop_words; 62 | self 63 | } 64 | 65 | pub fn build(self) -> SharedResources { 66 | SharedResources { 67 | builtin_entity_parser: self.builtin_entity_parser, 68 | custom_entity_parser: self.custom_entity_parser, 69 | gazetteers: self.gazetteers, 70 | stemmer: self.stemmer, 71 | word_clusterers: self.word_clusterers, 72 | stop_words: self.stop_words, 73 | } 74 | } 75 | } 76 | 77 | #[derive(Default)] 78 | pub struct MockedBuiltinEntityParser { 79 | pub mocked_outputs: HashMap<String, Vec<BuiltinEntity>>, 80 | } 81 | 82 | impl BuiltinEntityParser for MockedBuiltinEntityParser { 83 | fn extract_entities( 84 | &self, 85 | sentence: &str, 86 | _filter_entity_kinds: Option<&[BuiltinEntityKind]>, 87 | _use_cache: bool, 88 | _max_alternative_resolved_values: usize, 89 | ) -> Result<Vec<BuiltinEntity>> { 90 | Ok(self 91 | .mocked_outputs 92 | .get(sentence) 93 | .cloned() 94 | .unwrap_or_else(|| vec![])) 95 | } 96 | } 97 | 98 | impl FromIterator<(String, Vec<BuiltinEntity>)> for MockedBuiltinEntityParser { 99 | fn from_iter<T: IntoIterator<Item = (String, Vec<BuiltinEntity>)>>(iter: T) -> Self { 100 | Self { 101 | mocked_outputs: HashMap::from_iter(iter), 102 | } 103 | } 104 | } 105 | 106 | #[derive(Default)] 107 | pub struct MockedCustomEntityParser { 108 | pub mocked_outputs: HashMap<String, Vec<CustomEntity>>, 109 | } 110 | 111 | impl CustomEntityParser for MockedCustomEntityParser { 112 | fn extract_entities( 113 | &self, 114 | sentence: &str, 115 | _filter_entity_kinds: Option<&[String]>, 116 | _max_alternative_resolved_values:
usize, 117 | ) -> Result<Vec<CustomEntity>> { 118 | Ok(self 119 | .mocked_outputs 120 | .get(sentence) 121 | .cloned() 122 | .unwrap_or_else(|| vec![])) 123 | } 124 | } 125 | 126 | impl FromIterator<(String, Vec<CustomEntity>)> for MockedCustomEntityParser { 127 | fn from_iter<T: IntoIterator<Item = (String, Vec<CustomEntity>)>>(iter: T) -> Self { 128 | Self { 129 | mocked_outputs: HashMap::from_iter(iter), 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /update_ontology_version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NEW_VERSION=${1?"usage $0 <new version>"} 4 | 5 | echo "Updating snips-nlu-ontology versions to version ${NEW_VERSION}" 6 | find . -name "Cargo.toml" -exec perl -p -i -e "s/snipsco\/snips-nlu-ontology\".*\$/snipsco\/snips-nlu-ontology\", tag = \"$NEW_VERSION\" }/g" {} \; 7 | find . -name "build.gradle" -exec perl -p -i -e "s/compile \"ai.snips:snips-nlu-ontology:.*\"\$/compile \"ai.snips:snips-nlu-ontology:$NEW_VERSION\"/g" {} \; 8 | -------------------------------------------------------------------------------- /update_version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NEW_VERSION=${1?"usage $0 <new version>"} 4 | 5 | echo "Updating versions to version ${NEW_VERSION}" 6 | find . -name "Cargo.toml" -exec perl -p -i -e "s/^version = \".*\"$/version = \"$NEW_VERSION\"/g" {} \; 7 | find . -name "cbindgen.toml" -exec perl -p -i -e "s/^header = \"#define SNIPS_NLU_VERSION.*\"$/header = \"#define SNIPS_NLU_VERSION \\\\\"${NEW_VERSION}\\\\\"\"/g" {} \; 8 | perl -p -i -e "s/^version = \".*\"\$/version = \"$NEW_VERSION\"/g" */**/build.gradle 9 | perl -p -i -e "s/^VERSION=\".*\"\$/VERSION=\"$NEW_VERSION\"/g" */**/**/**/build.sh 10 | perl -p -i -e "s/SNIPS_NLU_VERSION \".*\"/SNIPS_NLU_VERSION \"$NEW_VERSION\"/g" platforms/c/libsnips_nlu.h 11 | 12 | echo "$NEW_VERSION" > platforms/python/snips_nlu_rust/__version__ 13 | 14 | if [[ "${NEW_VERSION}" == "${NEW_VERSION/-SNAPSHOT/}" ]] 15 | then 16 | perl -p -i -e \ 17 | "s/^snips-nlu-ffi = \{.*\}$/snips-nlu-ffi = { git = \"https:\/\/github.com\/snipsco\/snips-nlu-rs\", tag = \"$NEW_VERSION\" }/g" \ 18 | platforms/python/ffi/Cargo.toml 19 | else 20 | perl -p -i -e \ 21 | "s/^snips-nlu-ffi = \{.*\}$/snips-nlu-ffi = { path = \"..\/..\/..\/ffi\" }/g" \ 22 | platforms/python/ffi/Cargo.toml 23 | 24 | fi 25 | --------------------------------------------------------------------------------
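Editor's note: a minimal usage sketch of the public API re-exported in src/lib.rs above, in the spirit of examples/interactive_parsing_cli.rs. It assumes the crate is imported as snips_nlu_lib, that serde_json is available as a dependency, and that a trained engine directory has been produced by the snips-nlu Python training library; the path and query strings are placeholders.

use snips_nlu_lib::SnipsNluEngine;

fn main() {
    // Load a trained engine from a directory (placeholder path).
    let engine = SnipsNluEngine::from_path("path/to/trained_engine").unwrap();
    // Passing `None, None` leaves the intents whitelist and blacklist unset.
    let result = engine.parse("Make me two cups of coffee", None, None).unwrap();
    println!("{}", serde_json::to_string_pretty(&result).unwrap());
}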