├── .appveyor.yml ├── .gitignore ├── .travis.yml ├── .travis ├── before_install.sh ├── install.sh └── test.sh ├── CHANGELOG.md ├── Cargo.toml ├── LICENSE ├── LICENSE-APACHE ├── LICENSE-MIT ├── README.rst ├── benches └── nlu_engine.rs ├── data └── tests │ ├── configs │ ├── beverage_config.json │ ├── game_config.json │ └── music_config.json │ ├── datasets │ ├── beverage_dataset.json │ ├── game_dataset.json │ └── music_dataset.json │ └── models │ ├── nlu_engine_beverage.zip │ ├── nlu_engine_beverage │ ├── builtin_entity_parser │ │ └── metadata.json │ ├── custom_entity_parser │ │ ├── metadata.json │ │ └── parser │ │ │ ├── metadata.json │ │ │ └── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ ├── deterministic_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── lookup_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ │ ├── intent_classifier │ │ │ ├── featurizer │ │ │ │ ├── cooccurrence_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ │ ├── featurizer.json │ │ │ │ ├── metadata.json │ │ │ │ └── tfidf_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ ├── intent_classifier.json │ │ │ └── metadata.json │ │ ├── intent_parser.json │ │ ├── metadata.json │ │ ├── slot_filler_0 │ │ │ ├── metadata.json │ │ │ ├── model55imurmx.crfsuite │ │ │ └── slot_filler.json │ │ └── slot_filler_1 │ │ │ ├── metadata.json │ │ │ ├── model8a9dqxnp.crfsuite │ │ │ └── slot_filler.json │ └── resources │ │ └── en │ │ ├── gazetteers │ │ └── top_10000_words_stemmed.txt │ │ ├── metadata.json │ │ ├── noise.txt │ │ ├── stemming │ │ └── stems.txt │ │ └── stop_words.txt │ ├── nlu_engine_game │ ├── builtin_entity_parser │ │ └── metadata.json │ ├── custom_entity_parser │ │ ├── metadata.json │ │ └── parser │ │ │ ├── metadata.json │ │ │ └── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ ├── lookup_intent_parser │ │ ├── intent_parser.json │ │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ │ ├── intent_classifier │ │ │ ├── featurizer │ │ │ │ ├── featurizer.json │ │ │ │ ├── metadata.json │ │ │ │ └── tfidf_vectorizer │ │ │ │ │ ├── metadata.json │ │ │ │ │ └── vectorizer.json │ │ │ ├── intent_classifier.json │ │ │ └── metadata.json │ │ ├── intent_parser.json │ │ ├── metadata.json │ │ └── slot_filler_0 │ │ │ ├── metadata.json │ │ │ ├── model.crfsuite │ │ │ └── slot_filler.json │ └── resources │ │ └── en │ │ ├── gazetteers │ │ └── top_10000_words_stemmed.txt │ │ ├── metadata.json │ │ ├── noise.txt │ │ ├── stemming │ │ └── stems.txt │ │ ├── stop_words.txt │ │ └── word_clusters │ │ └── brown_clusters.txt │ └── nlu_engine_music │ ├── builtin_entity_parser │ ├── gazetteer_entity_parser │ │ ├── metadata.json │ │ ├── parser_1 │ │ │ ├── metadata.json │ │ │ └── parser │ │ └── parser_2 │ │ │ ├── metadata.json │ │ │ └── parser │ └── metadata.json │ ├── custom_entity_parser │ ├── metadata.json │ └── parser │ │ ├── metadata.json │ │ └── parser_1 │ │ ├── metadata.json │ │ └── parser │ ├── deterministic_intent_parser │ ├── intent_parser.json │ └── metadata.json │ ├── nlu_engine.json │ ├── probabilistic_intent_parser │ ├── intent_classifier │ │ ├── featurizer │ │ │ ├── cooccurrence_vectorizer │ │ │ │ ├── metadata.json │ │ │ │ └── vectorizer.json │ │ │ ├── featurizer.json │ │ │ ├── metadata.json │ │ │ └── tfidf_vectorizer │ │ │ │ ├── metadata.json │ │ │ │ └── vectorizer.json │ │ ├── intent_classifier.json │ │ └── metadata.json │ ├── intent_parser.json │ ├── metadata.json │ └── 
slot_filler_0 │ │ ├── metadata.json │ │ ├── modeluzcfum35.crfsuite │ │ └── slot_filler.json │ └── resources │ └── fr │ ├── gazetteers │ └── top_10000_words_stemmed.txt │ ├── metadata.json │ ├── noise.txt │ ├── stemming │ └── stems.txt │ └── stop_words.txt ├── examples └── interactive_parsing_cli.rs ├── ffi ├── Cargo.toml ├── cbindgen.toml └── src │ └── lib.rs ├── platforms ├── c │ ├── libsnips_nlu.h │ └── module.modulemap ├── kotlin │ ├── .gitignore │ ├── build.gradle │ ├── gradle │ │ └── wrapper │ │ │ ├── gradle-wrapper.jar │ │ │ └── gradle-wrapper.properties │ ├── gradlew │ ├── gradlew.bat │ ├── settings.gradle │ └── src │ │ ├── main │ │ ├── android │ │ │ └── AndroidManifest.xml │ │ └── kotlin │ │ │ ├── ai │ │ │ └── snips │ │ │ │ └── nlu │ │ │ │ └── NluEngine.kt │ │ │ └── com │ │ │ └── sun │ │ │ └── jna │ │ │ └── JnaUtils.kt │ │ └── test │ │ └── kotlin │ │ └── ai │ │ └── snips │ │ └── nlu │ │ └── NluEngineTest.kt ├── python │ ├── .gitignore │ ├── LICENSE │ ├── MANIFEST.in │ ├── README.rst │ ├── ffi │ │ ├── .gitignore │ │ ├── Cargo.toml │ │ └── src │ │ │ └── lib.rs │ ├── requirements.txt │ ├── setup.py │ ├── snips_nlu_rust │ │ ├── __init__.py │ │ ├── __version__ │ │ ├── dylib │ │ │ └── .gitignore │ │ ├── nlu_engine.py │ │ ├── tests │ │ │ ├── __init__.py │ │ │ ├── test_nlu_engine_wrapper.py │ │ │ └── utils.py │ │ └── utils.py │ └── tox.ini └── swift │ ├── .gitignore │ ├── SnipsNlu.xcworkspace │ ├── contents.xcworkspacedata │ └── xcshareddata │ │ └── IDEWorkspaceChecks.plist │ └── SnipsNlu │ ├── Dependencies │ ├── .gitignore │ └── build.sh │ ├── SnipsNlu.xcodeproj │ ├── project.pbxproj │ ├── project.xcworkspace │ │ └── contents.xcworkspacedata │ └── xcshareddata │ │ └── xcschemes │ │ ├── SnipsNlu-iOS.xcscheme │ │ └── SnipsNlu-macOS.xcscheme │ ├── SnipsNlu │ ├── Info.plist │ ├── NluEngine.swift │ └── SnipsNlu.h │ └── SnipsNluTests │ ├── Info.plist │ └── NluEngineTests.swift ├── post_release.sh ├── src ├── entity_parser │ ├── builtin_entity_parser.rs │ ├── custom_entity_parser.rs │ ├── mod.rs │ └── utils.rs ├── errors.rs ├── injection │ ├── errors.rs │ ├── injection.rs │ └── mod.rs ├── intent_classifier │ ├── featurizer.rs │ ├── log_reg_intent_classifier.rs │ ├── logreg.rs │ └── mod.rs ├── intent_parser │ ├── deterministic_intent_parser.rs │ ├── lookup_intent_parser.rs │ ├── mod.rs │ └── probabilistic_intent_parser.rs ├── language.rs ├── lib.rs ├── models │ ├── intent_classifier.rs │ ├── intent_parser.rs │ ├── mod.rs │ ├── nlu_engine.rs │ ├── processing_unit_metadata.rs │ └── slot_filler.rs ├── nlu_engine.rs ├── resources │ ├── gazetteer.rs │ ├── loading.rs │ ├── mod.rs │ ├── stemmer.rs │ └── word_clusterer.rs ├── slot_filler │ ├── crf_slot_filler.rs │ ├── crf_utils.rs │ ├── feature_processor.rs │ ├── features.rs │ ├── features_utils.rs │ ├── macros.rs │ └── mod.rs ├── slot_utils.rs ├── testutils.rs └── utils.rs ├── update_ontology_version.sh └── update_version.sh /.appveyor.yml: -------------------------------------------------------------------------------- 1 | environment: 2 | matrix: 3 | - TARGET: x86_64-pc-windows-msvc 4 | 5 | branches: 6 | only: 7 | - /main\/.*/ 8 | - /release\/.*/ 9 | 10 | install: 11 | - appveyor-retry appveyor DownloadFile https://win.rustup.rs/ -FileName rustup-init.exe 12 | - rustup-init.exe -y --default-host %TARGET% 13 | - set PATH=%PATH%;C:\Users\appveyor\.cargo\bin 14 | - if defined MSYS2_BITS set PATH=%PATH%;C:\msys64\mingw%MSYS2_BITS%\bin 15 | - rustc -V 16 | - cargo -V 17 | - ps: (Get-Content platforms/python/ffi/Cargo.toml) | ForEach-Object { $_ -replace 
"^snips-nlu-ffi = .*$", "snips-nlu-ffi = { path = `"../../../ffi`" }" } | Set-Content platforms/python/ffi/Cargo.toml 18 | 19 | build: false 20 | 21 | test_script: 22 | - cargo build --all --tests --benches 23 | - cargo test --all 24 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ## Cargo 2 | 3 | target/ 4 | Cargo.lock 5 | **/*.rs.bk 6 | 7 | ## Idea 8 | 9 | .idea/ 10 | *.iml 11 | 12 | ## VIM 13 | 14 | # Swap 15 | [._]*.s[a-v][a-z] 16 | [._]*.sw[a-p] 17 | [._]s[a-v][a-z] 18 | [._]sw[a-p] 19 | 20 | # Session 21 | Session.vim 22 | 23 | # Temporary 24 | .netrwhist 25 | *~ 26 | # Auto-generated tag files 27 | tags 28 | 29 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | jobs: 2 | include: 3 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 4 | os: osx 5 | osx_image: xcode10.2 6 | language: generic 7 | sudo: true 8 | env: 9 | - TOXENV=py27 10 | - PYTHON_TESTS=true 11 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 12 | os: osx 13 | osx_image: xcode10.2 14 | language: generic 15 | sudo: true 16 | env: 17 | - TOXENV=py36 18 | - PYTHON_TESTS=true 19 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 20 | os: osx 21 | osx_image: xcode10.2 22 | language: generic 23 | sudo: true 24 | env: 25 | - MACOS_SWIFT_TESTS=true 26 | - IOS_SWIFT_TESTS=true 27 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 28 | os: linux 29 | language: python 30 | python: 2.7 31 | env: 32 | - TOXENV=py27 33 | - PYTHON_TESTS=true 34 | - os: linux 35 | language: python 36 | python: 3.6 37 | env: 38 | - TOXENV=py36 39 | - PYTHON_TESTS=true 40 | - if: head_branch =~ /^release\/.+$/ or head_branch =~ /^main\/.+$/ 41 | os: linux 42 | language: java 43 | jdk: openjdk8 44 | env: 45 | - KOTLIN_TESTS=true 46 | - os: linux 47 | language: rust 48 | rust: stable 49 | env: 50 | - RUST_TESTS=true 51 | 52 | before_install: . 
./.travis/before_install.sh 53 | 54 | install: ./.travis/install.sh 55 | 56 | script: ./.travis/test.sh 57 | -------------------------------------------------------------------------------- /.travis/before_install.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -ev 3 | 4 | # Install Rust 5 | if [[ -z ${TRAVIS_RUST_VERSION+w} ]]; then 6 | curl https://sh.rustup.rs -sSf | bash -s -- -y 7 | fi 8 | 9 | if [[ ${TRAVIS_OS_NAME} == "osx" ]]; then 10 | if [[ ${PYTHON_TESTS} == true ]]; then 11 | # install pyenv 12 | git clone https://github.com/pyenv/pyenv $HOME/.pyenv 13 | git --git-dir=$HOME/.pyenv/.git --work-tree=$HOME/.pyenv checkout v1.2.11 # Fix for https://github.com/pyenv/pyenv/issues/1066 14 | PYENV_ROOT="$HOME/.pyenv" 15 | PATH="$PYENV_ROOT/bin:$PATH" 16 | eval "$(pyenv init -)" 17 | 18 | # CFLAGS stuff is needed because of https://github.com/pyenv/pyenv/issues/1219 19 | case "${TOXENV}" in 20 | "py27") 21 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 2.7.14 22 | pyenv global 2.7.14 23 | ;; 24 | "py36") 25 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 3.6.1 26 | pyenv global 3.6.1 27 | ;; 28 | "py37") 29 | CFLAGS="-I$(xcrun --show-sdk-path)/usr/include" pyenv install 3.7.2 30 | pyenv global 3.7.2 31 | ;; 32 | esac 33 | pyenv rehash 34 | 35 | # A manual check that the correct version of Python is running. 36 | python --version 37 | fi 38 | 39 | if [[ "${IOS_SWIFT_TESTS}" == "true" ]]; then 40 | PATH="$HOME/.cargo/bin:$PATH" 41 | rustup target install x86_64-apple-ios 42 | fi 43 | fi 44 | -------------------------------------------------------------------------------- /.travis/install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | echo "Replacing snips-nlu-ffi url for local version" 5 | perl -p -i -e \ 6 | "s/^snips-nlu-ffi = .*\$/snips-nlu-ffi = { path = \"..\/..\/..\/ffi\" \}/g" \ 7 | platforms/python/ffi/Cargo.toml 8 | -------------------------------------------------------------------------------- /.travis/test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ev 3 | 4 | export PATH="$HOME/.cargo/bin:$PATH" 5 | 6 | if [[ "${RUST_TESTS}" == "true" ]]; then 7 | echo "Running rust tests..." 8 | cargo test --all 9 | cargo check --benches 10 | fi 11 | 12 | if [[ "${PYTHON_TESTS}" == "true" ]]; then 13 | echo "Running python tests..." 14 | cd platforms/python 15 | pip install tox 16 | tox 17 | cd - 18 | fi 19 | 20 | if [[ "${KOTLIN_TESTS}" == "true" ]]; then 21 | echo "Running kotlin tests..." 22 | cargo build -p snips-nlu-ffi 23 | cd platforms/kotlin 24 | ./gradlew -Pdebug -PrustTargetPath=../../target test --info 25 | cd - 26 | fi 27 | 28 | if [[ "${MACOS_SWIFT_TESTS}" == "true" ]]; then 29 | echo "Running macOS swift tests..." 30 | cargo build -p snips-nlu-ffi 31 | cd platforms/swift 32 | mkdir -p build/DerivedData 33 | set -o pipefail && xcodebuild \ 34 | -IDECustomDerivedDataLocation=build/DerivedData \ 35 | -workspace SnipsNlu.xcworkspace \ 36 | -scheme SnipsNlu-macOS \ 37 | TARGET_BUILD_TYPE=debug \ 38 | SNIPS_USE_LOCAL=1 \ 39 | clean \ 40 | test \ 41 | | xcpretty 42 | cd - 43 | fi 44 | 45 | if [[ "${IOS_SWIFT_TESTS}" == "true" ]]; then 46 | echo "Running iOS swift tests..." 
47 | TARGET_SYSROOT=$(xcrun --sdk iphonesimulator --show-sdk-path) \ 48 | cargo build -p snips-nlu-ffi --target x86_64-apple-ios 49 | cd platforms/swift 50 | mkdir -p build/DerivedData 51 | set -o pipefail && xcodebuild \ 52 | -IDECustomDerivedDataLocation=build/DerivedData \ 53 | -workspace SnipsNlu.xcworkspace \ 54 | -scheme SnipsNlu-iOS \ 55 | -destination 'platform=iOS Simulator,name=iPhone 8,OS=latest' \ 56 | TARGET_BUILD_TYPE=debug \ 57 | SNIPS_USE_LOCAL=1 \ 58 | clean \ 59 | test \ 60 | | xcpretty 61 | cd - 62 | fi 63 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-lib" 3 | version = "0.65.6" 4 | authors = [ 5 | "Adrien Ball ", 6 | "Clement Doumouro ", 7 | "Thibaut Lorrain ", 8 | "Kevin Lefevre " 9 | ] 10 | repository = "https://github.com/snipsco/snips-nlu-rs" 11 | description = "Rust implementation of Snips NLU" 12 | edition = "2018" 13 | 14 | [workspace] 15 | members = [ 16 | "ffi", 17 | "platforms/python/ffi", 18 | ] 19 | 20 | [dependencies] 21 | crfsuite = { git = "https://github.com/snipsco/crfsuite-rs", tag = "0.3.3" } 22 | snips-nlu-ontology = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.67.1" } 23 | snips-nlu-utils = { git = "https://github.com/snipsco/snips-nlu-utils", tag = "0.9.1" } 24 | snips-nlu-parsers = { git = "https://github.com/snipsco/snips-nlu-parsers", tag = "0.4.3" } 25 | failure = "0.1" 26 | base64 = "0.10" 27 | itertools = { version = "0.8", default-features = false } 28 | log = "0.4" 29 | lru-cache = "0.1" 30 | serde = { version = "1.0", features = ["derive"] } 31 | serde_json = "1.0" 32 | tempfile = "3" 33 | ndarray = "0.12" 34 | regex = "1.0" 35 | csv = "1.0" 36 | zip = { version = "0.5", default-features = false, features = ["deflate"] } 37 | 38 | [dev-dependencies] 39 | bencher = { git = "https://github.com/snipsco/bencher", rev = "63910ace" } 40 | clap = "2.32" 41 | dinghy-test = "0.4" 42 | env_logger = "0.6" 43 | maplit = "1.0" 44 | fs_extra = "1.1" 45 | 46 | [[bench]] 47 | name = "nlu_engine" 48 | harness = false 49 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | ## License 2 | 3 | Licensed under either of 4 | * Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) 5 | * MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) 6 | at your option. 7 | 8 | ### Contribution 9 | 10 | Unless you explicitly state otherwise, any contribution intentionally submitted 11 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall 12 | be dual licensed as above, without any additional terms or conditions. 
13 | -------------------------------------------------------------------------------- /LICENSE-MIT: -------------------------------------------------------------------------------- 1 | Permission is hereby granted, free of charge, to any 2 | person obtaining a copy of this software and associated 3 | documentation files (the "Software"), to deal in the 4 | Software without restriction, including without 5 | limitation the rights to use, copy, modify, merge, 6 | publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software 8 | is furnished to do so, subject to the following 9 | conditions: 10 | 11 | The above copyright notice and this permission notice 12 | shall be included in all copies or substantial portions 13 | of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 16 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 17 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 18 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT 19 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 21 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 22 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 24 | -------------------------------------------------------------------------------- /README.rst: -------------------------------------------------------------------------------- 1 | Snips NLU Rust 2 | ============== 3 | 4 | .. image:: https://travis-ci.org/snipsco/snips-nlu-rs.svg?branch=master 5 | :target: https://travis-ci.org/snipsco/snips-nlu-rs 6 | 7 | .. image:: https://ci.appveyor.com/api/projects/status/rsf27a9txeomic8o/branch/master?svg=true 8 | :target: https://ci.appveyor.com/project/snipsco/snips-nlu-rs 9 | 10 | Installation 11 | ------------ 12 | 13 | Add it to your ``Cargo.toml``: 14 | 15 | .. code-block:: toml 16 | 17 | [dependencies] 18 | snips-nlu-lib = { git = "https://github.com/snipsco/snips-nlu-rs", branch = "master" } 19 | 20 | Add ``extern crate snips_nlu_lib`` to your crate root and you are good to go! 21 | 22 | 23 | Intent Parsing with Snips NLU 24 | ----------------------------- 25 | 26 | The purpose of the main crate of this repository, ``snips-nlu-lib``, is to perform an information 27 | extraction task called *intent parsing*. 28 | 29 | To illustrate the main purpose of this lib, consider the following sentence: 30 | 31 | .. code-block:: text 32 | 33 | "What will be the weather in paris at 9pm?" 34 | 35 | Properly trained, the Snips NLU engine will be able to extract structured data such as: 36 | 37 | .. code-block:: json 38 | 39 | { 40 | "intent": { 41 | "intentName": "searchWeatherForecast", 42 | "confidenceScore": 0.95 43 | }, 44 | "slots": [ 45 | { 46 | "value": "paris", 47 | "entity": "locality", 48 | "slotName": "forecast_locality" 49 | }, 50 | { 51 | "value": { 52 | "kind": "InstantTime", 53 | "value": "2018-02-08 20:00:00 +00:00" 54 | }, 55 | "entity": "snips/datetime", 56 | "slotName": "forecast_start_datetime" 57 | } 58 | ] 59 | } 60 | 61 | 62 | In order to achieve such a result, the NLU engine needs to be fed with a trained model (JSON file). 63 | This repository only contains the inference part; to produce trained models, please check 64 | the `Snips NLU python library `_.
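For a quick preview of the Rust API, here is a minimal sketch mirroring the calls used in ``benches/nlu_engine.rs`` below (``SnipsNluEngine::from_path`` and ``parse``), assuming ``from_path`` accepts any path-like argument; the result is simply ``Debug``-printed:

.. code-block:: rust

    use snips_nlu_lib::SnipsNluEngine;

    fn main() {
        // Load one of the sample engines shipped under data/tests/models
        let engine = SnipsNluEngine::from_path("data/tests/models/nlu_engine_beverage")
            .expect("failed to load the NLU engine");
        // Same call as in benches/nlu_engine.rs; the two trailing `None`s leave
        // the optional filtering arguments unset
        let result = engine.parse("Make me two cups of coffee please", None, None);
        println!("{:#?}", result);
    }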
65 | 66 | 67 | Example and API Usage 68 | --------------------- 69 | 70 | The `interactive parsing CLI `_ is a good example 71 | of how to use ``snips-nlu-rs``. 72 | 73 | Here is how you can run the CLI example: 74 | 75 | .. code-block:: bash 76 | 77 | $ git clone https://github.com/snipsco/snips-nlu-rs 78 | $ cd snips-nlu-rs 79 | $ cargo run --example interactive_parsing_cli data/tests/models/nlu_engine_beverage 80 | 81 | Here we used a sample trained engine, which consists of two intents: ``MakeCoffee`` and ``MakeTea``. 82 | Thus, it will be able to parse queries like ``"Make me two cups of coffee please"`` or ``"I'd like a hot tea"``. 83 | 84 | As mentioned in the previous section, you can train your own NLU engine with the 85 | `Snips NLU python library `_. 86 | 87 | 88 | License 89 | ------- 90 | 91 | Licensed under either of 92 | * Apache License, Version 2.0 (`LICENSE-APACHE `_ or http://www.apache.org/licenses/LICENSE-2.0) 93 | * MIT license (`LICENSE-MIT `_ or http://opensource.org/licenses/MIT) 94 | at your option. 95 | 96 | Contribution 97 | ------------ 98 | 99 | Unless you explicitly state otherwise, any contribution intentionally submitted 100 | for inclusion in the work by you, as defined in the Apache-2.0 license, shall 101 | be dual licensed as above, without any additional terms or conditions. 102 | -------------------------------------------------------------------------------- /benches/nlu_engine.rs: -------------------------------------------------------------------------------- 1 | #![allow(clippy::never_loop)] 2 | 3 | #[macro_use] 4 | extern crate bencher; 5 | extern crate dinghy_test; 6 | extern crate snips_nlu_lib; 7 | 8 | use std::env; 9 | 10 | use bencher::Bencher; 11 | use snips_nlu_lib::*; 12 | 13 | const ENGINE_DIR_ENV: &str = "SNIPS_NLU_BENCH_ENGINE_DIR"; 14 | const SENTENCE_ENV: &str = "SNIPS_NLU_BENCH_SENTENCE"; 15 | 16 | fn file_path(filename: &str) -> ::std::path::PathBuf { 17 | dinghy_test::try_test_file_path("data") 18 | .unwrap_or_else(|| "../data".into()) 19 | .join(filename) 20 | } 21 | 22 | fn load_nlu_engine() -> SnipsNluEngine { 23 | let engine_path = if let Ok(engine_directory) = env::var(ENGINE_DIR_ENV) { 24 | file_path(&engine_directory) 25 | } else { 26 | file_path("untracked") 27 | }; 28 | 29 | SnipsNluEngine::from_path(engine_path).unwrap() 30 | } 31 | 32 | fn nlu_loading(b: &mut Bencher) { 33 | b.iter(|| { 34 | let _ = load_nlu_engine(); 35 | }); 36 | } 37 | 38 | fn nlu_parsing(b: &mut Bencher) { 39 | let nlu_engine = load_nlu_engine(); 40 | let sentence = env::var(SENTENCE_ENV) 41 | .map_err(|_| format!("{} env var not defined", SENTENCE_ENV)) 42 | .unwrap(); 43 | 44 | b.iter(|| { 45 | let _ = nlu_engine.parse(&sentence, None, None); 46 | }); 47 | } 48 | 49 | benchmark_group!(load, nlu_loading); 50 | benchmark_group!(run, nlu_parsing); 51 | 52 | benchmark_main!(load, run); 53 | -------------------------------------------------------------------------------- /data/tests/configs/beverage_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "lookup_intent_parser", 6 | "ignore_stop_words": true 7 | }, 8 | { 9 | "unit_name": "deterministic_intent_parser", 10 | "max_pattern_length": 1000, 11 | "max_queries": 100, 12 | "ignore_stop_words": true 13 | }, 14 | { 15 | "unit_name": "probabilistic_intent_parser", 16 | "intent_classifier_config": { 17 | "data_augmentation_config": { 18 | "noise_factor": 5, 19 |
"unknown_word_prob": 0, 20 | "max_unknown_words": 0, 21 | "add_builtin_entities_examples": false, 22 | "unknown_words_replacement_string": null, 23 | "min_utterances": 20 24 | }, 25 | "unit_name": "log_reg_intent_classifier", 26 | "featurizer_config": { 27 | "cooccurrence_vectorizer_config": { 28 | "window_size": 5, 29 | "unit_name": "cooccurrence_vectorizer", 30 | "filter_stop_words": true, 31 | "unknown_words_replacement_string": null, 32 | "keep_order": true 33 | }, 34 | "unit_name": "featurizer", 35 | "added_cooccurrence_feature_ratio": 0.25, 36 | "tfidf_vectorizer_config": { 37 | "unit_name": "tfidf_vectorizer", 38 | "use_stemming": false, 39 | "word_clusters_name": null 40 | }, 41 | "pvalue_threshold": 0.4 42 | }, 43 | "random_seed": null 44 | }, 45 | "slot_filler_config": { 46 | "unit_name": "crf_slot_filler", 47 | "random_seed": null, 48 | "tagging_scheme": 1, 49 | "data_augmentation_config": { 50 | "capitalization_ratio": 0.2, 51 | "min_utterances": 200, 52 | "add_builtin_entities_examples": true 53 | }, 54 | "crf_args": { 55 | "c2": 0.1, 56 | "c1": 0.1, 57 | "algorithm": "lbfgs" 58 | }, 59 | "feature_factory_configs": [ 60 | { 61 | "args": { 62 | "common_words_gazetteer_name": "top_10000_words_stemmed", 63 | "use_stemming": true, 64 | "n": 1 65 | }, 66 | "factory_name": "ngram", 67 | "offsets": [ 68 | -2, 69 | -1, 70 | 0, 71 | 1, 72 | 2 73 | ] 74 | }, 75 | { 76 | "args": { 77 | "common_words_gazetteer_name": "top_10000_words_stemmed", 78 | "use_stemming": true, 79 | "n": 2 80 | }, 81 | "factory_name": "ngram", 82 | "offsets": [ 83 | -2, 84 | 1 85 | ] 86 | }, 87 | { 88 | "args": {}, 89 | "factory_name": "is_digit", 90 | "offsets": [ 91 | -1, 92 | 0, 93 | 1 94 | ] 95 | }, 96 | { 97 | "args": {}, 98 | "factory_name": "is_first", 99 | "offsets": [ 100 | -2, 101 | -1, 102 | 0 103 | ] 104 | }, 105 | { 106 | "args": {}, 107 | "factory_name": "is_last", 108 | "offsets": [ 109 | 0, 110 | 1, 111 | 2 112 | ] 113 | }, 114 | { 115 | "args": { 116 | "n": 1 117 | }, 118 | "factory_name": "shape_ngram", 119 | "offsets": [ 120 | 0 121 | ] 122 | }, 123 | { 124 | "args": { 125 | "n": 2 126 | }, 127 | "factory_name": "shape_ngram", 128 | "offsets": [ 129 | -1, 130 | 0 131 | ] 132 | }, 133 | { 134 | "args": { 135 | "n": 3 136 | }, 137 | "factory_name": "shape_ngram", 138 | "offsets": [ 139 | -1 140 | ] 141 | }, 142 | { 143 | "args": { 144 | "tagging_scheme_code": 2, 145 | "use_stemming": true 146 | }, 147 | "factory_name": "entity_match", 148 | "drop_out": 0.5, 149 | "offsets": [ 150 | -2, 151 | -1, 152 | 0 153 | ] 154 | }, 155 | { 156 | "args": { 157 | "tagging_scheme_code": 1 158 | }, 159 | "factory_name": "builtin_entity_match", 160 | "offsets": [ 161 | -2, 162 | -1, 163 | 0 164 | ] 165 | } 166 | ] 167 | } 168 | } 169 | ] 170 | } 171 | -------------------------------------------------------------------------------- /data/tests/configs/game_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "lookup_intent_parser", 6 | "ignore_stop_words": true 7 | }, 8 | { 9 | "unit_name": "deterministic_intent_parser", 10 | "max_pattern_length": 1000, 11 | "max_queries": 100, 12 | "ignore_stop_words": true 13 | }, 14 | { 15 | "unit_name": "probabilistic_intent_parser", 16 | "intent_classifier_config": { 17 | "data_augmentation_config": { 18 | "noise_factor": 5, 19 | "unknown_word_prob": 0, 20 | "max_unknown_words": 0, 21 | "add_builtin_entities_examples": false, 22 | 
"unknown_words_replacement_string": null, 23 | "min_utterances": 20 24 | }, 25 | "unit_name": "log_reg_intent_classifier", 26 | "featurizer_config": { 27 | "cooccurrence_vectorizer_config": { 28 | "window_size": 5, 29 | "unit_name": "cooccurrence_vectorizer", 30 | "filter_stop_words": true, 31 | "unknown_words_replacement_string": null, 32 | "keep_order": true 33 | }, 34 | "unit_name": "featurizer", 35 | "added_cooccurrence_feature_ratio": 0.25, 36 | "tfidf_vectorizer_config": { 37 | "unit_name": "tfidf_vectorizer", 38 | "use_stemming": false, 39 | "word_clusters_name": null 40 | }, 41 | "pvalue_threshold": 0.4 42 | }, 43 | "random_seed": null 44 | }, 45 | "slot_filler_config": { 46 | "unit_name": "crf_slot_filler", 47 | "random_seed": null, 48 | "tagging_scheme": 1, 49 | "data_augmentation_config": { 50 | "capitalization_ratio": 0.2, 51 | "min_utterances": 200, 52 | "add_builtin_entities_examples": true 53 | }, 54 | "crf_args": { 55 | "c2": 0.1, 56 | "c1": 0.1, 57 | "algorithm": "lbfgs" 58 | }, 59 | "feature_factory_configs": [ 60 | { 61 | "args": { 62 | "common_words_gazetteer_name": "top_10000_words_stemmed", 63 | "use_stemming": true, 64 | "n": 1 65 | }, 66 | "factory_name": "ngram", 67 | "offsets": [ 68 | -2, 69 | -1, 70 | 0, 71 | 1, 72 | 2 73 | ] 74 | }, 75 | { 76 | "args": { 77 | "common_words_gazetteer_name": "top_10000_words_stemmed", 78 | "use_stemming": true, 79 | "n": 2 80 | }, 81 | "factory_name": "ngram", 82 | "offsets": [ 83 | -2, 84 | 1 85 | ] 86 | }, 87 | { 88 | "args": {}, 89 | "factory_name": "is_digit", 90 | "offsets": [ 91 | -1, 92 | 0, 93 | 1 94 | ] 95 | }, 96 | { 97 | "args": {}, 98 | "factory_name": "is_first", 99 | "offsets": [ 100 | -2, 101 | -1, 102 | 0 103 | ] 104 | }, 105 | { 106 | "args": {}, 107 | "factory_name": "is_last", 108 | "offsets": [ 109 | 0, 110 | 1, 111 | 2 112 | ] 113 | }, 114 | { 115 | "args": { 116 | "n": 1 117 | }, 118 | "factory_name": "shape_ngram", 119 | "offsets": [ 120 | 0 121 | ] 122 | }, 123 | { 124 | "args": { 125 | "n": 2 126 | }, 127 | "factory_name": "shape_ngram", 128 | "offsets": [ 129 | -1, 130 | 0 131 | ] 132 | }, 133 | { 134 | "args": { 135 | "n": 3 136 | }, 137 | "factory_name": "shape_ngram", 138 | "offsets": [ 139 | -1 140 | ] 141 | }, 142 | { 143 | "args": { 144 | "tagging_scheme_code": 2, 145 | "use_stemming": true 146 | }, 147 | "factory_name": "entity_match", 148 | "drop_out": 0.5, 149 | "offsets": [ 150 | -2, 151 | -1, 152 | 0 153 | ] 154 | }, 155 | { 156 | "args": { 157 | "tagging_scheme_code": 1 158 | }, 159 | "factory_name": "builtin_entity_match", 160 | "offsets": [ 161 | -2, 162 | -1, 163 | 0 164 | ] 165 | } 166 | ] 167 | } 168 | } 169 | ] 170 | } 171 | -------------------------------------------------------------------------------- /data/tests/configs/music_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "nlu_engine", 3 | "intent_parsers_configs": [ 4 | { 5 | "unit_name": "deterministic_intent_parser", 6 | "max_pattern_length": 1000, 7 | "max_queries": 100, 8 | "ignore_stop_words": true 9 | }, 10 | { 11 | "unit_name": "probabilistic_intent_parser", 12 | "intent_classifier_config": { 13 | "data_augmentation_config": { 14 | "noise_factor": 5, 15 | "unknown_word_prob": 0.5, 16 | "max_unknown_words": 5, 17 | "add_builtin_entities_examples": true, 18 | "unknown_words_replacement_string": "unknownword", 19 | "min_utterances": 20 20 | }, 21 | "unit_name": "log_reg_intent_classifier", 22 | "featurizer_config": { 23 | "cooccurrence_vectorizer_config": { 24 
| "window_size": 5, 25 | "unit_name": "cooccurrence_vectorizer", 26 | "filter_stop_words": true, 27 | "unknown_words_replacement_string": "unknownword", 28 | "keep_order": true 29 | }, 30 | "unit_name": "featurizer", 31 | "added_cooccurrence_feature_ratio": 0.25, 32 | "tfidf_vectorizer_config": { 33 | "unit_name": "tfidf_vectorizer", 34 | "use_stemming": false, 35 | "word_clusters_name": null 36 | }, 37 | "pvalue_threshold": 0.4 38 | }, 39 | "random_seed": null 40 | }, 41 | "slot_filler_config": { 42 | "unit_name": "crf_slot_filler", 43 | "random_seed": null, 44 | "tagging_scheme": 1, 45 | "data_augmentation_config": { 46 | "capitalization_ratio": 0.2, 47 | "min_utterances": 200, 48 | "add_builtin_entities_examples": true 49 | }, 50 | "crf_args": { 51 | "c2": 0.1, 52 | "c1": 0.1, 53 | "algorithm": "lbfgs" 54 | }, 55 | "feature_factory_configs": [ 56 | { 57 | "args": { 58 | "common_words_gazetteer_name": "top_10000_words_stemmed", 59 | "use_stemming": true, 60 | "n": 1 61 | }, 62 | "factory_name": "ngram", 63 | "offsets": [ 64 | -2, 65 | -1, 66 | 0, 67 | 1, 68 | 2 69 | ] 70 | }, 71 | { 72 | "args": { 73 | "common_words_gazetteer_name": "top_10000_words_stemmed", 74 | "use_stemming": true, 75 | "n": 2 76 | }, 77 | "factory_name": "ngram", 78 | "offsets": [ 79 | -2, 80 | 1 81 | ] 82 | }, 83 | { 84 | "args": {}, 85 | "factory_name": "is_digit", 86 | "offsets": [ 87 | -1, 88 | 0, 89 | 1 90 | ] 91 | }, 92 | { 93 | "args": {}, 94 | "factory_name": "is_first", 95 | "offsets": [ 96 | -2, 97 | -1, 98 | 0 99 | ] 100 | }, 101 | { 102 | "args": {}, 103 | "factory_name": "is_last", 104 | "offsets": [ 105 | 0, 106 | 1, 107 | 2 108 | ] 109 | }, 110 | { 111 | "args": { 112 | "n": 1 113 | }, 114 | "factory_name": "shape_ngram", 115 | "offsets": [ 116 | 0 117 | ] 118 | }, 119 | { 120 | "args": { 121 | "n": 2 122 | }, 123 | "factory_name": "shape_ngram", 124 | "offsets": [ 125 | -1, 126 | 0 127 | ] 128 | }, 129 | { 130 | "args": { 131 | "n": 3 132 | }, 133 | "factory_name": "shape_ngram", 134 | "offsets": [ 135 | -1 136 | ] 137 | }, 138 | { 139 | "args": { 140 | "tagging_scheme_code": 2, 141 | "use_stemming": true 142 | }, 143 | "factory_name": "entity_match", 144 | "drop_out": 0.5, 145 | "offsets": [ 146 | -2, 147 | -1, 148 | 0 149 | ] 150 | }, 151 | { 152 | "args": { 153 | "tagging_scheme_code": 1 154 | }, 155 | "factory_name": "builtin_entity_match", 156 | "offsets": [ 157 | -2, 158 | -1, 159 | 0 160 | ] 161 | } 162 | ] 163 | } 164 | } 165 | ] 166 | } 167 | -------------------------------------------------------------------------------- /data/tests/datasets/beverage_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "entities": { 3 | "Temperature": { 4 | "automatically_extensible": true, 5 | "use_synonyms": true, 6 | "data": [], 7 | "matching_strictness": 1.0 8 | }, 9 | "snips/number": {} 10 | }, 11 | "intents": { 12 | "MakeCoffee": { 13 | "utterances": [ 14 | { 15 | "data": [ 16 | { 17 | "text": "make me " 18 | }, 19 | { 20 | "text": "one", 21 | "entity": "snips/number", 22 | "slot_name": "number_of_cups" 23 | }, 24 | { 25 | "text": " cup of coffee please" 26 | } 27 | ] 28 | }, 29 | { 30 | "data": [ 31 | { 32 | "text": "brew " 33 | }, 34 | { 35 | "text": "three hundred and four", 36 | "entity": "snips/number", 37 | "slot_name": "number_of_cups" 38 | }, 39 | { 40 | "text": " cups of coffee" 41 | } 42 | ] 43 | }, 44 | { 45 | "data": [ 46 | { 47 | "text": "can you prepare " 48 | }, 49 | { 50 | "text": "2001", 51 | "entity": "snips/number", 52 | 
"slot_name": "number_of_cups" 53 | }, 54 | { 55 | "text": " cup of coffee" 56 | } 57 | ] 58 | } 59 | ] 60 | }, 61 | "MakeTea": { 62 | "utterances": [ 63 | { 64 | "data": [ 65 | { 66 | "text": "make me a " 67 | }, 68 | { 69 | "text": "hot", 70 | "entity": "Temperature", 71 | "slot_name": "beverage_temperature" 72 | }, 73 | { 74 | "text": " cup of tea" 75 | } 76 | ] 77 | }, 78 | { 79 | "data": [ 80 | { 81 | "text": "make me " 82 | }, 83 | { 84 | "text": "five", 85 | "entity": "snips/number", 86 | "slot_name": "number_of_cups" 87 | }, 88 | { 89 | "text": " tea cups" 90 | } 91 | ] 92 | }, 93 | { 94 | "data": [ 95 | { 96 | "text": "i want " 97 | }, 98 | { 99 | "text": "2001", 100 | "entity": "snips/number", 101 | "slot_name": "number_of_cups" 102 | }, 103 | { 104 | "text": " cups of " 105 | }, 106 | { 107 | "text": "boiling hot", 108 | "entity": "Temperature", 109 | "slot_name": "beverage_temperature" 110 | }, 111 | { 112 | "text": " tea pls" 113 | } 114 | ] 115 | }, 116 | { 117 | "data": [ 118 | { 119 | "text": "can you prepare " 120 | }, 121 | { 122 | "text": "twenty one", 123 | "entity": "snips/number", 124 | "slot_name": "number_of_cups" 125 | }, 126 | { 127 | "text": " cup of " 128 | }, 129 | { 130 | "text": "cold", 131 | "entity": "Temperature", 132 | "slot_name": "beverage_temperature" 133 | }, 134 | { 135 | "text": " tea ?" 136 | } 137 | ] 138 | } 139 | ] 140 | } 141 | }, 142 | "language": "en" 143 | } -------------------------------------------------------------------------------- /data/tests/datasets/game_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "entities": { 3 | "game": { 4 | "automatically_extensible": true, 5 | "data": [ 6 | { 7 | "synonyms": [ 8 | "space invader" 9 | ], 10 | "value": "Space Invader Limited Edition" 11 | }, 12 | { 13 | "synonyms": [ 14 | "invader attack" 15 | ], 16 | "value": "Invader Attack 3" 17 | }, 18 | { 19 | "synonyms": [ 20 | "invader war" 21 | ], 22 | "value": "Invader War Demo" 23 | }, 24 | { 25 | "synonyms": [ 26 | "star invader" 27 | ], 28 | "value": "Star Invader II" 29 | } 30 | ], 31 | "matching_strictness": 0.5, 32 | "use_synonyms": true 33 | } 34 | }, 35 | "intents": { 36 | "PlayGame": { 37 | "utterances": [ 38 | { 39 | "data": [ 40 | { 41 | "text": "I want to play to " 42 | }, 43 | { 44 | "entity": "game", 45 | "slot_name": "game", 46 | "text": "space invader" 47 | } 48 | ] 49 | }, 50 | { 51 | "data": [ 52 | { 53 | "text": "please launch the " 54 | }, 55 | { 56 | "entity": "game", 57 | "slot_name": "game", 58 | "text": "invader attack" 59 | }, 60 | { 61 | "text": " game" 62 | } 63 | ] 64 | } 65 | ] 66 | } 67 | }, 68 | "language": "en" 69 | } 70 | -------------------------------------------------------------------------------- /data/tests/datasets/music_dataset.json: -------------------------------------------------------------------------------- 1 | { 2 | "intents": { 3 | "adri:PlayMusic": { 4 | "utterances": [ 5 | { 6 | "data": [ 7 | { 8 | "text": "mets un son de " 9 | }, 10 | { 11 | "text": "l'imperatrice", 12 | "entity": "snips/musicArtist", 13 | "slot_name": "musicArtist" 14 | }, 15 | { 16 | "text": " " 17 | } 18 | ] 19 | }, 20 | { 21 | "data": [ 22 | { 23 | "text": "je veux ecouter une chanson de " 24 | }, 25 | { 26 | "text": "linkin park", 27 | "entity": "snips/musicArtist", 28 | "slot_name": "musicArtist" 29 | }, 30 | { 31 | "text": " please" 32 | } 33 | ] 34 | }, 35 | { 36 | "data": [ 37 | { 38 | "text": "je souhaiterais écouter l'album " 39 | }, 40 | { 41 | "text": "random 
access memories", 42 | "entity": "snips/musicAlbum", 43 | "slot_name": "musicAlbum" 44 | } 45 | ] 46 | }, 47 | { 48 | "data": [ 49 | { 50 | "text": "mets l'album " 51 | }, 52 | { 53 | "text": "gravé dans la roche", 54 | "entity": "snips/musicAlbum", 55 | "slot_name": "musicAlbum" 56 | }, 57 | { 58 | "text": " de " 59 | }, 60 | { 61 | "text": "sniper", 62 | "entity": "snips/musicArtist", 63 | "slot_name": "musicArtist" 64 | }, 65 | { 66 | "text": " stp" 67 | } 68 | ] 69 | }, 70 | { 71 | "data": [ 72 | { 73 | "text": "lance l'album " 74 | }, 75 | { 76 | "text": "conspiracy of one", 77 | "entity": "snips/musicAlbum", 78 | "slot_name": "musicAlbum" 79 | }, 80 | { 81 | "text": " veux tu ?" 82 | } 83 | ] 84 | }, 85 | { 86 | "data": [ 87 | { 88 | "text": "mets-moi du " 89 | }, 90 | { 91 | "text": "jacques brel", 92 | "entity": "snips/musicArtist", 93 | "slot_name": "musicArtist" 94 | } 95 | ] 96 | }, 97 | { 98 | "data": [ 99 | { 100 | "text": "je veux ecouter l'album " 101 | }, 102 | { 103 | "text": "discovery", 104 | "entity": "snips/musicAlbum", 105 | "slot_name": "musicAlbum" 106 | }, 107 | { 108 | "text": " des " 109 | }, 110 | { 111 | "text": "daft punk", 112 | "entity": "snips/musicArtist", 113 | "slot_name": "musicArtist" 114 | } 115 | ] 116 | }, 117 | { 118 | "data": [ 119 | { 120 | "text": "peux-tu mettre les " 121 | }, 122 | { 123 | "text": "rolling stones", 124 | "entity": "snips/musicArtist", 125 | "slot_name": "musicArtist" 126 | }, 127 | { 128 | "text": " stp" 129 | } 130 | ] 131 | }, 132 | { 133 | "data": [ 134 | { 135 | "text": "je voudrais ecouter " 136 | }, 137 | { 138 | "text": "michael jackson", 139 | "entity": "snips/musicArtist", 140 | "slot_name": "musicArtist" 141 | } 142 | ] 143 | }, 144 | { 145 | "data": [ 146 | { 147 | "text": "je voudrais ecouter ma playlist " 148 | }, 149 | { 150 | "text": "jazz", 151 | "entity": "playlist", 152 | "slot_name": "playlist" 153 | } 154 | ] 155 | } 156 | ], 157 | "version": "0.8.0", 158 | "language": "fr" 159 | } 160 | }, 161 | "entities": { 162 | "snips/musicArtist": { 163 | "name": "snips/musicArtist", 164 | "data": [], 165 | "use_synonyms": false, 166 | "automatically_extensible": false 167 | }, 168 | "snips/musicAlbum": { 169 | "name": "snips/musicAlbum", 170 | "data": [], 171 | "use_synonyms": false, 172 | "automatically_extensible": false 173 | }, 174 | "playlist": { 175 | "name": "playlist", 176 | "data": [], 177 | "use_synonyms": false, 178 | "automatically_extensible": false 179 | } 180 | }, 181 | "language": "fr" 182 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage.zip -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "gazetteer_parser": null 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } 
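As an aside, here is a rough sketch of how a metadata file like the one just above could be read back with ``serde``/``serde_json`` (both already declared in ``Cargo.toml``). The struct name and ``main`` function are hypothetical illustrations, not the crate's actual model types (those live under ``src/models/``):

    use serde::Deserialize;

    // Hypothetical mirror of the custom_entity_parser/metadata.json shown above;
    // the field names match the JSON keys of this test model.
    #[derive(Debug, Deserialize)]
    struct CustomEntityParserMetadata {
        language: String,
        parser_directory: String,
        parser_usage: usize,
    }

    fn main() -> Result<(), Box<dyn std::error::Error>> {
        let raw = std::fs::read_to_string(
            "data/tests/models/nlu_engine_beverage/custom_entity_parser/metadata.json",
        )?;
        let metadata: CustomEntityParserMetadata = serde_json::from_str(&raw)?;
        println!("{:?}", metadata);
        Ok(())
    }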
-------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "Temperature", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":1.0,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/deterministic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "max_pattern_length": 1000, 5 | "max_queries": 100, 6 | "unit_name": "deterministic_intent_parser" 7 | }, 8 | "group_names_to_slot_names": { 9 | "group0": "beverage_temperature", 10 | "group1": "number_of_cups" 11 | }, 12 | "language_code": "en", 13 | "patterns": { 14 | "MakeCoffee": [ 15 | "^\\s*make\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*coffee\\s*$", 16 | "^\\s*brew\\s*(?P<group1>%SNIPSNUMBER%)\\s*cups\\s*of\\s*coffee\\s*$", 17 | "^\\s*prepare\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*coffee\\s*$" 18 | ], 19 | "MakeTea": [ 20 | "^\\s*make\\s*(?P<group0>%TEMPERATURE%)\\s*cup\\s*of\\s*tea\\s*$", 21 | "^\\s*make\\s*(?P<group1>%SNIPSNUMBER%)\\s*tea\\s*cups\\s*$", 22 | "^\\s*i\\s*want\\s*(?P<group1>%SNIPSNUMBER%)\\s*cups\\s*of\\s*(?P<group0>%TEMPERATURE%)\\s*tea\\s*$", 23 | "^\\s*prepare\\s*(?P<group1>%SNIPSNUMBER%)\\s*cup\\s*of\\s*(?P<group0>%TEMPERATURE%)\\s*tea\\s*$" 24 | ] 25 | }, 26 | "slot_names_to_entities": { 27 | "MakeCoffee": { 28 | "number_of_cups": "snips/number" 29 | }, 30 | "MakeTea": { 31 | "beverage_temperature": "Temperature", 32 | "number_of_cups": "snips/number" 33 | } 34 | }, 35 | "stop_words_whitelist": {} 36 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/deterministic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "deterministic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/lookup_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "unit_name": "lookup_intent_parser" 5 | }, 6 | "entity_scopes": [ 7 | { 8 | "entity_scope": { 9 | "builtin": [ 10 | "snips/number" 11 | ], 12 | "custom": [] 13 | }, 14 | "intent_group": [ 15 | "MakeCoffee" 16 | ] 17 | }, 18 | { 19 | "entity_scope": { 20 | "builtin": [ 21 | "snips/number" 22 | ], 23 | "custom": [ 24 | "Temperature" 25 | ] 26 | }, 27 | "intent_group": [ 28 | "MakeTea" 29 | ] 30 | } 31 | ], 32
| "intents_names": [ 33 | "MakeCoffee", 34 | "MakeTea" 35 | ], 36 | "language_code": "en", 37 | "map": { 38 | "-1658454006": [ 39 | 1, 40 | [ 41 | 0 42 | ] 43 | ], 44 | "-1533083481": [ 45 | 0, 46 | [ 47 | 0 48 | ] 49 | ], 50 | "-1416877420": [ 51 | 0, 52 | [ 53 | 0 54 | ] 55 | ], 56 | "-1362288387": [ 57 | 1, 58 | [ 59 | 0, 60 | 1 61 | ] 62 | ], 63 | "-687749971": [ 64 | 0, 65 | [ 66 | 0 67 | ] 68 | ], 69 | "1085718744": [ 70 | 1, 71 | [ 72 | 1 73 | ] 74 | ], 75 | "1413162768": [ 76 | 1, 77 | [ 78 | 0, 79 | 1 80 | ] 81 | ] 82 | }, 83 | "slots_names": [ 84 | "number_of_cups", 85 | "beverage_temperature" 86 | ], 87 | "stop_words_whitelist": {} 88 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/lookup_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "lookup_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "cooccurrence_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/number" 4 | ], 5 | "config": { 6 | "filter_stop_words": true, 7 | "keep_order": true, 8 | "unit_name": "cooccurrence_vectorizer", 9 | "unknown_words_replacement_string": null, 10 | "window_size": 5 11 | }, 12 | "language_code": "en", 13 | "word_pairs": { 14 | "0": [ 15 | "SNIPSNUMBER", 16 | "coffee" 17 | ], 18 | "1": [ 19 | "SNIPSNUMBER", 20 | "tea" 21 | ], 22 | "2": [ 23 | "TEMPERATURE", 24 | "tea" 25 | ], 26 | "3": [ 27 | "of", 28 | "coffee" 29 | ], 30 | "4": [ 31 | "of", 32 | "tea" 33 | ] 34 | } 35 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "added_cooccurrence_feature_ratio": 0.25, 4 | "cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": null, 9 | "window_size": 5 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": "cooccurrence_vectorizer", 20 | "language_code": "en", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- 
/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/number" 4 | ], 5 | "config": { 6 | "unit_name": "tfidf_vectorizer", 7 | "use_stemming": false, 8 | "word_clusters_name": null 9 | }, 10 | "language_code": "en", 11 | "vectorizer": { 12 | "idf_diag": [ 13 | 4.157000421150114, 14 | 3.176171168138387, 15 | 4.002849741322855, 16 | 3.8693183486983322, 17 | 2.2110902720948, 18 | 3.463853240590168, 19 | 2.904237452654745, 20 | 4.339321977944068, 21 | 2.7707060600302227, 22 | 3.0583881324820035, 23 | 3.176171168138387, 24 | 3.463853240590168, 25 | 3.3097025607629096, 26 | 3.0583881324820035, 27 | 3.0043209112117277, 28 | 1.9057086225436182, 29 | 3.8693183486983322, 30 | 4.157000421150114, 31 | 3.463853240590168, 32 | 2.904237452654745, 33 | 2.5814640603916943, 34 | 4.157000421150114, 35 | 3.2407096892759584 36 | ], 37 | "vocab": { 38 | "?": 0, 39 | "a": 1, 40 | "boiling": 2, 41 | "brew": 3, 42 | "builtinentityfeaturesnipsnumber": 4, 43 | "can": 5, 44 | "coffee": 6, 45 | "cold": 7, 46 | "cup": 8, 47 | "cups": 9, 48 | "entityfeaturetemperature": 10, 49 | "hot": 11, 50 | "i": 12, 51 | "make": 13, 52 | "me": 14, 53 | "of": 15, 54 | "please": 16, 55 | "pls": 17, 56 | "prepare": 18, 57 | "tea": 19, 58 | "the": 20, 59 | "want": 21, 60 | "you": 22 61 | } 62 | } 63 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.06628334386039435, 5 | -0.8643735409565843, 6 | -0.10162233194496288, 7 | 0.3731953851089176, 8 | -0.4488682974331702, 9 | 0.17636658950526615, 10 | 0.7184102193303363, 11 | -0.06865280010172024, 12 | 0.29743179731574193, 13 | 0.0346771374230032, 14 | -0.1761175736061183, 15 | -0.13726751217877073, 16 | -0.8071682094773692, 17 | -0.05137180345130995, 18 | -0.11880606643573582, 19 | -1.0869381590717797, 20 | 0.32523561280093755, 21 | -0.049393800526571136, 22 | 0.17636658950526615, 23 | -0.37371467568972533, 24 | -1.552232223994024, 25 | -0.049393800526571136, 26 | -0.07258216842039956, 27 | 1.773882907274256, 28 | -0.7513363572925229, 29 | -0.5328718550242008, 30 | 1.773882907274256, 31 | -0.5328718550242008 32 | ], 33 | [ 34 | 0.09460701831794095, 35 | -0.6521590578791224, 36 | 0.2457151029413227, 37 | -0.1564569232219889, 38 | -0.501396327528731, 39 | -0.006657999049205669, 40 | -0.3254543267957415, 41 | 0.19982413835098595, 42 | 0.09848862892018242, 43 | 0.4824274378008, 44 | 0.4213894656886821, 45 | 0.30004773187161327, 46 | -0.7342364277722051, 47 | 0.6427012659535128, 48 | 0.5425430291591661, 49 | -1.2591009549072656, 50 | -0.18164901959554086, 51 | 0.10599990651714576, 52 | -0.006657999049205669, 53 | 0.8868013621218258, 54 | -1.5676891353952978, 55 | 0.10599990651714576, 56 | -0.2459922083905006, 57 | -0.8070607856886259, 58 | 1.5929339342221138, 59 | 1.2525103361872103, 60 | -0.8070607856886259, 61 
| 1.2525103361872103 62 | ], 63 | [ 64 | -0.07342804814279959, 65 | 0.5198593428656567, 66 | -0.1581096606820083, 67 | -0.26280946680431544, 68 | 0.00904118342688167, 69 | -0.2318387692350015, 70 | -0.4684730287438426, 71 | -0.1304212706183918, 72 | -0.44631551521307367, 73 | -0.5533957320987131, 74 | -0.29099767056562215, 75 | -0.21324635553933227, 76 | 0.5486473938992574, 77 | -0.5604628287636957, 78 | -0.4863443376102945, 79 | 0.7764987437479216, 80 | -0.17070720658830346, 81 | -0.09943826061076572, 82 | -0.2318387692350015, 83 | -0.5248553684542913, 84 | 1.3219543672776477, 85 | -0.09943826061076572, 86 | -0.025792600962746038, 87 | -1.1511881362239413, 88 | -0.9891682959063401, 89 | -0.8725891493847063, 90 | -1.1511881362239413, 91 | -0.8725891493847063 92 | ] 93 | ], 94 | "config": { 95 | "data_augmentation_config": { 96 | "add_builtin_entities_examples": false, 97 | "max_unknown_words": 0, 98 | "min_utterances": 20, 99 | "noise_factor": 5, 100 | "unknown_word_prob": 0, 101 | "unknown_words_replacement_string": null 102 | }, 103 | "featurizer_config": { 104 | "added_cooccurrence_feature_ratio": 0.25, 105 | "cooccurrence_vectorizer_config": { 106 | "filter_stop_words": true, 107 | "keep_order": true, 108 | "unit_name": "cooccurrence_vectorizer", 109 | "unknown_words_replacement_string": null, 110 | "window_size": 5 111 | }, 112 | "pvalue_threshold": 0.4, 113 | "tfidf_vectorizer_config": { 114 | "unit_name": "tfidf_vectorizer", 115 | "use_stemming": false, 116 | "word_clusters_name": null 117 | }, 118 | "unit_name": "featurizer" 119 | }, 120 | "noise_reweight_factor": 1.0, 121 | "unit_name": "log_reg_intent_classifier" 122 | }, 123 | "featurizer": "featurizer", 124 | "intent_list": [ 125 | "MakeCoffee", 126 | "MakeTea", 127 | null 128 | ], 129 | "intercept": [ 130 | -0.797273067375694, 131 | -0.7793999633924479, 132 | 0.36488637926624495 133 | ], 134 | "t_": 3221.0 135 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": false, 6 | "max_unknown_words": 0, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0, 10 | "unknown_words_replacement_string": null 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.25, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": null, 19 | "window_size": 5 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1.0, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": 
true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": "top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | "args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "tagging_scheme_code": 2, 129 | "use_stemming": true 130 | }, 131 | "drop_out": 0.5, 132 | "factory_name": "entity_match", 133 | "offsets": [ 134 | -2, 135 | -1, 136 | 0 137 | ] 138 | }, 139 | { 140 | "args": { 141 | "tagging_scheme_code": 1 142 | }, 143 | "factory_name": "builtin_entity_match", 144 | "offsets": [ 145 | -2, 146 | -1, 147 | 0 148 | ] 149 | } 150 | ], 151 | "tagging_scheme": 1, 152 | "unit_name": "crf_slot_filler" 153 | }, 154 | "unit_name": "probabilistic_intent_parser" 155 | }, 156 | "slot_fillers": [ 157 | { 158 | "intent": "MakeCoffee", 159 | "slot_filler_name": "slot_filler_0" 160 | }, 161 | { 162 | "intent": "MakeTea", 163 | "slot_filler_name": "slot_filler_1" 164 | } 165 | ] 166 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/model55imurmx.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/model55imurmx.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | 
"capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [], 104 | "tagging_scheme_code": 2, 105 | "use_stemming": true 106 | }, 107 | "drop_out": 0.5, 108 | "factory_name": "entity_match", 109 | "offsets": [ 110 | -2, 111 | -1, 112 | 0 113 | ] 114 | }, 115 | { 116 | "args": { 117 | "entity_labels": [ 118 | "snips/amountOfMoney", 119 | "snips/date", 120 | "snips/datePeriod", 121 | "snips/datetime", 122 | "snips/duration", 123 | "snips/number", 124 | "snips/ordinal", 125 | "snips/percentage", 126 | "snips/temperature", 127 | "snips/time", 128 | "snips/timePeriod" 129 | ], 130 | "language_code": "en", 131 | "tagging_scheme_code": 1 132 | }, 133 | "factory_name": "builtin_entity_match", 134 | "offsets": [ 135 | -2, 136 | -1, 137 | 0 138 | ] 139 | } 140 | ], 141 | "tagging_scheme": 1, 142 | "unit_name": "crf_slot_filler" 143 | }, 144 | "crf_model_file": "model55imurmx.crfsuite", 145 | "intent": "MakeCoffee", 146 | "language_code": "en", 147 | "slot_name_mapping": { 148 | "number_of_cups": "snips/number" 149 | } 150 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/model8a9dqxnp.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/model8a9dqxnp.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/probabilistic_intent_parser/slot_filler_1/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | 
"data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [ 104 | "Temperature" 105 | ], 106 | "tagging_scheme_code": 2, 107 | "use_stemming": true 108 | }, 109 | "drop_out": 0.5, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entity_labels": [ 120 | "snips/amountOfMoney", 121 | "snips/date", 122 | "snips/datePeriod", 123 | "snips/datetime", 124 | "snips/duration", 125 | "snips/number", 126 | "snips/ordinal", 127 | "snips/percentage", 128 | "snips/temperature", 129 | "snips/time", 130 | "snips/timePeriod" 131 | ], 132 | "language_code": "en", 133 | "tagging_scheme_code": 1 134 | }, 135 | "factory_name": "builtin_entity_match", 136 | "offsets": [ 137 | -2, 138 | -1, 139 | 0 140 | ] 141 | } 142 | ], 143 | "tagging_scheme": 1, 144 | "unit_name": "crf_slot_filler" 145 | }, 146 | "crf_model_file": "model8a9dqxnp.crfsuite", 147 | "intent": "MakeTea", 148 | "language_code": "en", 149 | "slot_name_mapping": { 150 | "beverage_temperature": "Temperature", 151 | "number_of_cups": "snips/number" 152 | } 153 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/resources/en/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for English", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "en", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_en", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.2", 17 | "word_clusters": [] 18 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_beverage/resources/en/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | about 5 | allright 6 | alright 7 | although 8 | an 9 | and 10 | any 11 | besides 12 | but 13 | can 14 | hello 15 | hey 16 | hi 17 | instead 18 | just 19 | lol 20 | man 21 | me 22 | my 23 | now 24 | ok 25 | only 26 | please 27 | pls 28 | so 29 | some 30 | such 31 | that 32 | the 33 | then 34 | these 35 | this 36 | those 37 | though 38 | to 39 | too 40 | very 41 | while 42 | yo 43 | you 44 | your 45 | yours 46 | yourself 47 | -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "gazetteer_parser": null 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "en", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "game", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.2","parser_filename":"parser","threshold":0.5,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_game/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/lookup_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "unit_name": "lookup_intent_parser" 5 | }, 6 | "entity_scopes": [ 7 | { 8 | "entity_scope": { 9 | "builtin": [], 10 | "custom": [ 11 | "game" 12 | ] 13 | }, 14 | "intent_group": [ 15 | "PlayGame" 16 | ] 17 | } 18 | ], 19 | "intents_names": [ 20 | "PlayGame" 21 | ], 22 | "language_code": "en", 23 | "map": { 24 | "483944904": [ 25 | 0, 26 | [ 27 | 0 28 | ] 29 | ], 30 | "1316274424": [ 31 | 0, 32 | [ 33 | 0 34 | ] 35 | ] 36 | }, 37 | "slots_names": [ 38 | "game" 39 | ], 40 | "stop_words_whitelist": {} 41 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/lookup_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "lookup_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"config": { 3 | "added_cooccurrence_feature_ratio": 0.0, 4 | "cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": null, 9 | "window_size": null 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": null, 20 | "language_code": "en", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [], 3 | "config": { 4 | "unit_name": "tfidf_vectorizer", 5 | "use_stemming": false, 6 | "word_clusters_name": null 7 | }, 8 | "language_code": "en", 9 | "vectorizer": { 10 | "idf_diag": [ 11 | 4.697178256928631, 12 | 3.849880396541428, 13 | 4.697178256928631, 14 | 4.697178256928631, 15 | 2.751268107873318, 16 | 3.3978952727983707, 17 | 3.0232018233569597, 18 | 4.4094961844768505, 19 | 3.2308411881352046, 20 | 2.751268107873318, 21 | 3.3978952727983707, 22 | 4.697178256928631, 23 | 2.80005827204275, 24 | 3.3978952727983707, 25 | 3.3978952727983707, 26 | 4.004031076368686, 27 | 4.004031076368686, 28 | 4.697178256928631, 29 | 2.962577201540525, 30 | 3.3978952727983707, 31 | 4.186352633162641 32 | ], 33 | "vocab": { 34 | "3": 0, 35 | "attack": 1, 36 | "demo": 2, 37 | "edition": 3, 38 | "entityfeaturegame": 4, 39 | "game": 5, 40 | "i": 6, 41 | "ii": 7, 42 | "in": 8, 43 | "invader": 9, 44 | "launch": 10, 45 | "limited": 11, 46 | "of": 12, 47 | "play": 13, 48 | "please": 14, 49 | "space": 15, 50 | "star": 16, 51 | "three": 17, 52 | "to": 18, 53 | "want": 19, 54 | "war": 20 55 | } 56 | } 57 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.20149987419774504, 5 | -0.4999326799959391, 6 | -0.2072185982938113, 7 | -0.20533716633716562, 8 | -1.212997119952126, 9 | -0.7430829768823239, 10 | -0.10687891515378825, 11 | -0.28544080700845964, 12 | 0.7737448377337215, 13 | -1.212997119952126, 14 | -0.7430829768823239, 15 | -0.20533716633716562, 16 | 1.019183684420755, 17 | -0.7550033666508472, 18 | -0.7430829768823239, 19 | -0.4433146539838388, 20 | -0.437280810875116, 21 | -0.20277376407358896, 22 | -0.5550491008658663, 23 | -0.7550033666508472, 24 | -0.38138780663082483 25 | ] 26 | ], 27 | "config": { 28 | 
"data_augmentation_config": { 29 | "add_builtin_entities_examples": false, 30 | "max_unknown_words": null, 31 | "min_utterances": 20, 32 | "noise_factor": 5, 33 | "unknown_word_prob": 0.0, 34 | "unknown_words_replacement_string": null 35 | }, 36 | "featurizer_config": { 37 | "added_cooccurrence_feature_ratio": 0.0, 38 | "cooccurrence_vectorizer_config": { 39 | "filter_stop_words": true, 40 | "keep_order": true, 41 | "unit_name": "cooccurrence_vectorizer", 42 | "unknown_words_replacement_string": null, 43 | "window_size": null 44 | }, 45 | "pvalue_threshold": 0.4, 46 | "tfidf_vectorizer_config": { 47 | "unit_name": "tfidf_vectorizer", 48 | "use_stemming": false, 49 | "word_clusters_name": null 50 | }, 51 | "unit_name": "featurizer" 52 | }, 53 | "noise_reweight_factor": 1, 54 | "unit_name": "log_reg_intent_classifier" 55 | }, 56 | "featurizer": "featurizer", 57 | "intent_list": [ 58 | "PlayGame", 59 | null 60 | ], 61 | "intercept": [ 62 | 0.2646563858371308 63 | ], 64 | "t_": 2641.0 65 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": false, 6 | "max_unknown_words": null, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0.0, 10 | "unknown_words_replacement_string": null 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.0, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": null, 19 | "window_size": null 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": "top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | 
"args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "entity_filter": { 129 | "automatically_extensible": false 130 | }, 131 | "tagging_scheme_code": 2, 132 | "use_stemming": true 133 | }, 134 | "factory_name": "entity_match", 135 | "offsets": [ 136 | -2, 137 | -1, 138 | 0 139 | ] 140 | }, 141 | { 142 | "args": { 143 | "entity_filter": { 144 | "automatically_extensible": true 145 | }, 146 | "tagging_scheme_code": 2, 147 | "use_stemming": true 148 | }, 149 | "drop_out": 0.5, 150 | "factory_name": "entity_match", 151 | "offsets": [ 152 | -2, 153 | -1, 154 | 0 155 | ] 156 | }, 157 | { 158 | "args": { 159 | "tagging_scheme_code": 1 160 | }, 161 | "factory_name": "builtin_entity_match", 162 | "offsets": [ 163 | -2, 164 | -1, 165 | 0 166 | ] 167 | }, 168 | { 169 | "args": { 170 | "cluster_name": "brown_clusters", 171 | "use_stemming": false 172 | }, 173 | "factory_name": "word_cluster", 174 | "offsets": [ 175 | -2, 176 | -1, 177 | 0, 178 | 1 179 | ] 180 | } 181 | ], 182 | "tagging_scheme": 1, 183 | "unit_name": "crf_slot_filler" 184 | }, 185 | "unit_name": "probabilistic_intent_parser" 186 | }, 187 | "slot_fillers": [ 188 | { 189 | "intent": "PlayGame", 190 | "slot_filler_name": "slot_filler_0" 191 | } 192 | ] 193 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/model.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/model.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "en", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "en", 34 | "n": 
2, 35 | "use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "en", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "en", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "en", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [], 104 | "entity_filter": { 105 | "automatically_extensible": false 106 | }, 107 | "tagging_scheme_code": 2, 108 | "use_stemming": true 109 | }, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entities": [ 120 | "game" 121 | ], 122 | "entity_filter": { 123 | "automatically_extensible": true 124 | }, 125 | "tagging_scheme_code": 2, 126 | "use_stemming": true 127 | }, 128 | "drop_out": 0.5, 129 | "factory_name": "entity_match", 130 | "offsets": [ 131 | -2, 132 | -1, 133 | 0 134 | ] 135 | }, 136 | { 137 | "args": { 138 | "entity_labels": [ 139 | "snips/amountOfMoney", 140 | "snips/date", 141 | "snips/datePeriod", 142 | "snips/datetime", 143 | "snips/duration", 144 | "snips/number", 145 | "snips/ordinal", 146 | "snips/percentage", 147 | "snips/temperature", 148 | "snips/time", 149 | "snips/timePeriod" 150 | ], 151 | "language_code": "en", 152 | "tagging_scheme_code": 1 153 | }, 154 | "factory_name": "builtin_entity_match", 155 | "offsets": [ 156 | -2, 157 | -1, 158 | 0 159 | ] 160 | }, 161 | { 162 | "args": { 163 | "cluster_name": "brown_clusters", 164 | "use_stemming": false 165 | }, 166 | "factory_name": "word_cluster", 167 | "offsets": [ 168 | -2, 169 | -1, 170 | 0, 171 | 1 172 | ] 173 | } 174 | ], 175 | "tagging_scheme": 1, 176 | "unit_name": "crf_slot_filler" 177 | }, 178 | "crf_model_file": "model.crfsuite", 179 | "intent": "PlayGame", 180 | "language_code": "en", 181 | "slot_name_mapping": { 182 | "game": "game" 183 | } 184 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/resources/en/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for English", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "en", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_en", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.2", 17 | "word_clusters": [ 18 | "brown_clusters" 19 | ] 20 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_game/resources/en/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | about 5 | allright 6 | alright 7 | although 8 | an 9 | and 10 | any 11 | besides 12 | but 13 | can 14 | hello 15 | hey 16 | hi 17 | instead 18 | just 19 | lol 20 | man 21 | me 22 | my 23 | now 24 | ok 25 | only 26 | please 27 | pls 28 | so 29 | some 30 | such 31 | that 32 | the 33 | then 34 | these 35 | this 36 | those 37 | though 38 | to 39 | too 40 | very 41 | while 42 | yo 43 | you 44 | your 45 | yours 46 | yourself 47 | -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "snips/musicAlbum", 5 | "entity_parser": "parser_1" 6 | }, 7 | { 8 | "entity_identifier": "snips/musicArtist", 9 | "entity_parser": "parser_2" 10 | } 11 | ] 12 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":0.5,"stop_words":["mais","elles","steve","dans","vos","un","david","s","van","richard","me","à","avec","mark","sommes","nous","sur","y","sa","tu","des","ses","leur","te","pour","avons","ne","qui","qu","band","ce","and","ai","l","es","joe","tes","the","j","vous","elle","mike","avez","ont","ma","pas","ete","le","son","suis","que","c","même","of","je","thomas","meme","ta","james","eux","votre","peter","william","paul","il","m","de","sont","ou","n","les","robert","eu","moi","la","t","par","une","du","orchestra","mes","chris","martin","nos","êtes","black","lui","d","etes","lee","michael","est","a","ils","au","se","en","symphony","aux","in","ton","as","ces","été","notre","toi","mon","john","on","george","et"],"edge_cases":["The Avons","Ai","LA Symphony","Steve Lee","Black M","Mike D","Mike Lee","J","Mike","The Black","D&D","David & David","Es","The The","William Black","Mark James","EU","Joe","SA","Symphony in C","Steve James","David A. 
Martin","James Thomas","Meme","James","Richard Robert","John Martin","Joe Ma","Du Du A","George Martin","M","The David","N","Me&John","John Lee","AU","RoBERT","Paul C","De Van","David James","George Michael","M&S","Y&T","D/C","-M-","Chris Martin","Black","D","Richard Band","Richard Thomas","Peter Thomas","ME","Michael Lee","A","Chris Lee","David Lee","Les Elles","T & N","John David","Robert Thomas","moi","The Orchestra","John Thomas","Michael Mark","TOI","Robert Black","George","Michael Au","Chris D.","AI","Peter Peter","Chris and Thomas","On","A Band","Martin Lee","David","S / S / S","Joe Thomas","Robert John","James Michael","Avec","David Thomas","S","Me Me Me","LE","Steve Richard","Peter Martin","Mark Lui","The Band","Chris Thomas","Paul Martin","David J","John William","Qui","Mike Martin","UN"]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/builtin_entity_parser/gazetteer_entity_parser/parser_2/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/builtin_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "fr", 3 | "gazetteer_parser": "gazetteer_entity_parser" 4 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "language": "fr", 3 | "parser_directory": "parser", 4 | "parser_usage": 2 5 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "parsers_metadata": [ 3 | { 4 | "entity_identifier": "playlist", 5 | "entity_parser": "parser_1" 6 | } 7 | ] 8 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/metadata.json: -------------------------------------------------------------------------------- 1 | {"version":"0.7.0","parser_filename":"parser","threshold":1.0,"stop_words":[],"edge_cases":[]} -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/parser: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/custom_entity_parser/parser/parser_1/parser -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/deterministic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "ignore_stop_words": true, 4 | "max_pattern_length": 1000, 5 | "max_queries": 100, 6 | "unit_name": "deterministic_intent_parser" 7 | }, 8 | "group_names_to_slot_names": { 9 | "group0": "musicAlbum", 10 | "group1": "musicArtist", 11 | 
"group2": "playlist" 12 | }, 13 | "language_code": "fr", 14 | "patterns": { 15 | "adri:PlayMusic": [ 16 | "^\\s*mets\\s*son\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 17 | "^\\s*je\\s*veux\\s*ecouter\\s*chanson\\s*(?P%SNIPSMUSICARTIST%)\\s*please\\s*$", 18 | "^\\s*je\\s*souhaiterais\\s*\u00e9couter\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*$", 19 | "^\\s*mets\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 20 | "^\\s*lance\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*veux\\s*$", 21 | "^\\s*mets\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 22 | "^\\s*je\\s*veux\\s*ecouter\\s*album\\s*(?P%SNIPSMUSICALBUM%)\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 23 | "^\\s*peux\\s*mettre\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 24 | "^\\s*je\\s*voudrais\\s*ecouter\\s*(?P%SNIPSMUSICARTIST%)\\s*$", 25 | "^\\s*je\\s*voudrais\\s*ecouter\\s*playlist\\s*(?P%PLAYLIST%)\\s*$" 26 | ] 27 | }, 28 | "slot_names_to_entities": { 29 | "adri:PlayMusic": { 30 | "musicAlbum": "snips/musicAlbum", 31 | "musicArtist": "snips/musicArtist", 32 | "playlist": "playlist" 33 | } 34 | }, 35 | "stop_words_whitelist": {} 36 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/deterministic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "deterministic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/nlu_engine.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_parser": "builtin_entity_parser", 3 | "config": { 4 | "intent_parsers_configs": [ 5 | { 6 | "ignore_stop_words": true, 7 | "max_pattern_length": 1000, 8 | "max_queries": 100, 9 | "unit_name": "deterministic_intent_parser" 10 | }, 11 | { 12 | "intent_classifier_config": { 13 | "data_augmentation_config": { 14 | "add_builtin_entities_examples": true, 15 | "max_unknown_words": 5, 16 | "min_utterances": 20, 17 | "noise_factor": 5, 18 | "unknown_word_prob": 0.5, 19 | "unknown_words_replacement_string": "unknownword" 20 | }, 21 | "featurizer_config": { 22 | "added_cooccurrence_feature_ratio": 0.25, 23 | "cooccurrence_vectorizer_config": { 24 | "filter_stop_words": true, 25 | "keep_order": true, 26 | "unit_name": "cooccurrence_vectorizer", 27 | "unknown_words_replacement_string": "unknownword", 28 | "window_size": 5 29 | }, 30 | "pvalue_threshold": 0.4, 31 | "tfidf_vectorizer_config": { 32 | "unit_name": "tfidf_vectorizer", 33 | "use_stemming": false, 34 | "word_clusters_name": null 35 | }, 36 | "unit_name": "featurizer" 37 | }, 38 | "noise_reweight_factor": 1.0, 39 | "unit_name": "log_reg_intent_classifier" 40 | }, 41 | "slot_filler_config": { 42 | "crf_args": { 43 | "algorithm": "lbfgs", 44 | "c1": 0.1, 45 | "c2": 0.1 46 | }, 47 | "data_augmentation_config": { 48 | "add_builtin_entities_examples": true, 49 | "capitalization_ratio": 0.2, 50 | "min_utterances": 200 51 | }, 52 | "feature_factory_configs": [ 53 | { 54 | "args": { 55 | "common_words_gazetteer_name": "top_10000_words_stemmed", 56 | "n": 1, 57 | "use_stemming": true 58 | }, 59 | "factory_name": "ngram", 60 | "offsets": [ 61 | -2, 62 | -1, 63 | 0, 64 | 1, 65 | 2 66 | ] 67 | }, 68 | { 69 | "args": { 70 | "common_words_gazetteer_name": "top_10000_words_stemmed", 71 | "n": 2, 72 | "use_stemming": true 73 | }, 74 | "factory_name": "ngram", 75 | "offsets": [ 76 | -2, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_digit", 83 | "offsets": [ 84 
| -1, 85 | 0, 86 | 1 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_first", 92 | "offsets": [ 93 | -2, 94 | -1, 95 | 0 96 | ] 97 | }, 98 | { 99 | "args": {}, 100 | "factory_name": "is_last", 101 | "offsets": [ 102 | 0, 103 | 1, 104 | 2 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 1 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | 0 114 | ] 115 | }, 116 | { 117 | "args": { 118 | "n": 2 119 | }, 120 | "factory_name": "shape_ngram", 121 | "offsets": [ 122 | -1, 123 | 0 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "n": 3 129 | }, 130 | "factory_name": "shape_ngram", 131 | "offsets": [ 132 | -1 133 | ] 134 | }, 135 | { 136 | "args": { 137 | "tagging_scheme_code": 2, 138 | "use_stemming": true 139 | }, 140 | "drop_out": 0.5, 141 | "factory_name": "entity_match", 142 | "offsets": [ 143 | -2, 144 | -1, 145 | 0 146 | ] 147 | }, 148 | { 149 | "args": { 150 | "tagging_scheme_code": 1 151 | }, 152 | "factory_name": "builtin_entity_match", 153 | "offsets": [ 154 | -2, 155 | -1, 156 | 0 157 | ] 158 | } 159 | ], 160 | "tagging_scheme": 1, 161 | "unit_name": "crf_slot_filler" 162 | }, 163 | "unit_name": "probabilistic_intent_parser" 164 | } 165 | ], 166 | "unit_name": "nlu_engine" 167 | }, 168 | "custom_entity_parser": "custom_entity_parser", 169 | "dataset_metadata": { 170 | "entities": { 171 | "playlist": { 172 | "automatically_extensible": false 173 | } 174 | }, 175 | "language_code": "fr", 176 | "slot_name_mappings": { 177 | "adri:PlayMusic": { 178 | "musicAlbum": "snips/musicAlbum", 179 | "musicArtist": "snips/musicArtist", 180 | "playlist": "playlist" 181 | } 182 | } 183 | }, 184 | "intent_parsers": [ 185 | "deterministic_intent_parser", 186 | "probabilistic_intent_parser" 187 | ], 188 | "model_version": "0.20.0", 189 | "training_package_version": "0.20.0", 190 | "unit_name": "nlu_engine" 191 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "cooccurrence_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/cooccurrence_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/musicAlbum", 4 | "snips/musicArtist" 5 | ], 6 | "config": { 7 | "filter_stop_words": true, 8 | "keep_order": true, 9 | "unit_name": "cooccurrence_vectorizer", 10 | "unknown_words_replacement_string": "unknownword", 11 | "window_size": 5 12 | }, 13 | "language_code": "fr", 14 | "word_pairs": { 15 | "0": [ 16 | "album", 17 | "SNIPSMUSICALBUM" 18 | ], 19 | "1": [ 20 | "ecouter", 21 | "SNIPSMUSICALBUM" 22 | ], 23 | "2": [ 24 | "ecouter", 25 | "SNIPSMUSICARTIST" 26 | ], 27 | "3": [ 28 | "je", 29 | "SNIPSMUSICALBUM" 30 | ], 31 | "4": [ 32 | "je", 33 | "ecouter" 34 | ], 35 | "5": [ 36 | "mets", 37 | "SNIPSMUSICARTIST" 38 | ] 39 | } 40 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/featurizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "added_cooccurrence_feature_ratio": 0.25, 4 | 
"cooccurrence_vectorizer_config": { 5 | "filter_stop_words": true, 6 | "keep_order": true, 7 | "unit_name": "cooccurrence_vectorizer", 8 | "unknown_words_replacement_string": "unknownword", 9 | "window_size": 5 10 | }, 11 | "pvalue_threshold": 0.4, 12 | "tfidf_vectorizer_config": { 13 | "unit_name": "tfidf_vectorizer", 14 | "use_stemming": false, 15 | "word_clusters_name": null 16 | }, 17 | "unit_name": "featurizer" 18 | }, 19 | "cooccurrence_vectorizer": "cooccurrence_vectorizer", 20 | "language_code": "fr", 21 | "tfidf_vectorizer": "tfidf_vectorizer" 22 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "featurizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "tfidf_vectorizer" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/featurizer/tfidf_vectorizer/vectorizer.json: -------------------------------------------------------------------------------- 1 | { 2 | "builtin_entity_scope": [ 3 | "snips/musicAlbum", 4 | "snips/musicArtist" 5 | ], 6 | "config": { 7 | "unit_name": "tfidf_vectorizer", 8 | "use_stemming": false, 9 | "word_clusters_name": null 10 | }, 11 | "language_code": "fr", 12 | "vectorizer": { 13 | "idf_diag": [ 14 | 4.697178256928631, 15 | 3.4932054526026954, 16 | 2.2404424841073274, 17 | 2.361803341111595, 18 | 4.697178256928631, 19 | 3.5985659682605218, 20 | 2.3300546427970144, 21 | 3.3978952727983707, 22 | 4.697178256928631, 23 | 4.697178256928631, 24 | 3.4932054526026954, 25 | 2.8513515664303006, 26 | 4.697178256928631, 27 | 3.849880396541428, 28 | 4.4094961844768505, 29 | 4.4094961844768505, 30 | 4.697178256928631, 31 | 4.697178256928631, 32 | 4.186352633162641, 33 | 4.697178256928631, 34 | 4.186352633162641, 35 | 1.0772916743016465, 36 | 3.716349003916905, 37 | 4.186352633162641 38 | ], 39 | "vocab": { 40 | "?": 0, 41 | "album": 1, 42 | "builtinentityfeaturesnipsmusicalbum": 2, 43 | "builtinentityfeaturesnipsmusicartist": 3, 44 | "chanson": 4, 45 | "dans": 5, 46 | "de": 6, 47 | "ecouter": 7, 48 | "entityfeatureplaylist": 8, 49 | "jazz": 9, 50 | "l": 10, 51 | "la": 11, 52 | "lance": 12, 53 | "mets": 13, 54 | "mettre": 14, 55 | "moi": 15, 56 | "playlist": 16, 57 | "please": 17, 58 | "son": 18, 59 | "souhaiterais": 19, 60 | "stp": 20, 61 | "unknownword": 21, 62 | "veux": 22, 63 | "voudrais": 23 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/intent_classifier.json: -------------------------------------------------------------------------------- 1 | { 2 | "coeffs": [ 3 | [ 4 | -0.5166360015252428, 5 | -0.37123136560399383, 6 | -0.5990894741148578, 7 | -1.118727734094978, 8 | -0.030597456416977947, 9 | 1.0322279612160994, 10 | 1.016885422821252, 11 | -0.7621951278255702, 12 | -0.18816721865208444, 13 | -0.18816721865208444, 14 | -0.3827265434396571, 15 | 1.0195576774593815, 16 | -0.5166360015252428, 17 | 
-1.1284893473963162, 18 | -1.3002036067433036, 19 | -0.5832964749395109, 20 | -0.18816721865208444, 21 | -0.030597456416977947, 22 | -0.15204859193852968, 23 | -0.1956950722711481, 24 | -1.3811965585429193, 25 | 3.2429520118942476, 26 | -0.2631328048160813, 27 | -0.7114748378163408, 28 | -1.8419736587848352, 29 | -0.636452201857874, 30 | -1.0594513135610186, 31 | -0.6320896470933497, 32 | -1.6115371082209173, 33 | -2.444543746213874 34 | ] 35 | ], 36 | "config": { 37 | "data_augmentation_config": { 38 | "add_builtin_entities_examples": true, 39 | "max_unknown_words": 5, 40 | "min_utterances": 20, 41 | "noise_factor": 5, 42 | "unknown_word_prob": 0.5, 43 | "unknown_words_replacement_string": "unknownword" 44 | }, 45 | "featurizer_config": { 46 | "added_cooccurrence_feature_ratio": 0.25, 47 | "cooccurrence_vectorizer_config": { 48 | "filter_stop_words": true, 49 | "keep_order": true, 50 | "unit_name": "cooccurrence_vectorizer", 51 | "unknown_words_replacement_string": "unknownword", 52 | "window_size": 5 53 | }, 54 | "pvalue_threshold": 0.4, 55 | "tfidf_vectorizer_config": { 56 | "unit_name": "tfidf_vectorizer", 57 | "use_stemming": false, 58 | "word_clusters_name": null 59 | }, 60 | "unit_name": "featurizer" 61 | }, 62 | "noise_reweight_factor": 1.0, 63 | "unit_name": "log_reg_intent_classifier" 64 | }, 65 | "featurizer": "featurizer", 66 | "intent_list": [ 67 | "adri:PlayMusic", 68 | null 69 | ], 70 | "intercept": [ 71 | 0.028391354876375573 72 | ], 73 | "t_": 961.0 74 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_classifier/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "log_reg_intent_classifier" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/intent_parser.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "intent_classifier_config": { 4 | "data_augmentation_config": { 5 | "add_builtin_entities_examples": true, 6 | "max_unknown_words": 5, 7 | "min_utterances": 20, 8 | "noise_factor": 5, 9 | "unknown_word_prob": 0.5, 10 | "unknown_words_replacement_string": "unknownword" 11 | }, 12 | "featurizer_config": { 13 | "added_cooccurrence_feature_ratio": 0.25, 14 | "cooccurrence_vectorizer_config": { 15 | "filter_stop_words": true, 16 | "keep_order": true, 17 | "unit_name": "cooccurrence_vectorizer", 18 | "unknown_words_replacement_string": "unknownword", 19 | "window_size": 5 20 | }, 21 | "pvalue_threshold": 0.4, 22 | "tfidf_vectorizer_config": { 23 | "unit_name": "tfidf_vectorizer", 24 | "use_stemming": false, 25 | "word_clusters_name": null 26 | }, 27 | "unit_name": "featurizer" 28 | }, 29 | "noise_reweight_factor": 1.0, 30 | "unit_name": "log_reg_intent_classifier" 31 | }, 32 | "slot_filler_config": { 33 | "crf_args": { 34 | "algorithm": "lbfgs", 35 | "c1": 0.1, 36 | "c2": 0.1 37 | }, 38 | "data_augmentation_config": { 39 | "add_builtin_entities_examples": true, 40 | "capitalization_ratio": 0.2, 41 | "min_utterances": 200 42 | }, 43 | "feature_factory_configs": [ 44 | { 45 | "args": { 46 | "common_words_gazetteer_name": "top_10000_words_stemmed", 47 | "n": 1, 48 | "use_stemming": true 49 | }, 50 | "factory_name": "ngram", 51 | "offsets": [ 52 | -2, 53 | -1, 54 | 0, 55 | 1, 56 | 2 57 | ] 58 | }, 59 | { 60 | "args": { 61 | "common_words_gazetteer_name": 
"top_10000_words_stemmed", 62 | "n": 2, 63 | "use_stemming": true 64 | }, 65 | "factory_name": "ngram", 66 | "offsets": [ 67 | -2, 68 | 1 69 | ] 70 | }, 71 | { 72 | "args": {}, 73 | "factory_name": "is_digit", 74 | "offsets": [ 75 | -1, 76 | 0, 77 | 1 78 | ] 79 | }, 80 | { 81 | "args": {}, 82 | "factory_name": "is_first", 83 | "offsets": [ 84 | -2, 85 | -1, 86 | 0 87 | ] 88 | }, 89 | { 90 | "args": {}, 91 | "factory_name": "is_last", 92 | "offsets": [ 93 | 0, 94 | 1, 95 | 2 96 | ] 97 | }, 98 | { 99 | "args": { 100 | "n": 1 101 | }, 102 | "factory_name": "shape_ngram", 103 | "offsets": [ 104 | 0 105 | ] 106 | }, 107 | { 108 | "args": { 109 | "n": 2 110 | }, 111 | "factory_name": "shape_ngram", 112 | "offsets": [ 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "n": 3 120 | }, 121 | "factory_name": "shape_ngram", 122 | "offsets": [ 123 | -1 124 | ] 125 | }, 126 | { 127 | "args": { 128 | "tagging_scheme_code": 2, 129 | "use_stemming": true 130 | }, 131 | "drop_out": 0.5, 132 | "factory_name": "entity_match", 133 | "offsets": [ 134 | -2, 135 | -1, 136 | 0 137 | ] 138 | }, 139 | { 140 | "args": { 141 | "tagging_scheme_code": 1 142 | }, 143 | "factory_name": "builtin_entity_match", 144 | "offsets": [ 145 | -2, 146 | -1, 147 | 0 148 | ] 149 | } 150 | ], 151 | "tagging_scheme": 1, 152 | "unit_name": "crf_slot_filler" 153 | }, 154 | "unit_name": "probabilistic_intent_parser" 155 | }, 156 | "slot_fillers": [ 157 | { 158 | "intent": "adri:PlayMusic", 159 | "slot_filler_name": "slot_filler_0" 160 | } 161 | ] 162 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "probabilistic_intent_parser" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unit_name": "crf_slot_filler" 3 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/modeluzcfum35.crfsuite: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/modeluzcfum35.crfsuite -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/probabilistic_intent_parser/slot_filler_0/slot_filler.json: -------------------------------------------------------------------------------- 1 | { 2 | "config": { 3 | "crf_args": { 4 | "algorithm": "lbfgs", 5 | "c1": 0.1, 6 | "c2": 0.1 7 | }, 8 | "data_augmentation_config": { 9 | "add_builtin_entities_examples": true, 10 | "capitalization_ratio": 0.2, 11 | "min_utterances": 200 12 | }, 13 | "feature_factory_configs": [ 14 | { 15 | "args": { 16 | "common_words_gazetteer_name": "top_10000_words_stemmed", 17 | "language_code": "fr", 18 | "n": 1, 19 | "use_stemming": true 20 | }, 21 | "factory_name": "ngram", 22 | "offsets": [ 23 | -2, 24 | -1, 25 | 0, 26 | 1, 27 | 2 28 | ] 29 | }, 30 | { 31 | "args": { 32 | "common_words_gazetteer_name": "top_10000_words_stemmed", 33 | "language_code": "fr", 34 | "n": 2, 35 | 
"use_stemming": true 36 | }, 37 | "factory_name": "ngram", 38 | "offsets": [ 39 | -2, 40 | 1 41 | ] 42 | }, 43 | { 44 | "args": {}, 45 | "factory_name": "is_digit", 46 | "offsets": [ 47 | -1, 48 | 0, 49 | 1 50 | ] 51 | }, 52 | { 53 | "args": {}, 54 | "factory_name": "is_first", 55 | "offsets": [ 56 | -2, 57 | -1, 58 | 0 59 | ] 60 | }, 61 | { 62 | "args": {}, 63 | "factory_name": "is_last", 64 | "offsets": [ 65 | 0, 66 | 1, 67 | 2 68 | ] 69 | }, 70 | { 71 | "args": { 72 | "language_code": "fr", 73 | "n": 1 74 | }, 75 | "factory_name": "shape_ngram", 76 | "offsets": [ 77 | 0 78 | ] 79 | }, 80 | { 81 | "args": { 82 | "language_code": "fr", 83 | "n": 2 84 | }, 85 | "factory_name": "shape_ngram", 86 | "offsets": [ 87 | -1, 88 | 0 89 | ] 90 | }, 91 | { 92 | "args": { 93 | "language_code": "fr", 94 | "n": 3 95 | }, 96 | "factory_name": "shape_ngram", 97 | "offsets": [ 98 | -1 99 | ] 100 | }, 101 | { 102 | "args": { 103 | "entities": [ 104 | "playlist" 105 | ], 106 | "tagging_scheme_code": 2, 107 | "use_stemming": true 108 | }, 109 | "drop_out": 0.5, 110 | "factory_name": "entity_match", 111 | "offsets": [ 112 | -2, 113 | -1, 114 | 0 115 | ] 116 | }, 117 | { 118 | "args": { 119 | "entity_labels": [ 120 | "snips/amountOfMoney", 121 | "snips/datetime", 122 | "snips/duration", 123 | "snips/musicAlbum", 124 | "snips/musicArtist", 125 | "snips/number", 126 | "snips/ordinal", 127 | "snips/percentage", 128 | "snips/temperature" 129 | ], 130 | "language_code": "fr", 131 | "tagging_scheme_code": 1 132 | }, 133 | "factory_name": "builtin_entity_match", 134 | "offsets": [ 135 | -2, 136 | -1, 137 | 0 138 | ] 139 | } 140 | ], 141 | "tagging_scheme": 1, 142 | "unit_name": "crf_slot_filler" 143 | }, 144 | "crf_model_file": "modeluzcfum35.crfsuite", 145 | "intent": "adri:PlayMusic", 146 | "language_code": "fr", 147 | "slot_name_mapping": { 148 | "musicAlbum": "snips/musicAlbum", 149 | "musicArtist": "snips/musicArtist", 150 | "playlist": "playlist" 151 | } 152 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/resources/fr/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "author": "Rosa Stern", 3 | "description": "Language resources for French", 4 | "email": "rosa.stern@snips.ai", 5 | "gazetteers": [ 6 | "top_10000_words_stemmed" 7 | ], 8 | "language": "fr", 9 | "license": "Apache License, Version 2.0", 10 | "name": "snips_nlu_fr", 11 | "noise": "noise", 12 | "snips_nlu_version": ">=0.1.0,<1.0.0", 13 | "stems": "stems", 14 | "stop_words": "stop_words", 15 | "url": "https://snips-nlu.readthedocs.io", 16 | "version": "0.2.4", 17 | "word_clusters": [] 18 | } -------------------------------------------------------------------------------- /data/tests/models/nlu_engine_music/resources/fr/stop_words.txt: -------------------------------------------------------------------------------- 1 | ! 2 | ? 
3 | a 4 | au 5 | aussi 6 | aux 7 | bonjour 8 | bonsoir 9 | c 10 | ce 11 | ceci 12 | cela 13 | ces 14 | cet 15 | cette 16 | d 17 | de 18 | des 19 | du 20 | et 21 | hey 22 | j 23 | juste 24 | l 25 | la 26 | le 27 | les 28 | lol 29 | m 30 | ma 31 | maintenant 32 | mais 33 | me 34 | merci 35 | mes 36 | moi 37 | mon 38 | ok 39 | puis 40 | s 41 | salut 42 | se 43 | stp 44 | svp 45 | toi 46 | tres 47 | tu 48 | un 49 | une 50 | vous 51 | y 52 | yo 53 | -------------------------------------------------------------------------------- /examples/interactive_parsing_cli.rs: -------------------------------------------------------------------------------- 1 | extern crate clap; 2 | extern crate env_logger; 3 | extern crate serde_json; 4 | extern crate snips_nlu_lib; 5 | 6 | use clap::{App, Arg}; 7 | use snips_nlu_lib::SnipsNluEngine; 8 | use std::io; 9 | use std::io::Write; 10 | 11 | fn main() { 12 | env_logger::Builder::from_default_env() 13 | .default_format_timestamp_nanos(true) 14 | .init(); 15 | 16 | let matches = App::new("snips-nlu-parse") 17 | .about("Snips NLU interactive CLI for parsing intents") 18 | .arg( 19 | Arg::with_name("NLU_ENGINE_DIR") 20 | .required(true) 21 | .takes_value(true) 22 | .index(1) 23 | .help("path to the trained nlu engine directory"), 24 | ) 25 | .arg( 26 | Arg::with_name("intents_alternatives") 27 | .short("i") 28 | .long("--intents-alternatives") 29 | .takes_value(true) 30 | .help("number of alternative parsing results to return in the output"), 31 | ) 32 | .arg( 33 | Arg::with_name("slots_alternatives") 34 | .short("s") 35 | .long("--slots-alternatives") 36 | .takes_value(true) 37 | .help("number of alternative slot values to return along with each extracted slot"), 38 | ) 39 | .get_matches(); 40 | let engine_dir = matches.value_of("NLU_ENGINE_DIR").unwrap(); 41 | let intents_alternatives = matches 42 | .value_of("intents_alternatives") 43 | .map(|v| v.to_string().parse::<usize>().unwrap()) 44 | .unwrap_or(0); 45 | let slots_alternatives = matches 46 | .value_of("slots_alternatives") 47 | .map(|v| v.to_string().parse::<usize>().unwrap()) 48 | .unwrap_or(0); 49 | 50 | println!("\nLoading the nlu engine..."); 51 | let engine = SnipsNluEngine::from_path(engine_dir).unwrap(); 52 | 53 | loop { 54 | print!("> "); 55 | io::stdout().flush().unwrap(); 56 | let mut query = String::new(); 57 | io::stdin().read_line(&mut query).unwrap(); 58 | let result = engine 59 | .parse_with_alternatives( 60 | query.trim(), 61 | None, 62 | None, 63 | intents_alternatives, 64 | slots_alternatives, 65 | ) 66 | .unwrap(); 67 | let result_json = serde_json::to_string_pretty(&result).unwrap(); 68 | println!("{}", result_json); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-ffi" 3 | version = "0.65.6" 4 | edition = "2018" 5 | authors = [ 6 | "Adrien Ball <adrien.ball@snips.ai>", 7 | "Clement Doumouro <clement.doumouro@snips.ai>", 8 | "Kevin Lefevre <kevin.lefevre@snips.ai>", 9 | "Thibaut Lorrain <thibaut.lorrain@snips.ai>" 10 | ] 11 | 12 | [dependencies] 13 | ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } 14 | snips-nlu-lib = { path = ".."
} 15 | snips-nlu-ontology-ffi-macros = { git = "https://github.com/snipsco/snips-nlu-ontology", tag = "0.67.1" } 16 | failure = "0.1" 17 | lazy_static = "1.0" 18 | libc = "0.2" 19 | serde_json = "1.0" 20 | 21 | [lib] 22 | crate-type = ["rlib", "staticlib", "cdylib"] 23 | -------------------------------------------------------------------------------- /ffi/cbindgen.toml: -------------------------------------------------------------------------------- 1 | language = "c" 2 | 3 | include_guard = "LIBSNIPS_NLU_H_" 4 | 5 | header = "#define SNIPS_NLU_VERSION \"0.65.6\"" 6 | 7 | [parse] 8 | parse_deps = true 9 | include = [ 10 | "snips_nlu_ffi", 11 | "ffi_utils", 12 | "snips_nlu_ontology_ffi", 13 | "snips_nlu_ontology_ffi_macros", 14 | ] 15 | 16 | [parse.expand] 17 | crates = [ 18 | "snips-nlu-ffi", 19 | ] 20 | 21 | [export] 22 | # These types are hidden behind a void pointer, let's include them 23 | include = [ 24 | "CActionSessionInit", 25 | "CNumberValue", 26 | "COrdinalValue", 27 | "CPercentageValue", 28 | "CInstantTimeValue", 29 | "CTimeIntervalValue", 30 | "CAmountOfMoneyValue", 31 | "CTemperatureValue", 32 | "CDurationValue", 33 | ] 34 | -------------------------------------------------------------------------------- /platforms/c/module.modulemap: -------------------------------------------------------------------------------- 1 | module Clibsnips_nlu { 2 | header "./libsnips_nlu.h" 3 | link "snips_nlu_ffi" 4 | export * 5 | } 6 | -------------------------------------------------------------------------------- /platforms/kotlin/.gitignore: -------------------------------------------------------------------------------- 1 | build/ 2 | .gradle 3 | out/ 4 | -------------------------------------------------------------------------------- /platforms/kotlin/build.gradle: -------------------------------------------------------------------------------- 1 | buildscript { 2 | ext.kotlin_version = '1.3.11' 3 | repositories { 4 | jcenter() 5 | } 6 | dependencies { 7 | classpath "org.jetbrains.kotlin:kotlin-gradle-plugin:$kotlin_version" 8 | classpath 'net.java.dev.jna:jna:4.5.0' 9 | } 10 | } 11 | 12 | apply plugin: 'kotlin' 13 | 14 | version = "0.65.6" 15 | group = "ai.snips" 16 | 17 | repositories { 18 | jcenter() 19 | maven { 20 | url "https://nexus-repository.snips.ai/repository/snips-maven-releases/" 21 | } 22 | maven { 23 | url "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/" 24 | } 25 | } 26 | 27 | configurations { 28 | aarArchives 29 | jarArchives 30 | } 31 | 32 | dependencies { 33 | compile "org.jetbrains.kotlin:kotlin-stdlib-jdk7:$kotlin_version" 34 | compile 'net.java.dev.jna:jna:4.5.0' 35 | compile "ai.snips:snips-nlu-ontology:0.67.1" 36 | testCompile 'junit:junit:4.12' 37 | testCompile 'com.google.truth:truth:0.36' 38 | } 39 | 40 | def buildType = project.hasProperty("debug") ? "debug" : "release" 41 | println "Using build type $buildType" 42 | 43 | def rustTargetPath = project.hasProperty("rustTargetPath") ? 
project.rustTargetPath : "../../target" 44 | println "Using rust target path $rustTargetPath" 45 | 46 | def soForJar = [ 47 | ["$rustTargetPath/$buildType/libsnips_nlu_ffi.so", "linux-x86-64", "linuxNative" ], 48 | ["$rustTargetPath/$buildType/libsnips_nlu_ffi.dylib", "darwin", "macOsNative" ], 49 | ["$rustTargetPath/arm-unknown-linux-gnueabihf/$buildType/libsnips_nlu_ffi.so", "linux-arm", "linuxArmCross"] 50 | ] 51 | 52 | def jarClassifier = { 53 | def existing = soForJar.findAll{ file(it[0]).exists() } 54 | if(existing.size == 1) existing[0][1] 55 | else if(existing.size == 0) "naked" 56 | else "multiarch" 57 | } 58 | 59 | jar { 60 | classifier "${ -> jarClassifier() }" 61 | } 62 | 63 | def jniLibsDir = new File(buildDir, "jniLibs") 64 | 65 | soForJar.forEach { 66 | def taskName = "copySo${it[2].capitalize()}ForJar" 67 | def soFile = file(it[0]) 68 | def destDir = new File(jniLibsDir, it[1]) 69 | 70 | task(taskName, type: Copy) { 71 | from soFile 72 | into destDir 73 | } 74 | processResources.dependsOn(taskName) 75 | 76 | } 77 | 78 | sourceSets { 79 | main { 80 | resources { 81 | srcDir jniLibsDir 82 | } 83 | } 84 | } 85 | 86 | def aarDir = new File(buildDir, "aar") 87 | 88 | task("aar", type: Zip) { 89 | destinationDir new File(buildDir, "libs") 90 | baseName = "${project.name}-android" 91 | version = project.version 92 | extension = "aar" 93 | from aarDir 94 | } 95 | 96 | task("classesJarForAar", type: Zip) { 97 | destinationDir aarDir 98 | archiveName "classes.jar" 99 | from new File(buildDir, "classes/java/main") 100 | } 101 | 102 | classesJarForAar.dependsOn(classes) 103 | aar.dependsOn(classesJarForAar) 104 | 105 | task("manifestForAar", type: Copy) { 106 | from new File("src/main/android/AndroidManifest.xml") 107 | destinationDir aarDir 108 | } 109 | 110 | aar.dependsOn(manifestForAar) 111 | 112 | def soForAar = [ 113 | ["$rustTargetPath/arm-linux-androideabi/$buildType/libsnips_nlu.so", "armeabi"], 114 | ["$rustTargetPath/armv7-linux-androideabi/$buildType/libsnips_nlu.so", "armeabi-v7a"], 115 | ["$rustTargetPath/aarch64-linux-android/$buildType/libsnips_nlu.so", "arm64-v8a"], 116 | ["$rustTargetPath/i686-linux-android/$buildType/libsnips_nlu.so", "x86"], 117 | ["$rustTargetPath/x86_64-linux-android/$buildType/libsnips_nlu.so", "x86_64"] 118 | ] 119 | 120 | soForAar.forEach { 121 | def taskName = "copySo${it[1].capitalize()}ForAar" 122 | def soFile = file(it[0]) 123 | def destDir = new File(aarDir, "jni/${it[1]}") 124 | 125 | task(taskName, type: Copy) { 126 | from soFile 127 | into destDir 128 | } 129 | aar.dependsOn(taskName) 130 | 131 | } 132 | 133 | sourceCompatibility = "1.7" 134 | targetCompatibility = "1.7" 135 | 136 | artifacts { 137 | aarArchives aar.archivePath 138 | jarArchives jar.archivePath 139 | } 140 | 141 | apply plugin: 'maven' 142 | 143 | def _nexusUsername = project.hasProperty("nexusUsername") ? nexusUsername : "" 144 | def _nexusPassword = project.hasProperty("nexusPassword") ?
nexusPassword : "" 145 | 146 | uploadJarArchives { 147 | repositories { 148 | mavenDeployer { 149 | repository(url: "https://nexus-repository.snips.ai/repository/snips-maven-releases/") { 150 | authentication(userName: _nexusUsername, password: _nexusPassword) 151 | } 152 | snapshotRepository(url: "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/") { 153 | authentication(userName: _nexusUsername, password: _nexusPassword) 154 | } 155 | } 156 | } 157 | } 158 | 159 | uploadJarArchives.dependsOn(jar) 160 | uploadArchives.dependsOn(uploadJarArchives) 161 | 162 | uploadAarArchives { 163 | repositories { 164 | mavenDeployer { 165 | repository(url: "https://nexus-repository.snips.ai/repository/snips-maven-releases/") { 166 | authentication(userName: _nexusUsername, password: _nexusPassword) 167 | } 168 | snapshotRepository(url: "https://nexus-repository.snips.ai/repository/snips-maven-snapshots/") { 169 | authentication(userName: _nexusUsername, password: _nexusPassword) 170 | } 171 | 172 | repository(url: "file://localhost/tmp/myRepo/") 173 | pom.name = "snips-nlu-android" 174 | 175 | pom.whenConfigured { pom -> 176 | pom.dependencies.find { dep -> dep.groupId == 'net.java.dev.jna' && dep.artifactId == 'jna' }.scope = "provided" 177 | } 178 | } 179 | } 180 | } 181 | 182 | uploadAarArchives.dependsOn(aar) 183 | uploadArchives.dependsOn(uploadAarArchives) 184 | -------------------------------------------------------------------------------- /platforms/kotlin/gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/platforms/kotlin/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /platforms/kotlin/gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | #Wed Aug 30 18:30:10 CEST 2017 2 | distributionBase=GRADLE_USER_HOME 3 | distributionPath=wrapper/dists 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | distributionUrl=https\://services.gradle.org/distributions/gradle-5.1-all.zip 7 | -------------------------------------------------------------------------------- /platforms/kotlin/gradlew: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | 3 | ############################################################################## 4 | ## 5 | ## Gradle start up script for UN*X 6 | ## 7 | ############################################################################## 8 | 9 | # Attempt to set APP_HOME 10 | # Resolve links: $0 may be a link 11 | PRG="$0" 12 | # Need this for relative symlinks. 13 | while [ -h "$PRG" ] ; do 14 | ls=`ls -ld "$PRG"` 15 | link=`expr "$ls" : '.*-> \(.*\)$'` 16 | if expr "$link" : '/.*' > /dev/null; then 17 | PRG="$link" 18 | else 19 | PRG=`dirname "$PRG"`"/$link" 20 | fi 21 | done 22 | SAVED="`pwd`" 23 | cd "`dirname \"$PRG\"`/" >/dev/null 24 | APP_HOME="`pwd -P`" 25 | cd "$SAVED" >/dev/null 26 | 27 | APP_NAME="Gradle" 28 | APP_BASE_NAME=`basename "$0"` 29 | 30 | # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 31 | DEFAULT_JVM_OPTS="" 32 | 33 | # Use the maximum available, or set MAX_FD != -1 to use that value. 
34 | MAX_FD="maximum" 35 | 36 | warn () { 37 | echo "$*" 38 | } 39 | 40 | die () { 41 | echo 42 | echo "$*" 43 | echo 44 | exit 1 45 | } 46 | 47 | # OS specific support (must be 'true' or 'false'). 48 | cygwin=false 49 | msys=false 50 | darwin=false 51 | nonstop=false 52 | case "`uname`" in 53 | CYGWIN* ) 54 | cygwin=true 55 | ;; 56 | Darwin* ) 57 | darwin=true 58 | ;; 59 | MINGW* ) 60 | msys=true 61 | ;; 62 | NONSTOP* ) 63 | nonstop=true 64 | ;; 65 | esac 66 | 67 | CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar 68 | 69 | # Determine the Java command to use to start the JVM. 70 | if [ -n "$JAVA_HOME" ] ; then 71 | if [ -x "$JAVA_HOME/jre/sh/java" ] ; then 72 | # IBM's JDK on AIX uses strange locations for the executables 73 | JAVACMD="$JAVA_HOME/jre/sh/java" 74 | else 75 | JAVACMD="$JAVA_HOME/bin/java" 76 | fi 77 | if [ ! -x "$JAVACMD" ] ; then 78 | die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME 79 | 80 | Please set the JAVA_HOME variable in your environment to match the 81 | location of your Java installation." 82 | fi 83 | else 84 | JAVACMD="java" 85 | which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 86 | 87 | Please set the JAVA_HOME variable in your environment to match the 88 | location of your Java installation." 89 | fi 90 | 91 | # Increase the maximum file descriptors if we can. 92 | if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then 93 | MAX_FD_LIMIT=`ulimit -H -n` 94 | if [ $? -eq 0 ] ; then 95 | if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then 96 | MAX_FD="$MAX_FD_LIMIT" 97 | fi 98 | ulimit -n $MAX_FD 99 | if [ $? -ne 0 ] ; then 100 | warn "Could not set maximum file descriptor limit: $MAX_FD" 101 | fi 102 | else 103 | warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" 104 | fi 105 | fi 106 | 107 | # For Darwin, add options to specify how the application appears in the dock 108 | if $darwin; then 109 | GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" 110 | fi 111 | 112 | # For Cygwin, switch paths to Windows format before running java 113 | if $cygwin ; then 114 | APP_HOME=`cygpath --path --mixed "$APP_HOME"` 115 | CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` 116 | JAVACMD=`cygpath --unix "$JAVACMD"` 117 | 118 | # We build the pattern for arguments to be converted via cygpath 119 | ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` 120 | SEP="" 121 | for dir in $ROOTDIRSRAW ; do 122 | ROOTDIRS="$ROOTDIRS$SEP$dir" 123 | SEP="|" 124 | done 125 | OURCYGPATTERN="(^($ROOTDIRS))" 126 | # Add a user-defined pattern to the cygpath arguments 127 | if [ "$GRADLE_CYGPATTERN" != "" ] ; then 128 | OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" 129 | fi 130 | # Now convert the arguments - kludge to limit ourselves to /bin/sh 131 | i=0 132 | for arg in "$@" ; do 133 | CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` 134 | CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option 135 | 136 | if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition 137 | eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` 138 | else 139 | eval `echo args$i`="\"$arg\"" 140 | fi 141 | i=$((i+1)) 142 | done 143 | case $i in 144 | (0) set -- ;; 145 | (1) set -- "$args0" ;; 146 | (2) set -- "$args0" "$args1" ;; 147 | (3) set -- "$args0" "$args1" "$args2" ;; 148 | (4) set -- "$args0" "$args1" "$args2" "$args3" ;; 149 | (5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;; 150 
| (6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;; 151 | (7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;; 152 | (8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;; 153 | (9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;; 154 | esac 155 | fi 156 | 157 | # Escape application args 158 | save () { 159 | for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done 160 | echo " " 161 | } 162 | APP_ARGS=$(save "$@") 163 | 164 | # Collect all arguments for the java command, following the shell quoting and substitution rules 165 | eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS" 166 | 167 | # by default we should be in the correct project dir, but when run from Finder on Mac, the cwd is wrong 168 | if [ "$(uname)" = "Darwin" ] && [ "$HOME" = "$PWD" ]; then 169 | cd "$(dirname "$0")" 170 | fi 171 | 172 | exec "$JAVACMD" "$@" 173 | -------------------------------------------------------------------------------- /platforms/kotlin/gradlew.bat: -------------------------------------------------------------------------------- 1 | @if "%DEBUG%" == "" @echo off 2 | @rem ########################################################################## 3 | @rem 4 | @rem Gradle startup script for Windows 5 | @rem 6 | @rem ########################################################################## 7 | 8 | @rem Set local scope for the variables with windows NT shell 9 | if "%OS%"=="Windows_NT" setlocal 10 | 11 | set DIRNAME=%~dp0 12 | if "%DIRNAME%" == "" set DIRNAME=. 13 | set APP_BASE_NAME=%~n0 14 | set APP_HOME=%DIRNAME% 15 | 16 | @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 17 | set DEFAULT_JVM_OPTS= 18 | 19 | @rem Find java.exe 20 | if defined JAVA_HOME goto findJavaFromJavaHome 21 | 22 | set JAVA_EXE=java.exe 23 | %JAVA_EXE% -version >NUL 2>&1 24 | if "%ERRORLEVEL%" == "0" goto init 25 | 26 | echo. 27 | echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 28 | echo. 29 | echo Please set the JAVA_HOME variable in your environment to match the 30 | echo location of your Java installation. 31 | 32 | goto fail 33 | 34 | :findJavaFromJavaHome 35 | set JAVA_HOME=%JAVA_HOME:"=% 36 | set JAVA_EXE=%JAVA_HOME%/bin/java.exe 37 | 38 | if exist "%JAVA_EXE%" goto init 39 | 40 | echo. 41 | echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 42 | echo. 43 | echo Please set the JAVA_HOME variable in your environment to match the 44 | echo location of your Java installation. 45 | 46 | goto fail 47 | 48 | :init 49 | @rem Get command-line arguments, handling Windows variants 50 | 51 | if not "%OS%" == "Windows_NT" goto win9xME_args 52 | 53 | :win9xME_args 54 | @rem Slurp the command line arguments. 
55 | set CMD_LINE_ARGS= 56 | set _SKIP=2 57 | 58 | :win9xME_args_slurp 59 | if "x%~1" == "x" goto execute 60 | 61 | set CMD_LINE_ARGS=%* 62 | 63 | :execute 64 | @rem Setup the command line 65 | 66 | set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar 67 | 68 | @rem Execute Gradle 69 | "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% 70 | 71 | :end 72 | @rem End local scope for the variables with windows NT shell 73 | if "%ERRORLEVEL%"=="0" goto mainEnd 74 | 75 | :fail 76 | rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of 77 | rem the _cmd.exe /c_ return code! 78 | if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 79 | exit /b 1 80 | 81 | :mainEnd 82 | if "%OS%"=="Windows_NT" endlocal 83 | 84 | :omega 85 | -------------------------------------------------------------------------------- /platforms/kotlin/settings.gradle: -------------------------------------------------------------------------------- 1 | rootProject.name = "snips-nlu" 2 | -------------------------------------------------------------------------------- /platforms/kotlin/src/main/android/AndroidManifest.xml: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/kotlin/src/main/kotlin/com/sun/jna/JnaUtils.kt: -------------------------------------------------------------------------------- 1 | package com.sun.jna 2 | 3 | // NativeString is package private... 4 | fun String.toJnaPointer(encoding: String) = NativeString(this, encoding).pointer 5 | -------------------------------------------------------------------------------- /platforms/python/.gitignore: -------------------------------------------------------------------------------- 1 | venv/ 2 | venv3/ 3 | venv34/ 4 | venv36/ 5 | venv37/ 6 | build/ 7 | dist/ 8 | *.pyc 9 | *.py.bak 10 | *.egg-info/ 11 | .idea 12 | .tox/ 13 | -------------------------------------------------------------------------------- /platforms/python/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2018 Snips 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License.
14 | -------------------------------------------------------------------------------- /platforms/python/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include snips_nlu_rust/__version__ 2 | include LICENSE README.rst 3 | recursive-include snips_nlu_rust/dylib/ * 4 | recursive-include ffi/ * 5 | recursive-exclude ffi/target/ * 6 | global-exclude __pycache__ *.py[cod] 7 | -------------------------------------------------------------------------------- /platforms/python/README.rst: -------------------------------------------------------------------------------- 1 | Snips NLU Rust Wrapper 2 | ====================== 3 | 4 | Installation 5 | ------------ 6 | 7 | It is recommended to install the package with a virtualenv: 8 | 9 | .. code-block:: bash 10 | 11 | virtualenv -p python3.6 venv 12 | . venv/bin/activate 13 | 14 | The package is available on pypi, and can be installed with `pip`: 15 | 16 | .. code-block:: bash 17 | 18 | pip install snips-nlu-rust 19 | -------------------------------------------------------------------------------- /platforms/python/ffi/.gitignore: -------------------------------------------------------------------------------- 1 | # Created by .ignore support plugin (hsz.mobi) 2 | ### Rust template 3 | # Generated by Cargo 4 | # will have compiled files and executables 5 | /target/ 6 | 7 | # Remove Cargo.lock from gitignore if creating an executable, leave it for libraries 8 | # More information here http://doc.crates.io/guide.html#cargotoml-vs-cargolock 9 | Cargo.lock 10 | 11 | # These are backup files generated by rustfmt 12 | **/*.rs.bk 13 | 14 | -------------------------------------------------------------------------------- /platforms/python/ffi/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "snips-nlu-python-ffi" 3 | version = "0.65.6" 4 | authors = ["Adrien Ball "] 5 | edition = "2018" 6 | 7 | [lib] 8 | name = "snips_nlu_python_ffi" 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | libc = "0.2" 13 | ffi-utils = { git = "https://github.com/snipsco/snips-utils-rs", rev = "4292ad9" } 14 | snips-nlu-ffi = { git = "https://github.com/snipsco/snips-nlu-rs", tag = "0.65.6" } 15 | -------------------------------------------------------------------------------- /platforms/python/ffi/src/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate ffi_utils; 2 | extern crate libc; 3 | extern crate snips_nlu_ffi; 4 | 5 | use ffi_utils::{CStringArray, SNIPS_RESULT}; 6 | use snips_nlu_ffi::CSnipsNluEngine; 7 | 8 | #[doc(hidden)] 9 | #[macro_export] 10 | macro_rules! 
export_c_symbol { 11 | ($alias:ident, fn $name:ident($( $arg:ident : $type:ty ),*) -> $ret:ty) => { 12 | #[no_mangle] 13 | pub extern "C" fn $alias($( $arg : $type),*) -> $ret { 14 | ::snips_nlu_ffi::$name($( $arg ),*) 15 | } 16 | }; 17 | ($alias:ident, fn $name:ident($( $arg:ident : $type:ty ),*)) => { 18 | export_c_symbol!($alias, fn $name($( $arg : $type),*) -> ()); 19 | } 20 | } 21 | 22 | export_c_symbol!(ffi_snips_nlu_engine_create_from_dir, fn snips_nlu_engine_create_from_dir(root_dir: *const libc::c_char, client: *mut *const CSnipsNluEngine) -> SNIPS_RESULT); 23 | export_c_symbol!(ffi_snips_nlu_engine_create_from_zip, fn snips_nlu_engine_create_from_zip(zip: *const libc::c_uchar, zip_size: libc::c_uint, client: *mut *const CSnipsNluEngine) -> SNIPS_RESULT); 24 | export_c_symbol!(ffi_snips_nlu_engine_run_parse_into_json, fn snips_nlu_engine_run_parse_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intents_whitelist: *const CStringArray, intents_blacklist: *const CStringArray, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 25 | export_c_symbol!(ffi_snips_nlu_engine_run_parse_with_alternatives_into_json, fn snips_nlu_engine_run_parse_with_alternatives_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intents_whitelist: *const CStringArray, intents_blacklist: *const CStringArray, intents_alternatives: libc::c_uint, slots_alternatives: libc::c_uint, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 26 | export_c_symbol!(ffi_snips_nlu_engine_run_get_slots_into_json, fn snips_nlu_engine_run_get_slots_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intent: *const libc::c_char, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 27 | export_c_symbol!(ffi_snips_nlu_engine_run_get_slots_with_alternatives_into_json, fn snips_nlu_engine_run_get_slots_with_alternatives_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, intent: *const libc::c_char, slots_alternatives: libc::c_uint, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 28 | export_c_symbol!(ffi_snips_nlu_engine_run_get_intents_into_json, fn snips_nlu_engine_run_get_intents_into_json(client: *const CSnipsNluEngine, input: *const libc::c_char, result_json: *mut *const libc::c_char) -> SNIPS_RESULT); 29 | export_c_symbol!(ffi_snips_nlu_engine_get_last_error, fn snips_nlu_engine_get_last_error(error: *mut *const libc::c_char) -> SNIPS_RESULT); 30 | export_c_symbol!(ffi_snips_nlu_engine_destroy_string, fn snips_nlu_engine_destroy_string(string: *mut libc::c_char) -> SNIPS_RESULT); 31 | export_c_symbol!(ffi_snips_nlu_engine_destroy_client, fn snips_nlu_engine_destroy_client(client: *mut CSnipsNluEngine) -> SNIPS_RESULT); 32 | export_c_symbol!(ffi_snips_nlu_engine_get_model_version, fn snips_nlu_engine_get_model_version(version: *mut *const libc::c_char) -> SNIPS_RESULT); 33 | -------------------------------------------------------------------------------- /platforms/python/requirements.txt: -------------------------------------------------------------------------------- 1 | setuptools_rust==0.8.4 2 | wheel==0.30.0 3 | -------------------------------------------------------------------------------- /platforms/python/setup.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | 3 | import io 4 | import os 5 | import sys 6 | 7 | from setuptools import setup, find_packages 8 | from setuptools_rust import Binding, RustExtension 9 | 10 | packages = [p for p in 
find_packages() if "tests" not in p] 11 | 12 | PACKAGE_NAME = "snips_nlu_rust" 13 | ROOT_PATH = os.path.dirname(os.path.abspath(__file__)) 14 | PACKAGE_PATH = os.path.join(ROOT_PATH, PACKAGE_NAME) 15 | VERSION = "__version__" 16 | README = os.path.join(ROOT_PATH, "README.rst") 17 | 18 | RUST_EXTENSION_NAME = 'snips_nlu_rust.dylib.libsnips_nlu_rs' 19 | CARGO_ROOT_PATH = os.path.join(ROOT_PATH, 'ffi') 20 | CARGO_FILE_PATH = os.path.join(CARGO_ROOT_PATH, 'Cargo.toml') 21 | CARGO_TARGET_DIR = os.path.join(CARGO_ROOT_PATH, 'target') 22 | os.environ['CARGO_TARGET_DIR'] = CARGO_TARGET_DIR 23 | 24 | with io.open(os.path.join(PACKAGE_PATH, VERSION)) as f: 25 | version = f.readline() 26 | 27 | with io.open(README, "rt", encoding="utf8") as f: 28 | readme = f.read() 29 | 30 | setup(name=PACKAGE_NAME, 31 | version=version, 32 | description='Python wrapper of the Rust Snips NLU engine', 33 | long_description=readme, 34 | author='Thibaut Lorrain, Adrien Ball', 35 | author_email='thibaut.lorrain@snips.ai, adrien.ball@snips.ai', 36 | classifiers=[ 37 | "Programming Language :: Python :: 2", 38 | "Programming Language :: Python :: 2.7", 39 | "Programming Language :: Python :: 3", 40 | "Programming Language :: Python :: 3.4", 41 | "Programming Language :: Python :: 3.5", 42 | "Programming Language :: Python :: 3.6", 43 | "Programming Language :: Python :: 3.7", 44 | ], 45 | install_requires=[ 46 | "future>=0.16,<0.18", 47 | "pathlib>=1.0,<2.0; python_version < '3.4'", 48 | ], 49 | packages=packages, 50 | include_package_data=True, 51 | rust_extensions=[RustExtension(RUST_EXTENSION_NAME, CARGO_FILE_PATH, 52 | debug="develop" in sys.argv, 53 | binding=Binding.NoBinding)], 54 | zip_safe=False) 55 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | 3 | from snips_nlu_rust.nlu_engine import NLUEngine 4 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/__version__: -------------------------------------------------------------------------------- 1 | 0.65.6 2 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/dylib/.gitignore: -------------------------------------------------------------------------------- 1 | *.dylib 2 | *.so 3 | *.dll -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/snipsco/snips-nlu-rs/88a047aacf40f3316a9cbdde850ea1798af39c2a/platforms/python/snips_nlu_rust/tests/__init__.py -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/tests/utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | 3 | import io 4 | import os 5 | 6 | TEST_DATA_PATH = os.path.join(os.path.dirname(os.path.abspath(__file__)), 7 | "..", "..", "..", "..", "data", "tests") 8 | 9 | GAME_ENGINE_DIR = os.path.join(TEST_DATA_PATH, "models", "nlu_engine_game") 10 | BEVERAGE_ENGINE_DIR = os.path.join(TEST_DATA_PATH, "models", 11 | "nlu_engine_beverage") 12 | BEVERAGE_ENGINE_ZIP_PATH = os.path.join(TEST_DATA_PATH, "models", 13 | "nlu_engine_beverage.zip") 14 | 15 | with 
io.open(BEVERAGE_ENGINE_ZIP_PATH, mode='rb') as f: 16 | BEVERAGE_ENGINE_ZIP_BYTES = bytearray(f.read()) 17 | -------------------------------------------------------------------------------- /platforms/python/snips_nlu_rust/utils.py: -------------------------------------------------------------------------------- 1 | from _ctypes import Structure, POINTER, byref 2 | from contextlib import contextmanager 3 | from ctypes import cdll, c_char_p, c_int32, string_at 4 | from pathlib import Path 5 | 6 | dylib_dir = Path(__file__).parent / "dylib" 7 | dylib_path = list(dylib_dir.glob("libsnips_nlu*"))[0] 8 | lib = cdll.LoadLibrary(str(dylib_path)) 9 | 10 | 11 | @contextmanager 12 | def string_pointer(ptr): 13 | try: 14 | yield ptr 15 | finally: 16 | lib.ffi_snips_nlu_engine_destroy_string(ptr) 17 | 18 | 19 | class CStringArray(Structure): 20 | _fields_ = [ 21 | ("data", POINTER(c_char_p)), 22 | ("size", c_int32) 23 | ] 24 | 25 | 26 | def check_ffi_error(exit_code, error_context_msg): 27 | if exit_code != 0: 28 | with string_pointer(c_char_p()) as ptr: 29 | if lib.snips_nlu_engine_get_last_error(byref(ptr)) == 0: 30 | ffi_error_message = string_at(ptr).decode("utf8") 31 | else: 32 | ffi_error_message = "see stderr" 33 | raise ValueError("%s: %s" % (error_context_msg, ffi_error_message)) 34 | -------------------------------------------------------------------------------- /platforms/python/tox.ini: -------------------------------------------------------------------------------- 1 | [tox] 2 | envlist = py27, py35, py36, py37 3 | skipsdist = True 4 | 5 | [testenv] 6 | skip_install = true 7 | commands = 8 | pip install -r requirements.txt 9 | pip install -e . --verbose 10 | python -m unittest discover 11 | setenv= 12 | LANG=en_US.UTF-8 13 | PYTHONIOENCODING=UTF-8 14 | -------------------------------------------------------------------------------- /platforms/swift/.gitignore: -------------------------------------------------------------------------------- 1 | #### joe made this: http://goel.io/joe 2 | 3 | #### swift #### 4 | # Xcode 5 | # 6 | # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore 7 | 8 | ## Build generated 9 | build/ 10 | DerivedData/ 11 | 12 | ## Various settings 13 | *.pbxuser 14 | !default.pbxuser 15 | *.mode1v3 16 | !default.mode1v3 17 | *.mode2v3 18 | !default.mode2v3 19 | *.perspectivev3 20 | !default.perspectivev3 21 | xcuserdata/ 22 | 23 | ## Other 24 | *.moved-aside 25 | *.xccheckout 26 | *.xcscmblueprint 27 | 28 | ## Obj-C/Swift specific 29 | *.hmap 30 | *.ipa 31 | *.dSYM.zip 32 | *.dSYM 33 | 34 | ## Playgrounds 35 | timeline.xctimeline 36 | playground.xcworkspace 37 | 38 | # Swift Package Manager 39 | # 40 | # Add this line if you want to avoid checking in source code from Swift Package Manager dependencies. 41 | # Packages/ 42 | # Package.pins 43 | .build/ 44 | 45 | # CocoaPods 46 | # 47 | # We recommend against adding the Pods directory to your .gitignore. However 48 | # you should judge for yourself, the pros and cons are mentioned at: 49 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 50 | # 51 | # Pods/ 52 | 53 | # Carthage 54 | # 55 | # Add this line if you want to avoid checking in source code from Carthage dependencies. 56 | # Carthage/Checkouts 57 | 58 | Carthage/Build 59 | 60 | # fastlane 61 | # 62 | # It is recommended to not store the screenshots in the git repo. 
Instead, use fastlane to re-generate the 63 | # screenshots whenever they are needed. 64 | # For more information about the recommended setup visit: 65 | # https://docs.fastlane.tools/best-practices/source-control/#source-control 66 | 67 | fastlane/report.xml 68 | fastlane/Preview.html 69 | fastlane/screenshots 70 | fastlane/test_output 71 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>IDEDidComputeMac32BitWarning</key> 6 | <true/> 7 | </dict> 8 | </plist> 9 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/Dependencies/.gitignore: -------------------------------------------------------------------------------- 1 | ios/ 2 | macos/ 3 | tvos/ 4 | watchos/ 5 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/Dependencies/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | 3 | : ${PROJECT_DIR:?"${0##*/} must be invoked as part of an Xcode script phase"} 4 | 5 | set -e 6 | 7 | VERSION="0.65.6" 8 | SYSTEM=$(echo $1 | tr '[:upper:]' '[:lower:]') 9 | LIBRARY_NAME=libsnips_nlu_ffi 10 | LIBRARY_NAME_A=${LIBRARY_NAME}.a 11 | LIBRARY_NAME_H=libsnips_nlu.h 12 | OUT_DIR=${PROJECT_DIR}/Dependencies/${SYSTEM} 13 | 14 | if [ -z "$TARGET_BUILD_TYPE" ]; then 15 | TARGET_BUILD_TYPE=$(echo ${CONFIGURATION} | tr '[:upper:]' '[:lower:]') 16 | fi 17 | 18 | if [ "${SYSTEM}" != "ios" ] && [ "${SYSTEM}" != "macos" ]; then 19 | echo "Given system should be 'ios' or 'macos'." 20 | exit 1 21 | fi 22 | 23 | mkdir -p ${OUT_DIR} 24 | 25 | install_remote_core () { 26 | echo "Trying remote installation" 27 | 28 | local filename=snips-nlu-${SYSTEM}.${VERSION}.tgz 29 | local url=https://s3.amazonaws.com/snips/snips-nlu-dev/${filename} 30 | 31 | echo "Will download '${filename}' in '${OUT_DIR}'" 32 | if curl --output /dev/null --silent --head --fail "$url"; then 33 | $(cd ${OUT_DIR} && curl -s ${url} | tar zxv) 34 | else 35 | echo "Version ${VERSION} doesn't seem to have been released yet" 36 | echo "Could not find any file at '${url}'" 37 | echo "Please file issue on 'https://github.com/snipsco/snips-nlu-rs' if you believe this is an issue" 38 | return 1 39 | fi 40 | 41 | return 0 42 | } 43 | 44 | install_local_core () { 45 | echo "Trying local installation" 46 | 47 | # TODO: Find a better way to retrieve root_dir 48 | local root_dir=${PROJECT_DIR}/../../../ 49 | local target_dir=${root_dir}/target/ 50 | 51 | if [ ${SYSTEM} = macos ]; then 52 | echo "Using macOS local build" 53 | 54 | local library_path=${target_dir}/${TARGET_BUILD_TYPE}/${LIBRARY_NAME_A} 55 | if [ !
-e ${library_path} ]; then 56 | echo "Missing file '${library_path}'" 57 | return 1 58 | fi 59 | 60 | cp ${library_path} ${OUT_DIR} 61 | cp ${PROJECT_DIR}/../../c/${LIBRARY_NAME_H} ${OUT_DIR} 62 | cp ${PROJECT_DIR}/../../c/module.modulemap ${OUT_DIR} 63 | 64 | elif [ ${SYSTEM} = ios ]; then 65 | echo "Using iOS local build" 66 | local archs_array=( ${ARCHS} ) 67 | 68 | for arch in "${archs_array[@]}"; do 69 | if [ ${arch} = arm64 ]; then 70 | local arch=aarch64 71 | fi 72 | local library_path=${target_dir}/${arch}-apple-ios/${TARGET_BUILD_TYPE}/${LIBRARY_NAME_A} 73 | if [ ! -e ${library_path} ]; then 74 | echo "Can't find library for arch ${arch}" 75 | echo "Missing file '${library_path}'" 76 | return 1 77 | fi 78 | cp ${library_path} ${OUT_DIR}/${LIBRARY_NAME}-${arch}.a 79 | done 80 | 81 | lipo -create $(find ${OUT_DIR}/${LIBRARY_NAME}-*.a) \ 82 | -output ${OUT_DIR}/${LIBRARY_NAME_A} 83 | cp ${PROJECT_DIR}/../../c/${LIBRARY_NAME_H} ${OUT_DIR} 84 | cp ${PROJECT_DIR}/../../c/module.modulemap ${OUT_DIR} 85 | 86 | else 87 | echo "${SYSTEM} isn't supported" 88 | return 1 89 | fi 90 | 91 | return 0 92 | } 93 | 94 | core_is_present () { 95 | echo "Checking if core is present (and complete)" 96 | local files=( 97 | ${OUT_DIR}/module.modulemap 98 | ${OUT_DIR}/${LIBRARY_NAME_A} 99 | ${OUT_DIR}/${LIBRARY_NAME_H} 100 | ) 101 | 102 | for file in "${files[@]}"; do 103 | if [ ! -e $file ]; then 104 | echo "Core isn't complete" 105 | echo "Missing file '$file'" 106 | return 1 107 | fi 108 | done 109 | 110 | echo "Core is present" 111 | return 0 112 | } 113 | 114 | core_is_up_to_date () { 115 | echo "Checking if core is up-to-date" 116 | 117 | local header_path=${OUT_DIR}/${LIBRARY_NAME_H} 118 | 119 | if [ -z $(grep "SNIPS_NLU_VERSION" $header_path) ]; then 120 | echo "SNIPS_NLU_VERSION not present. Skipping up-to-date check..." 121 | return 0 122 | fi 123 | 124 | local core_version=$(grep "SNIPS_NLU_VERSION" $header_path | cut -d'"' -f2) 125 | 126 | if [ "$core_version" = ${VERSION} ]; then 127 | echo "Core is up-to-date" 128 | return 0 129 | fi 130 | 131 | echo "Core isn't up-to-date" 132 | echo "Found version ${core_version}, expected version ${VERSION}" 133 | return 1 134 | } 135 | 136 | echo "Will check if core is present and up-to-date" 137 | if core_is_present && core_is_up_to_date; then 138 | echo "Core seems present and up-to-date !" 139 | exit 0 140 | fi 141 | 142 | rm -f ${OUT_DIR}/* 143 | if [ "${SNIPS_USE_LOCAL}" == 1 ]; then 144 | echo "SNIPS_USE_LOCAL=1 Will try local installation only" 145 | install_local_core && exit 0 146 | elif [ "${SNIPS_USE_REMOTE}" == 1 ]; then 147 | echo "SNIPS_USE_REMOTE=1 Will try remote installation only" 148 | install_remote_core && exit 0 149 | else 150 | if ! 
install_local_core; then 151 | echo "Local installation failed" 152 | install_remote_core && exit 0 153 | fi 154 | fi 155 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/project.xcworkspace/contents.xcworkspacedata: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/xcshareddata/xcschemes/SnipsNlu-iOS.xcscheme: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu.xcodeproj/xcshareddata/xcschemes/SnipsNlu-macOS.xcscheme: -------------------------------------------------------------------------------- [XML content lost in extraction] -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>en</string> 7 | <key>CFBundleExecutable</key> 8 | <string>$(EXECUTABLE_NAME)</string> 9 | <key>CFBundleIdentifier</key> 10 | <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> 11 | <key>CFBundleInfoDictionaryVersion</key> 12 | <string>6.0</string> 13 | <key>CFBundleName</key> 14 | <string>$(PRODUCT_NAME)</string> 15 | <key>CFBundlePackageType</key> 16 | <string>FMWK</string> 17 | <key>CFBundleShortVersionString</key> 18 | <string>1.0</string> 19 | <key>CFBundleVersion</key> 20 | <string>$(CURRENT_PROJECT_VERSION)</string> 21 | <key>NSHumanReadableCopyright</key> 22 | <string>Copyright © 2017 Snips. All rights reserved.</string> 23 | <key>NSPrincipalClass</key> 24 | <string></string> 25 | </dict> 26 | </plist> 27 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNlu/SnipsNlu.h: -------------------------------------------------------------------------------- 1 | // 2 | // SnipsNlu.h 3 | // SnipsNlu 4 | // 5 | // Created by Kevin Lefevre on 23/06/2017. 6 | // Copyright © 2017 Snips. All rights reserved. 7 | // 8 | 9 | #import <Foundation/Foundation.h> 10 | 11 | //! Project version number for SnipsNlu. 12 | FOUNDATION_EXPORT double SnipsNluVersionNumber; 13 | 14 | //! Project version string for SnipsNlu.
15 | FOUNDATION_EXPORT const unsigned char SnipsNluVersionString[]; 16 | 17 | // In this header, you should import all the public headers of your framework using statements like #import <SnipsNlu/PublicHeader.h> 18 | 19 | 20 | -------------------------------------------------------------------------------- /platforms/swift/SnipsNlu/SnipsNluTests/Info.plist: -------------------------------------------------------------------------------- 1 | <?xml version="1.0" encoding="UTF-8"?> 2 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> 3 | <plist version="1.0"> 4 | <dict> 5 | <key>CFBundleDevelopmentRegion</key> 6 | <string>en</string> 7 | <key>CFBundleExecutable</key> 8 | <string>$(EXECUTABLE_NAME)</string> 9 | <key>CFBundleIdentifier</key> 10 | <string>$(PRODUCT_BUNDLE_IDENTIFIER)</string> 11 | <key>CFBundleInfoDictionaryVersion</key> 12 | <string>6.0</string> 13 | <key>CFBundleName</key> 14 | <string>$(PRODUCT_NAME)</string> 15 | <key>CFBundlePackageType</key> 16 | <string>BNDL</string> 17 | <key>CFBundleShortVersionString</key> 18 | <string>1.0</string> 19 | <key>CFBundleVersion</key> 20 | <string>1</string> 21 | </dict> 22 | </plist> 23 | -------------------------------------------------------------------------------- /post_release.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | VERSION=$1 4 | 5 | if [[ -z "$VERSION" ]] 6 | then 7 | echo "Usage: $0 <version>" 8 | exit 1 9 | fi 10 | 11 | set -ex 12 | 13 | ./update_version.sh ${VERSION} 14 | 15 | git commit . -m "Set post-release version to $VERSION" 16 | -------------------------------------------------------------------------------- /src/entity_parser/builtin_entity_parser.rs: -------------------------------------------------------------------------------- 1 | use std::path::Path; 2 | use std::sync::Mutex; 3 | 4 | use log::info; 5 | use snips_nlu_ontology::{BuiltinEntity, BuiltinEntityKind}; 6 | use snips_nlu_parsers::BuiltinEntityParser as _BuiltinEntityParser; 7 | 8 | use super::utils::Cache; 9 | use crate::errors::*; 10 | 11 | pub trait BuiltinEntityParser: Send + Sync { 12 | fn extract_entities( 13 | &self, 14 | sentence: &str, 15 | filter_entity_kinds: Option<&[BuiltinEntityKind]>, 16 | use_cache: bool, 17 | max_alternative_resolved_values: usize, 18 | ) -> Result<Vec<BuiltinEntity>>; 19 | } 20 | 21 | pub struct CachingBuiltinEntityParser { 22 | parser: _BuiltinEntityParser, 23 | cache: Mutex<Cache<CacheKey, Vec<BuiltinEntity>>>, 24 | } 25 | 26 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 27 | struct CacheKey { 28 | input: String, 29 | kinds: Option<Vec<BuiltinEntityKind>>, 30 | max_alternative_resolved_values: usize, 31 | } 32 | 33 | impl BuiltinEntityParser for CachingBuiltinEntityParser { 34 | fn extract_entities( 35 | &self, 36 | sentence: &str, 37 | filter_entity_kinds: Option<&[BuiltinEntityKind]>, 38 | use_cache: bool, 39 | max_alternative_resolved_values: usize, 40 | ) -> Result<Vec<BuiltinEntity>> { 41 | let lowercased_sentence = sentence.to_lowercase(); 42 | if !use_cache { 43 | return self.parser.extract_entities( 44 | &lowercased_sentence, 45 | filter_entity_kinds, 46 | max_alternative_resolved_values, 47 | ); 48 | } 49 | let cache_key = CacheKey { 50 | input: lowercased_sentence, 51 | kinds: filter_entity_kinds.map(|entity_kinds| entity_kinds.to_vec()), 52 | max_alternative_resolved_values, 53 | }; 54 | 55 | self.cache 56 | .lock() 57 | .unwrap() 58 | .try_cache(&cache_key, |cache_key| { 59 | self.parser.extract_entities( 60 | &cache_key.input, 61 | filter_entity_kinds, 62 | max_alternative_resolved_values, 63 | ) 64 | }) 65 | } 66 | } 67 | 68 | impl CachingBuiltinEntityParser { 69 | pub fn from_path<P: AsRef<Path>>(path: P, cache_capacity: usize) -> Result<Self> { 70 | info!("Loading builtin entity parser ({:?}) ...", path.as_ref()); 71 | let parser = _BuiltinEntityParser::from_path(path)?; 72 | let cache = Mutex::new(Cache::new(cache_capacity)); 73 | info!("Builtin entity parser loaded"); 74 | Ok(Self { parser, cache }) 75 | } 76 | } 77 |
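A note on the caching pattern in `builtin_entity_parser.rs` above: `CachingBuiltinEntityParser` memoizes each parse behind a `Mutex<Cache<...>>`, keyed on the lowercased input plus the entity-kind filter and the number of alternatives, so repeated queries skip the expensive parser call. The following is a minimal stand-alone sketch of the same LRU memoization idiom, assuming the same `lru-cache` crate used by `Cache` below; `expensive_parse` is a hypothetical stand-in for the real parser call:

    use lru_cache::LruCache;
    use std::sync::Mutex;

    // Hypothetical stand-in for the costly builtin entity parser call.
    fn expensive_parse(input: &str) -> Result<Vec<String>, String> {
        Ok(vec![format!("entity found in '{}'", input)])
    }

    fn main() -> Result<(), String> {
        // Shared LRU cache guarded by a Mutex, as in CachingBuiltinEntityParser.
        let cache: Mutex<LruCache<String, Vec<String>>> = Mutex::new(LruCache::new(100));

        for query in &["Set a timer", "set a TIMER"] {
            // Lowercasing the key makes the second query a cache hit.
            let key = query.to_lowercase();
            let mut guard = cache.lock().unwrap();
            let value = if let Some(hit) = guard.get_mut(&key) {
                hit.clone() // cache hit: reuse the stored result
            } else {
                let computed = expensive_parse(&key)?; // cache miss: compute...
                guard.insert(key.clone(), computed.clone()); // ...and store
                computed
            };
            println!("{:?}", value);
        }
        Ok(())
    }

Unlike this sketch, the real parser also bypasses the cache entirely when `use_cache` is false.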
-------------------------------------------------------------------------------- /src/entity_parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod builtin_entity_parser; 2 | pub mod custom_entity_parser; 3 | mod utils; 4 | 5 | pub use self::builtin_entity_parser::*; 6 | pub use self::custom_entity_parser::*; 7 | -------------------------------------------------------------------------------- /src/entity_parser/utils.rs: -------------------------------------------------------------------------------- 1 | use std::hash::Hash; 2 | 3 | use lru_cache::LruCache; 4 | 5 | use crate::errors::*; 6 | 7 | pub struct Cache<K, V>(LruCache<K, V>) 8 | where 9 | K: Eq + Hash + Clone, 10 | V: Clone; 11 | 12 | impl<K, V> Cache<K, V> 13 | where 14 | K: Eq + Hash + Clone, 15 | V: Clone, 16 | { 17 | pub fn new(capacity: usize) -> Self { 18 | Cache(LruCache::new(capacity)) 19 | } 20 | 21 | pub fn try_cache<F: Fn(&K) -> Result<V>>(&mut self, key: &K, producer: F) -> Result<V> { 22 | let cached_value = self.0.get_mut(key).cloned(); 23 | if let Some(value) = cached_value { 24 | return Ok(value); 25 | } 26 | let value = producer(key)?; 27 | self.0.insert(key.clone(), value.clone()); 28 | Ok(value) 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /src/errors.rs: -------------------------------------------------------------------------------- 1 | use failure::Fail; 2 | 3 | #[derive(Debug, Fail)] 4 | pub enum SnipsNluError { 5 | #[fail(display = "Unable to read file '{}'", _0)] 6 | ModelLoad(String), 7 | #[fail(display = "Mismatched model version: model is {} but runner is {}", model, runner)] 8 | WrongModelVersion{ model: String, runner: &'static str}, 9 | #[fail(display = "Unknown intent: '{}'", _0)] 10 | UnknownIntent(String), 11 | #[fail(display = "Internal error: {}", _0)] 12 | InternalError(String), 13 | } 14 | 15 | pub type Result<T> = ::std::result::Result<T, ::failure::Error>; 16 | -------------------------------------------------------------------------------- /src/injection/errors.rs: -------------------------------------------------------------------------------- 1 | use failure::{Backtrace, Context, Fail}; 2 | use std::fmt; 3 | use std::fmt::Display; 4 | 5 | #[derive(Debug)] 6 | pub struct NluInjectionError { 7 | inner: Context<NluInjectionErrorKind>, 8 | } 9 | 10 | #[derive(Debug, Fail)] 11 | pub enum NluInjectionErrorKind { 12 | #[fail(display = "Entity is not injectable: {}", msg)] 13 | EntityNotInjectable { msg: String }, 14 | #[fail(display = "Internal injection error: {}", msg)] 15 | InternalInjectionError { msg: String }, 16 | } 17 | 18 | // Boilerplate 19 | impl Fail for NluInjectionError { 20 | fn cause(&self) -> Option<&dyn Fail> { 21 | self.inner.cause() 22 | } 23 | 24 | fn backtrace(&self) -> Option<&Backtrace> { 25 | self.inner.backtrace() 26 | } 27 | } 28 | 29 | impl Display for NluInjectionError { 30 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { 31 | Display::fmt(&self.inner, f) 32 | } 33 | } 34 | 35 | impl From<NluInjectionErrorKind> for NluInjectionError { 36 | fn from(kind: NluInjectionErrorKind) -> NluInjectionError { 37 | NluInjectionError { 38 | inner: Context::new(kind), 39 | } 40 | } 41 | } 42 | 43 | impl From<Context<NluInjectionErrorKind>> for NluInjectionError { 44 | fn from(inner: Context<NluInjectionErrorKind>) -> NluInjectionError { 45 | NluInjectionError { inner } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/injection/mod.rs: -------------------------------------------------------------------------------- 1 | mod errors; 2 | mod injection; 3 | 4 | pub use
self::errors::{NluInjectionError, NluInjectionErrorKind}; 5 | pub use self::injection::{InjectedEntity, InjectedValue, NluInjector}; 6 | -------------------------------------------------------------------------------- /src/intent_classifier/logreg.rs: -------------------------------------------------------------------------------- 1 | use ndarray::prelude::*; 2 | use ndarray::{array, stack}; 3 | 4 | use crate::errors::*; 5 | 6 | /// The multiclass probability estimates are derived from binary (one-vs.-rest) 7 | /// estimates by simple normalization 8 | pub struct MulticlassLogisticRegression { 9 | /// matrix with shape (f, c) 10 | /// ------------------------ 11 | /// 12 | /// - f = number of features 13 | /// - c = number of classes 14 | weights: Array2<f32>, 15 | } 16 | 17 | impl MulticlassLogisticRegression { 18 | fn nb_features(&self) -> usize { 19 | // without intercept 20 | self.weights.dim().0 - 1 21 | } 22 | 23 | fn nb_classes(&self) -> usize { 24 | self.weights.dim().1 25 | } 26 | 27 | fn is_binary(&self) -> bool { 28 | self.nb_classes() == 1 29 | } 30 | } 31 | 32 | impl MulticlassLogisticRegression { 33 | pub fn new(intercept: Array1<f32>, weights: Array2<f32>) -> Result<Self> { 34 | let nb_classes = intercept.dim(); 35 | let reshaped_intercept = intercept.into_shape((1, nb_classes))?; 36 | let weights_with_intercept = stack![Axis(0), reshaped_intercept, weights]; 37 | Ok(Self { 38 | weights: weights_with_intercept, 39 | }) 40 | } 41 | 42 | pub fn run(&self, features: &ArrayView1<f32>) -> Result<Array1<f32>> { 43 | let reshaped_features = features.into_shape((1, self.nb_features()))?; 44 | let reshaped_features = stack![Axis(1), array![[1.]], reshaped_features]; 45 | let mut result = reshaped_features 46 | .dot(&self.weights) 47 | .into_shape(self.nb_classes())?; 48 | result.mapv_inplace(logit); 49 | if self.is_binary() { 50 | return Ok(arr1(&[1.0 - result[0], result[0]])); 51 | } 52 | Ok(result) 53 | } 54 | } 55 | 56 | fn logit(x: f32) -> f32 { 57 | 1. / (1.
+ (-x).exp()) 58 | } 59 | 60 | #[cfg(test)] 61 | mod tests { 62 | use super::MulticlassLogisticRegression; 63 | use crate::testutils::assert_epsilon_eq_array1; 64 | use ndarray::array; 65 | 66 | #[test] 67 | fn test_multiclass_logistic_regression() { 68 | // Given 69 | let intercept = array![0.98, 0.32, -0.76]; 70 | let weights = array![ 71 | [2.5, -0.6, 0.5], 72 | [1.2, 1.2, -2.7], 73 | [1.5, 0.1, -3.2], 74 | [-0.9, 1.4, 1.8] 75 | ]; 76 | 77 | let features = array![0.4, -2.3, 1.9, 1.3]; 78 | let regression = MulticlassLogisticRegression::new(intercept, weights).unwrap(); 79 | 80 | // When 81 | let predictions = regression.run(&features.view()).unwrap(); 82 | 83 | // Then 84 | let expected_predictions = array![0.7109495, 0.3384968, 0.8710191]; 85 | assert_epsilon_eq_array1(&predictions, &expected_predictions, 1e-06); 86 | } 87 | 88 | #[test] 89 | fn test_multiclass_logistic_regression_when_binary() { 90 | // Given 91 | let intercept = array![0.98]; 92 | let weights = array![[2.5], [1.2], [1.5], [-0.9]]; 93 | 94 | let features = array![0.4, -2.3, 1.9, 1.3]; 95 | let regression = MulticlassLogisticRegression::new(intercept, weights).unwrap(); 96 | 97 | // When 98 | let predictions = regression.run(&features.view()).unwrap(); 99 | 100 | // Then 101 | let expected_predictions = array![0.2890504, 0.7109495]; 102 | assert_epsilon_eq_array1(&predictions, &expected_predictions, 1e-06); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/intent_classifier/mod.rs: -------------------------------------------------------------------------------- 1 | mod featurizer; 2 | mod log_reg_intent_classifier; 3 | mod logreg; 4 | 5 | use std::fs::File; 6 | use std::path::Path; 7 | use std::sync::Arc; 8 | 9 | use crate::errors::*; 10 | use failure::{format_err, ResultExt}; 11 | use snips_nlu_ontology::IntentClassifierResult; 12 | 13 | pub use self::featurizer::{CooccurrenceVectorizer, Featurizer, TfidfVectorizer}; 14 | pub use self::log_reg_intent_classifier::LogRegIntentClassifier; 15 | use crate::models::ProcessingUnitMetadata; 16 | use crate::resources::SharedResources; 17 | 18 | pub trait IntentClassifier: Send + Sync { 19 | fn get_intent( 20 | &self, 21 | input: &str, 22 | intents_whitelist: Option<&[&str]>, 23 | ) -> Result<IntentClassifierResult>; 24 | 25 | fn get_intents(&self, input: &str) -> Result<Vec<IntentClassifierResult>>; 26 | } 27 | 28 | pub fn build_intent_classifier<P: AsRef<Path>>( 29 | path: P, 30 | shared_resources: Arc<SharedResources>, 31 | ) -> Result<Box<dyn IntentClassifier>> { 32 | let metadata_path = path.as_ref().join("metadata.json"); 33 | let metadata_file = File::open(&metadata_path).with_context(|_| { 34 | format!( 35 | "Cannot open intent classifier metadata file '{:?}'", 36 | &metadata_path 37 | ) 38 | })?; 39 | let metadata: ProcessingUnitMetadata = serde_json::from_reader(metadata_file) 40 | .with_context(|_| "Cannot deserialize intent classifier json data")?; 41 | match metadata { 42 | ProcessingUnitMetadata::LogRegIntentClassifier => { 43 | Ok(Box::new(LogRegIntentClassifier::from_path(path, shared_resources)?)
as _) 44 | } 45 | _ => Err(format_err!("{:?} is not an intent classifier", metadata)), 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/intent_parser/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod deterministic_intent_parser; 2 | pub mod lookup_intent_parser; 3 | pub mod probabilistic_intent_parser; 4 | 5 | use std::path::Path; 6 | use std::sync::Arc; 7 | 8 | use failure::format_err; 9 | use snips_nlu_ontology::IntentClassifierResult; 10 | 11 | pub use self::deterministic_intent_parser::DeterministicIntentParser; 12 | pub use self::lookup_intent_parser::LookupIntentParser; 13 | pub use self::probabilistic_intent_parser::ProbabilisticIntentParser; 14 | use crate::errors::*; 15 | use crate::models::ProcessingUnitMetadata; 16 | use crate::resources::SharedResources; 17 | pub use crate::slot_utils::InternalSlot; 18 | use crate::utils::IntentName; 19 | 20 | #[derive(Debug, Clone, PartialEq)] 21 | pub struct InternalParsingResult { 22 | pub intent: IntentClassifierResult, 23 | pub slots: Vec<InternalSlot>, 24 | } 25 | 26 | impl InternalParsingResult { 27 | pub fn empty() -> InternalParsingResult { 28 | InternalParsingResult { 29 | intent: IntentClassifierResult { 30 | intent_name: None, 31 | confidence_score: 1.0, 32 | }, 33 | slots: vec![], 34 | } 35 | } 36 | } 37 | 38 | pub fn internal_parsing_result( 39 | intent_name: Option<IntentName>, 40 | intent_proba: f32, 41 | slots: Vec<InternalSlot>, 42 | ) -> InternalParsingResult { 43 | InternalParsingResult { 44 | intent: IntentClassifierResult { 45 | intent_name, 46 | confidence_score: intent_proba, 47 | }, 48 | slots, 49 | } 50 | } 51 | 52 | pub trait IntentParser: Send + Sync { 53 | fn parse( 54 | &self, 55 | input: &str, 56 | intents_whitelist: Option<&[&str]>, 57 | ) -> Result<InternalParsingResult>; 58 | 59 | fn get_intents(&self, input: &str) -> Result<Vec<IntentClassifierResult>>; 60 | 61 | fn get_slots(&self, input: &str, intent: &str) -> Result<Vec<InternalSlot>>; 62 | } 63 | 64 | pub fn build_intent_parser<P: AsRef<Path>>( 65 | metadata: ProcessingUnitMetadata, 66 | path: P, 67 | shared_resources: Arc<SharedResources>, 68 | ) -> Result<Box<dyn IntentParser>> { 69 | match metadata { 70 | ProcessingUnitMetadata::LookupIntentParser => { 71 | Ok(Box::new(LookupIntentParser::from_path(path, shared_resources)?)
as _) 72 | } 73 | ProcessingUnitMetadata::DeterministicIntentParser => Ok(Box::new( 74 | DeterministicIntentParser::from_path(path, shared_resources)?, 75 | ) as _), 76 | ProcessingUnitMetadata::ProbabilisticIntentParser => Ok(Box::new( 77 | ProbabilisticIntentParser::from_path(path, shared_resources)?, 78 | ) as _), 79 | _ => Err(format_err!("{:?} is not an intent parser", metadata)), 80 | } 81 | } 82 | -------------------------------------------------------------------------------- /src/language.rs: -------------------------------------------------------------------------------- 1 | use snips_nlu_ontology::Language; 2 | use snips_nlu_utils::language::Language as NluUtilsLanguage; 3 | 4 | pub trait FromLanguage { 5 | fn from_language(l: Language) -> Self; 6 | } 7 | 8 | impl FromLanguage for NluUtilsLanguage { 9 | fn from_language(l: Language) -> Self { 10 | match l { 11 | Language::DE => NluUtilsLanguage::DE, 12 | Language::EN => NluUtilsLanguage::EN, 13 | Language::ES => NluUtilsLanguage::ES, 14 | Language::FR => NluUtilsLanguage::FR, 15 | Language::IT => NluUtilsLanguage::IT, 16 | Language::JA => NluUtilsLanguage::JA, 17 | Language::KO => NluUtilsLanguage::KO, 18 | Language::PT_PT => NluUtilsLanguage::PT_PT, 19 | Language::PT_BR => NluUtilsLanguage::PT_BR, 20 | } 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![allow( 2 | clippy::unreadable_literal, 3 | clippy::excessive_precision, 4 | clippy::module_inception 5 | )] 6 | 7 | mod entity_parser; 8 | pub mod errors; 9 | pub mod injection; 10 | mod intent_classifier; 11 | mod intent_parser; 12 | mod language; 13 | pub mod models; 14 | mod nlu_engine; 15 | mod resources; 16 | mod slot_filler; 17 | mod slot_utils; 18 | #[cfg(test)] 19 | mod testutils; 20 | mod utils; 21 | 22 | pub const MODEL_VERSION: &str = "0.20.0"; 23 | 24 | pub extern crate snips_nlu_ontology as ontology; 25 | pub use crate::errors::*; 26 | pub use crate::intent_classifier::{IntentClassifier, LogRegIntentClassifier}; 27 | pub use crate::intent_parser::{ 28 | DeterministicIntentParser, IntentParser, LookupIntentParser, ProbabilisticIntentParser, 29 | }; 30 | pub use crate::models::*; 31 | pub use crate::nlu_engine::SnipsNluEngine; 32 | pub use crate::resources::loading::load_shared_resources; 33 | pub use crate::resources::SharedResources; 34 | pub use crate::slot_filler::{CRFSlotFiller, SlotFiller}; 35 | pub use snips_nlu_ontology::Language; 36 | -------------------------------------------------------------------------------- /src/models/intent_classifier.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::IntentName; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct IntentClassifierModel { 9 | pub featurizer: Option<String>, 10 | pub intercept: Option<Vec<f32>>, 11 | pub coeffs: Option<Vec<Vec<f32>>>, 12 | pub intent_list: Vec<Option<IntentName>>, 13 | } 14 | 15 | #[derive(Debug, Deserialize)] 16 | pub struct FeaturizerModel { 17 | pub language_code: String, 18 | pub tfidf_vectorizer: String, 19 | pub cooccurrence_vectorizer: Option<String>, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct TfidfVectorizerModel { 24 | pub language_code: String, 25 | pub builtin_entity_scope: Vec<String>, 26 | pub vectorizer: SklearnVectorizerModel, 27 | pub config: TfidfVectorizerConfiguration, 28 | } 29 | 30 | #[derive(Debug, Deserialize)] 31 | pub struct
TfidfVectorizerConfiguration { 32 | pub use_stemming: bool, 33 | pub word_clusters_name: Option<String>, 34 | } 35 | 36 | #[derive(Debug, Deserialize)] 37 | pub struct SklearnVectorizerModel { 38 | pub idf_diag: Vec<f32>, 39 | pub vocab: HashMap<String, usize>, 40 | } 41 | 42 | #[derive(Debug, Deserialize)] 43 | pub struct CooccurrenceVectorizerModel { 44 | pub language_code: String, 45 | pub builtin_entity_scope: Vec<String>, 46 | pub word_pairs: HashMap<usize, (String, String)>, 47 | pub config: CooccurrenceVectorizerConfiguration, 48 | } 49 | 50 | #[derive(Debug, Deserialize)] 51 | pub struct CooccurrenceVectorizerConfiguration { 52 | pub window_size: Option<usize>, 53 | pub filter_stop_words: bool, 54 | pub keep_order: bool, 55 | pub unknown_words_replacement_string: Option<String>, 56 | } 57 | -------------------------------------------------------------------------------- /src/models/intent_parser.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | pub type InputHash = i32; 8 | pub type IntentId = i32; 9 | pub type SlotId = i32; 10 | 11 | #[derive(Debug, Deserialize)] 12 | pub struct DeterministicParserModel { 13 | pub language_code: String, 14 | pub patterns: HashMap<IntentName, Vec<String>>, 15 | pub group_names_to_slot_names: HashMap<String, SlotName>, 16 | pub slot_names_to_entities: HashMap<IntentName, HashMap<SlotName, EntityName>>, 17 | #[serde(default)] 18 | pub stop_words_whitelist: HashMap<IntentName, Vec<String>>, 19 | pub config: DeterministicParserConfig, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct LookupParserModel { 24 | pub language_code: String, 25 | pub slots_names: Vec<SlotName>, 26 | pub intents_names: Vec<IntentName>, 27 | pub map: HashMap<InputHash, (IntentId, Vec<SlotId>)>, 28 | pub entity_scopes: Vec<GroupedEntityScope>, 29 | pub stop_words_whitelist: HashMap<IntentName, Vec<String>>, 30 | pub config: LookupParserConfig, 31 | } 32 | 33 | #[derive(Debug, Deserialize)] 34 | pub struct GroupedEntityScope { 35 | pub intent_group: Vec<IntentName>, 36 | pub entity_scope: EntityScope, 37 | } 38 | 39 | #[derive(Debug, Deserialize)] 40 | pub struct EntityScope { 41 | pub builtin: Vec<EntityName>, 42 | pub custom: Vec<EntityName>, 43 | } 44 | 45 | #[derive(Debug, Deserialize)] 46 | pub struct DeterministicParserConfig { 47 | #[serde(default)] 48 | pub ignore_stop_words: bool, 49 | } 50 | 51 | #[derive(Debug, Deserialize)] 52 | pub struct LookupParserConfig { 53 | #[serde(default)] 54 | pub ignore_stop_words: bool, 55 | } 56 | 57 | #[derive(Debug, Deserialize)] 58 | pub struct ProbabilisticParserModel { 59 | pub slot_fillers: Vec<SlotFillerMetadata>, 60 | } 61 | 62 | #[derive(Debug, Deserialize)] 63 | pub struct SlotFillerMetadata { 64 | pub intent: IntentName, 65 | pub slot_filler_name: String, 66 | } 67 | -------------------------------------------------------------------------------- /src/models/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod intent_classifier; 2 | pub mod intent_parser; 3 | pub mod nlu_engine; 4 | pub mod processing_unit_metadata; 5 | pub mod slot_filler; 6 | 7 | pub use self::intent_classifier::*; 8 | pub use self::intent_parser::*; 9 | pub use self::nlu_engine::*; 10 | pub use self::processing_unit_metadata::*; 11 | pub use self::slot_filler::*; 12 | -------------------------------------------------------------------------------- /src/models/nlu_engine.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct ModelVersion { 9 | pub
model_version: String, 10 | } 11 | 12 | #[derive(Debug, Deserialize)] 13 | pub struct NluEngineModel { 14 | pub dataset_metadata: DatasetMetadata, 15 | pub intent_parsers: Vec<String>, 16 | pub model_version: String, 17 | pub training_package_version: String, 18 | pub builtin_entity_parser: String, 19 | pub custom_entity_parser: String, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct DatasetMetadata { 24 | pub language_code: String, 25 | pub entities: HashMap<EntityName, Entity>, 26 | pub slot_name_mappings: HashMap<IntentName, HashMap<SlotName, EntityName>>, 27 | } 28 | 29 | #[derive(Debug, Deserialize, Clone)] 30 | pub struct Entity { 31 | pub automatically_extensible: bool, 32 | } 33 | -------------------------------------------------------------------------------- /src/models/processing_unit_metadata.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | 3 | #[derive(Debug, Deserialize, Copy, Clone, PartialEq, Eq)] 4 | #[serde(tag = "unit_name")] 5 | #[serde(rename_all = "snake_case")] 6 | pub enum ProcessingUnitMetadata { 7 | DeterministicIntentParser, 8 | LookupIntentParser, 9 | ProbabilisticIntentParser, 10 | CrfSlotFiller, 11 | LogRegIntentClassifier, 12 | } 13 | 14 | #[cfg(test)] 15 | mod tests { 16 | use super::*; 17 | use serde_json; 18 | 19 | #[test] 20 | fn test_deserialize() { 21 | let data = r#"{ 22 | "unit_name": "crf_slot_filler" 23 | }"#; 24 | let metadata: ProcessingUnitMetadata = serde_json::from_str(data).unwrap(); 25 | assert_eq!(ProcessingUnitMetadata::CrfSlotFiller, metadata); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/models/slot_filler.rs: -------------------------------------------------------------------------------- 1 | use std::collections::HashMap; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::utils::{EntityName, IntentName, SlotName}; 6 | 7 | #[derive(Debug, Deserialize)] 8 | pub struct SlotFillerModel { 9 | pub language_code: String, 10 | pub intent: IntentName, 11 | pub slot_name_mapping: HashMap<SlotName, EntityName>, 12 | pub crf_model_file: Option<String>, 13 | pub config: SlotFillerConfiguration, 14 | } 15 | 16 | #[derive(Debug, Deserialize)] 17 | pub struct SlotFillerConfiguration { 18 | pub tagging_scheme: u8, 19 | pub feature_factory_configs: Vec<FeatureFactory>, 20 | } 21 | 22 | #[derive(Debug, Deserialize)] 23 | pub struct FeatureFactory { 24 | pub factory_name: String, 25 | pub offsets: Vec<i32>, 26 | pub args: HashMap<String, serde_json::Value>, 27 | } 28 | -------------------------------------------------------------------------------- /src/resources/gazetteer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use snips_nlu_utils::string::hash_str_to_i32; 3 | use std::collections::HashSet; 4 | use std::io::{BufRead, BufReader, Read}; 5 | use std::iter::FromIterator; 6 | 7 | pub trait Gazetteer: Send + Sync { 8 | fn contains(&self, value: &str) -> bool; 9 | } 10 | 11 | pub struct HashSetGazetteer { 12 | values: HashSet<i32>, 13 | } 14 | 15 | impl HashSetGazetteer { 16 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 17 | let reader = BufReader::new(reader); 18 | let mut values = HashSet::new(); 19 | for line in reader.lines() { 20 | let word = line?; 21 | if !word.is_empty() { 22 | values.insert(hash_str_to_i32(&*word)); 23 | } 24 | } 25 | Ok(Self { values }) 26 | } 27 | } 28 | 29 | impl FromIterator<String> for HashSetGazetteer { 30 | fn from_iter<T: IntoIterator<Item = String>>(iter: T) -> Self { 31 | Self { 32 | values: iter 33 | .into_iter() 34 | .map(|str_value| hash_str_to_i32(&*str_value)) 35 | .collect(), 36 | } 37 | } 38 | }
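// Editor's sketch (not part of the original file): how the two constructors
// above are typically exercised. `from_reader` expects one value per line,
// while `from_iter` consumes owned `String`s; both store `hash_str_to_i32`
// hashes, and `contains` (implemented just below) hashes its query the same
// way before lookup. The function name is hypothetical and only illustrative.
#[cfg(test)]
fn _hashset_gazetteer_usage_sketch() {
    let gazetteer = HashSetGazetteer::from_iter(vec!["dog".to_string(), "cat".to_string()]);
    assert!(gazetteer.contains("dog") && !gazetteer.contains("bird"));
    // &[u8] implements Read, so a byte slice stands in for a resource file here.
    let gazetteer = HashSetGazetteer::from_reader("dog\ncat".as_bytes()).unwrap();
    assert!(gazetteer.contains("cat"));
}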
39 | 40 | impl Gazetteer for HashSetGazetteer { 41 | fn contains(&self, value: &str) -> bool { 42 | self.values.contains(&hash_str_to_i32(value)) 43 | } 44 | } 45 | 46 | #[cfg(test)] 47 | mod tests { 48 | use super::{Gazetteer, HashSetGazetteer}; 49 | 50 | #[test] 51 | fn test_hashset_gazetteer() { 52 | // Given 53 | let gazetteer: &[u8] = r#" 54 | dog 55 | cat 56 | bear 57 | crocodile"# 58 | .as_ref(); 59 | 60 | // When 61 | let gazetteer = HashSetGazetteer::from_reader(gazetteer); 62 | 63 | // Then 64 | assert!(gazetteer.is_ok()); 65 | let gazetteer = gazetteer.unwrap(); 66 | assert!(gazetteer.contains("dog")); 67 | assert!(gazetteer.contains("crocodile")); 68 | assert!(!gazetteer.contains("bird")); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/resources/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod gazetteer; 2 | pub mod loading; 3 | pub mod stemmer; 4 | pub mod word_clusterer; 5 | 6 | use std::collections::{HashMap, HashSet}; 7 | use std::sync::Arc; 8 | 9 | use self::gazetteer::Gazetteer; 10 | use self::stemmer::Stemmer; 11 | use self::word_clusterer::WordClusterer; 12 | use super::entity_parser::{BuiltinEntityParser, CustomEntityParser}; 13 | 14 | pub struct SharedResources { 15 | pub builtin_entity_parser: Arc<dyn BuiltinEntityParser>, 16 | pub custom_entity_parser: Arc<dyn CustomEntityParser>, 17 | pub gazetteers: HashMap<String, Arc<dyn Gazetteer>>, 18 | pub stemmer: Option<Arc<dyn Stemmer>>, 19 | pub word_clusterers: HashMap<String, Arc<dyn WordClusterer>>, 20 | pub stop_words: HashSet<String>, 21 | } 22 | -------------------------------------------------------------------------------- /src/resources/stemmer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use snips_nlu_utils::string::{hash_str_to_i32, normalize}; 3 | use std::collections::HashMap; 4 | use std::io::Read; 5 | use std::iter::FromIterator; 6 | 7 | pub trait Stemmer: Send + Sync { 8 | fn stem(&self, value: &str) -> String; 9 | } 10 | 11 | pub struct HashMapStemmer { 12 | values: HashMap<i32, String>, 13 | } 14 | 15 | impl HashMapStemmer { 16 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 17 | let mut values = HashMap::new(); 18 | let mut csv_reader = csv::ReaderBuilder::new() 19 | .delimiter(b',') 20 | .quoting(false) 21 | .flexible(true) 22 | .has_headers(false) 23 | .from_reader(reader); 24 | 25 | for record in csv_reader.records() { 26 | let elements = record?; 27 | let stem = &elements[0]; 28 | for value in elements.iter().skip(1) { 29 | values.insert(hash_str_to_i32(value), stem.to_string()); 30 | } 31 | } 32 | Ok(Self { values }) 33 | } 34 | } 35 | 36 | impl FromIterator<(String, String)> for HashMapStemmer { 37 | fn from_iter<T: IntoIterator<Item = (String, String)>>(iter: T) -> Self { 38 | Self { 39 | values: iter 40 | .into_iter() 41 | .map(|(str_key, str_value)| (hash_str_to_i32(&*str_key), str_value)) 42 | .collect(), 43 | } 44 | } 45 | } 46 | 47 | impl Stemmer for HashMapStemmer { 48 | fn stem(&self, value: &str) -> String { 49 | self.values 50 | .get(&hash_str_to_i32(&*normalize(value))) 51 | .map(|v| v.to_string()) 52 | .unwrap_or_else(|| value.to_string()) 53 | } 54 | } 55 | 56 | #[cfg(test)] 57 | mod tests { 58 | use super::*; 59 | 60 | #[test] 61 | fn test_hashmap_stemmer() { 62 | // Given 63 | let stems: &[u8] = r#" 64 | investigate,investigated,investigation,"investigate 65 | do,done,don't,doing,did,does"# 66 | .as_ref(); 67 | 68 | // When 69 | let stemmer = HashMapStemmer::from_reader(stems); 70 | 71 | // Then 72 | assert!(stemmer.is_ok()); 73 | let stemmer = stemmer.unwrap(); 74 |
assert_eq!(stemmer.stem("don't"), "do".to_string()); 75 | assert_eq!(stemmer.stem("does"), "do".to_string()); 76 | assert_eq!(stemmer.stem("\"investigate"), "investigate".to_string()); 77 | assert_eq!(stemmer.stem("unknown"), "unknown".to_string()); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/resources/word_clusterer.rs: -------------------------------------------------------------------------------- 1 | use crate::errors::*; 2 | use itertools::Either; 3 | use snips_nlu_ontology::Language; 4 | use snips_nlu_utils::string::hash_str_to_i32; 5 | use std::collections::HashMap; 6 | use std::io::Read; 7 | use std::str::FromStr; 8 | 9 | pub trait WordClusterer: Send + Sync { 10 | fn get_cluster(&self, word: &str) -> Option<String>; 11 | } 12 | 13 | pub struct HashMapWordClusterer { 14 | /// This implementation supports both u16 and raw string representations of 15 | /// word clusters 16 | values: Either<HashMap<i32, u16>, HashMap<i32, String>>, 17 | } 18 | 19 | impl HashMapWordClusterer { 20 | pub fn from_reader<R: Read>(reader: R) -> Result<Self> { 21 | let mut csv_reader = csv::ReaderBuilder::new() 22 | .delimiter(b'\t') 23 | .quoting(false) 24 | .has_headers(false) 25 | .from_reader(reader); 26 | // This flag is switched to false as soon as a record is found which cannot 27 | // be converted to a u16 28 | let mut u16_casting_ok = true; 29 | let mut u16_values = HashMap::new(); 30 | let mut str_values = HashMap::new(); 31 | for record in csv_reader.records() { 32 | let elements = record?; 33 | let hashed_key = hash_str_to_i32(elements[0].as_ref()); 34 | // Casting into u16 is attempted only when all previous clusters were converted 35 | // successfully 36 | if u16_casting_ok { 37 | match u16::from_str(elements[1].as_ref()) { 38 | Ok(u16_value) => { 39 | u16_values.insert(hashed_key, u16_value); 40 | } 41 | Err(_) => { 42 | // This word cluster cannot be converted into a u16; move all the 43 | // previously stored clusters into a raw string representation 44 | for (hash, value) in u16_values.iter() { 45 | str_values.insert(*hash, format!("{}", value)); 46 | } 47 | str_values.insert(hashed_key, elements[1].to_string()); 48 | u16_casting_ok = false; 49 | u16_values.clear(); 50 | } 51 | } 52 | } else { 53 | str_values.insert(hashed_key, elements[1].to_string()); 54 | } 55 | } 56 | Ok(Self { 57 | values: if u16_casting_ok { 58 | Either::Left(u16_values) 59 | } else { 60 | Either::Right(str_values) 61 | }, 62 | }) 63 | } 64 | } 65 | 66 | impl WordClusterer for HashMapWordClusterer { 67 | fn get_cluster(&self, word: &str) -> Option<String> { 68 | let hashed_key = hash_str_to_i32(word); 69 | match &self.values { 70 | Either::Left(u16_values) => u16_values.get(&hashed_key).map(|v| format!("{}", v)), 71 | Either::Right(str_values) => str_values.get(&hashed_key).cloned(), 72 | } 73 | } 74 | } 75 | 76 | #[derive(Debug, Clone, PartialEq, Eq, Hash)] 77 | pub struct WordClustererConfiguration { 78 | language: Language, 79 | clusters_name: String, 80 | } 81 | 82 | #[cfg(test)] 83 | mod tests { 84 | use super::*; 85 | 86 | #[test] 87 | fn test_hashmap_word_clusterer_with_non_u16_values() { 88 | // Given 89 | let clusters: &[u8] = r#" 90 | hello 42 91 | world 123 92 | "yolo cluster_which_is_not_u16 93 | "# 94 | .as_ref(); 95 | 96 | // When 97 | let clusterer = HashMapWordClusterer::from_reader(clusters); 98 | 99 | // Then 100 | assert!(clusterer.is_ok()); 101 | let clusterer = clusterer.unwrap(); 102 | assert!(clusterer.values.is_right()); 103 | assert_eq!(clusterer.get_cluster("hello"),
Some("42".to_string())); 104 | assert_eq!(clusterer.get_cluster("world"), Some("123".to_string())); 105 | assert_eq!(clusterer.get_cluster("\"yolo"), Some("cluster_which_is_not_u16".to_string())); 106 | assert_eq!(clusterer.get_cluster("unknown"), None); 107 | } 108 | 109 | #[test] 110 | fn test_hashmap_word_clusterer_with_u16_values() { 111 | // Given 112 | let clusters: &[u8] = r#" 113 | hello 42 114 | world 123 115 | yolo 65500 116 | "# 117 | .as_ref(); 118 | 119 | // When 120 | let clusterer = HashMapWordClusterer::from_reader(clusters); 121 | 122 | // Then 123 | assert!(clusterer.is_ok()); 124 | let clusterer = clusterer.unwrap(); 125 | assert!(clusterer.values.is_left()); 126 | assert_eq!(clusterer.get_cluster("hello"), Some("42".to_string())); 127 | assert_eq!(clusterer.get_cluster("world"), Some("123".to_string())); 128 | assert_eq!(clusterer.get_cluster("yolo"), Some("65500".to_string())); 129 | assert_eq!(clusterer.get_cluster("unknown"), None); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /src/slot_filler/features_utils.rs: -------------------------------------------------------------------------------- 1 | use std::iter::FromIterator; 2 | use std::str; 3 | 4 | use snips_nlu_utils::token::Token; 5 | 6 | pub fn get_word_chunk( 7 | word: &str, 8 | chunk_size: usize, 9 | chunk_start: usize, 10 | reverse: bool, 11 | ) -> Option { 12 | if reverse && chunk_size > chunk_start { 13 | return None; 14 | } 15 | let start = if reverse { 16 | chunk_start - chunk_size 17 | } else { 18 | chunk_start 19 | }; 20 | if start + chunk_size > word.chars().count() { 21 | None 22 | } else { 23 | Some(word.chars().skip(start).take(chunk_size).collect()) 24 | } 25 | } 26 | 27 | pub fn initial_string_from_tokens(tokens: &[Token]) -> String { 28 | let mut current_index = 0; 29 | let mut chunks: Vec = Vec::with_capacity(2 * tokens.len() - 1); 30 | for token in tokens { 31 | if token.char_range.start > current_index { 32 | let nb_spaces = token.char_range.start - current_index; 33 | let spaces = String::from_iter(vec![' '; nb_spaces]); 34 | chunks.push(spaces); 35 | } 36 | chunks.push(token.value.clone()); 37 | current_index = token.char_range.end; 38 | } 39 | chunks.join("") 40 | } 41 | 42 | #[cfg(test)] 43 | mod tests { 44 | use super::*; 45 | 46 | #[test] 47 | fn test_get_word_chunk() { 48 | // Given 49 | let word = "hello_world"; 50 | let chunk_size = 6; 51 | let chunk_start = 5; 52 | let reverse = false; 53 | 54 | // When 55 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 56 | 57 | // Then 58 | let expected_chunk = Some("_world".to_string()); 59 | assert_eq!(word_chunk, expected_chunk); 60 | } 61 | 62 | #[test] 63 | fn test_get_word_chunk_reversed() { 64 | // Given 65 | let word = "hello_world"; 66 | let chunk_size = 8; 67 | let chunk_start = 8; 68 | let reverse = true; 69 | 70 | // When 71 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 72 | 73 | // Then 74 | let expected_chunk = Some("hello_wo".to_string()); 75 | assert_eq!(word_chunk, expected_chunk); 76 | } 77 | 78 | #[test] 79 | fn test_get_word_chunk_out_of_bound() { 80 | // Given 81 | let word = "hello_world"; 82 | let chunk_size = 4; 83 | let chunk_start = 8; 84 | let reverse = false; 85 | 86 | // When 87 | let word_chunk = get_word_chunk(word, chunk_size, chunk_start, reverse); 88 | 89 | // Then 90 | assert_eq!(word_chunk, None); 91 | } 92 | 93 | #[test] 94 | fn test_initial_string_from_tokens() { 95 | // Given 96 | let tokens = vec![ 
97 | Token::new("hello".to_string(), 0..5, 0..5), 98 | Token::new("world".to_string(), 9..14, 9..14), 99 | Token::new("!!!".to_string(), 17..20, 17..20), 100 | ]; 101 | 102 | // When 103 | let result = initial_string_from_tokens(&tokens); 104 | 105 | // Then 106 | assert_eq!("hello world !!!", &result); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /src/slot_filler/macros.rs: -------------------------------------------------------------------------------- 1 | #[macro_export] 2 | macro_rules! get_features { 3 | ([$(($feature_type:ident,$feature_name:ident)),*]) => { 4 | #[derive(Debug, Copy, Clone, PartialEq, Hash, Eq)] 5 | pub enum FeatureKind { 6 | $( $feature_type ),* 7 | } 8 | 9 | impl FeatureKind { 10 | pub fn identifier(&self) -> &'static str { 11 | match self { 12 | $( 13 | FeatureKind::$feature_type => stringify!($feature_name), 14 | )* 15 | } 16 | } 17 | } 18 | 19 | $( 20 | impl FeatureKindRepr for $feature_type { 21 | fn feature_kind(&self) -> FeatureKind { 22 | FeatureKind::$feature_type 23 | } 24 | } 25 | )* 26 | 27 | fn get_features( 28 | f: &FeatureFactory, 29 | shared_resources: Arc<SharedResources>, 30 | ) -> Result<Vec<FeatureOffsetter>> { 31 | let features = match f.factory_name.as_ref() { 32 | $( 33 | stringify!($feature_name) => $feature_type::build_features(&f.args, shared_resources), 34 | )* 35 | _ => bail!("Feature {} not implemented", f.factory_name), 36 | }; 37 | Ok(features? 38 | .into_iter() 39 | .map(|feature| FeatureOffsetter { feature, offsets: f.offsets.clone() }) 40 | .collect()) 41 | } 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/slot_filler/mod.rs: -------------------------------------------------------------------------------- 1 | #[macro_use] 2 | mod macros; 3 | pub mod crf_slot_filler; 4 | mod crf_utils; 5 | mod feature_processor; 6 | mod features; 7 | mod features_utils; 8 | 9 | use std::fs::File; 10 | use std::path::Path; 11 | use std::sync::Arc; 12 | 13 | use failure::{format_err, ResultExt}; 14 | use snips_nlu_utils::token::Token; 15 | 16 | use crate::errors::*; 17 | use crate::models::ProcessingUnitMetadata; 18 | use crate::resources::SharedResources; 19 | use crate::slot_utils::InternalSlot; 20 | 21 | pub use self::crf_slot_filler::*; 22 | use self::crf_utils::TaggingScheme; 23 | 24 | pub trait SlotFiller: Send + Sync { 25 | fn get_tagging_scheme(&self) -> TaggingScheme; 26 | fn get_slots(&self, text: &str) -> Result<Vec<InternalSlot>>; 27 | fn get_sequence_probability(&self, tokens: &[Token], tags: Vec<String>) -> Result<f64>; 28 | } 29 | 30 | pub fn build_slot_filler<P: AsRef<Path>>( 31 | path: P, 32 | shared_resources: Arc<SharedResources>, 33 | ) -> Result<Box<dyn SlotFiller>> { 34 | let metadata_path = path.as_ref().join("metadata.json"); 35 | let metadata_file = File::open(&metadata_path).with_context(|_| { 36 | format!( 37 | "Cannot open slot filler metadata file '{:?}'", 38 | &metadata_path 39 | ) 40 | })?; 41 | let metadata: ProcessingUnitMetadata = serde_json::from_reader(metadata_file) 42 | .with_context(|_| "Cannot deserialize slot filler json data")?; 43 | match metadata { 44 | ProcessingUnitMetadata::CrfSlotFiller => { 45 | Ok(Box::new(CRFSlotFiller::from_path(path, shared_resources)?)
as _) 46 | } 47 | _ => Err(format_err!("{:?} is not a slot filler", metadata)), 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/testutils.rs: -------------------------------------------------------------------------------- 1 | use std::collections::{HashMap, HashSet}; 2 | use std::iter::FromIterator; 3 | use std::sync::Arc; 4 | 5 | use ndarray::prelude::*; 6 | use snips_nlu_ontology::{BuiltinEntity, BuiltinEntityKind}; 7 | 8 | use crate::entity_parser::{BuiltinEntityParser, CustomEntity, CustomEntityParser}; 9 | use crate::errors::*; 10 | use crate::resources::gazetteer::Gazetteer; 11 | use crate::resources::stemmer::Stemmer; 12 | use crate::resources::word_clusterer::WordClusterer; 13 | use crate::resources::SharedResources; 14 | 15 | pub fn assert_epsilon_eq_array1(a: &Array1<f32>, b: &Array1<f32>, epsilon: f32) { 16 | assert_eq!(a.dim(), b.dim()); 17 | for (index, elem_a) in a.indexed_iter() { 18 | assert!(epsilon_eq(*elem_a, b[index], epsilon)) 19 | } 20 | } 21 | 22 | pub fn epsilon_eq(a: f32, b: f32, epsilon: f32) -> bool { 23 | let diff = a - b; 24 | diff < epsilon && diff > -epsilon 25 | } 26 | 27 | pub struct SharedResourcesBuilder { 28 | builtin_entity_parser: Arc<dyn BuiltinEntityParser>, 29 | custom_entity_parser: Arc<dyn CustomEntityParser>, 30 | gazetteers: HashMap<String, Arc<dyn Gazetteer>>, 31 | stemmer: Option<Arc<dyn Stemmer>>, 32 | word_clusterers: HashMap<String, Arc<dyn WordClusterer>>, 33 | stop_words: HashSet<String>, 34 | } 35 | 36 | impl Default for SharedResourcesBuilder { 37 | fn default() -> Self { 38 | Self { 39 | builtin_entity_parser: Arc::<MockedBuiltinEntityParser>::default(), 40 | custom_entity_parser: Arc::<MockedCustomEntityParser>::default(), 41 | gazetteers: HashMap::default(), 42 | stemmer: None, 43 | word_clusterers: HashMap::default(), 44 | stop_words: HashSet::default(), 45 | } 46 | } 47 | } 48 | 49 | impl SharedResourcesBuilder { 50 | pub fn builtin_entity_parser<P: BuiltinEntityParser + 'static>(mut self, parser: P) -> Self { 51 | self.builtin_entity_parser = Arc::new(parser) as _; 52 | self 53 | } 54 | 55 | pub fn custom_entity_parser<P: CustomEntityParser + 'static>(mut self, parser: P) -> Self { 56 | self.custom_entity_parser = Arc::new(parser) as _; 57 | self 58 | } 59 | 60 | pub fn stop_words(mut self, stop_words: HashSet<String>) -> Self { 61 | self.stop_words = stop_words; 62 | self 63 | } 64 | 65 | pub fn build(self) -> SharedResources { 66 | SharedResources { 67 | builtin_entity_parser: self.builtin_entity_parser, 68 | custom_entity_parser: self.custom_entity_parser, 69 | gazetteers: self.gazetteers, 70 | stemmer: self.stemmer, 71 | word_clusterers: self.word_clusterers, 72 | stop_words: self.stop_words, 73 | } 74 | } 75 | } 76 | 77 | #[derive(Default)] 78 | pub struct MockedBuiltinEntityParser { 79 | pub mocked_outputs: HashMap<String, Vec<BuiltinEntity>>, 80 | } 81 | 82 | impl BuiltinEntityParser for MockedBuiltinEntityParser { 83 | fn extract_entities( 84 | &self, 85 | sentence: &str, 86 | _filter_entity_kinds: Option<&[BuiltinEntityKind]>, 87 | _use_cache: bool, 88 | _max_alternative_resolved_values: usize, 89 | ) -> Result<Vec<BuiltinEntity>> { 90 | Ok(self 91 | .mocked_outputs 92 | .get(sentence) 93 | .cloned() 94 | .unwrap_or_else(|| vec![])) 95 | } 96 | } 97 | 98 | impl FromIterator<(String, Vec<BuiltinEntity>)> for MockedBuiltinEntityParser { 99 | fn from_iter<T: IntoIterator<Item = (String, Vec<BuiltinEntity>)>>(iter: T) -> Self { 100 | Self { 101 | mocked_outputs: HashMap::from_iter(iter), 102 | } 103 | } 104 | } 105 | 106 | #[derive(Default)] 107 | pub struct MockedCustomEntityParser { 108 | pub mocked_outputs: HashMap<String, Vec<CustomEntity>>, 109 | } 110 | 111 | impl CustomEntityParser for MockedCustomEntityParser { 112 | fn extract_entities( 113 | &self, 114 | sentence: &str, 115 | _filter_entity_kinds: Option<&[String]>, 116 | _max_alternative_resolved_values:
usize, 117 | ) -> Result<Vec<CustomEntity>> { 118 | Ok(self 119 | .mocked_outputs 120 | .get(sentence) 121 | .cloned() 122 | .unwrap_or_else(|| vec![])) 123 | } 124 | } 125 | 126 | impl FromIterator<(String, Vec<CustomEntity>)> for MockedCustomEntityParser { 127 | fn from_iter<T: IntoIterator<Item = (String, Vec<CustomEntity>)>>(iter: T) -> Self { 128 | Self { 129 | mocked_outputs: HashMap::from_iter(iter), 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /update_ontology_version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NEW_VERSION=${1?"usage $0 <new version>"} 4 | 5 | echo "Updating snips-nlu-ontology versions to version ${NEW_VERSION}" 6 | find . -name "Cargo.toml" -exec perl -p -i -e "s/snipsco\/snips-nlu-ontology\".*\$/snipsco\/snips-nlu-ontology\", tag = \"$NEW_VERSION\" }/g" {} \; 7 | find . -name "build.gradle" -exec perl -p -i -e "s/compile \"ai.snips:snips-nlu-ontology:.*\"\$/compile \"ai.snips:snips-nlu-ontology:$NEW_VERSION\"/g" {} \; 8 | -------------------------------------------------------------------------------- /update_version.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | NEW_VERSION=${1?"usage $0 <new version>"} 4 | 5 | echo "Updating versions to version ${NEW_VERSION}" 6 | find . -name "Cargo.toml" -exec perl -p -i -e "s/^version = \".*\"$/version = \"$NEW_VERSION\"/g" {} \; 7 | find . -name "cbindgen.toml" -exec perl -p -i -e "s/^header = \"#define SNIPS_NLU_VERSION.*\"$/header = \"#define SNIPS_NLU_VERSION \\\\\"${NEW_VERSION}\\\\\"\"/g" {} \; 8 | perl -p -i -e "s/^version = \".*\"\$/version = \"$NEW_VERSION\"/g" */**/build.gradle 9 | perl -p -i -e "s/^VERSION=\".*\"\$/VERSION=\"$NEW_VERSION\"/g" */**/**/**/build.sh 10 | perl -p -i -e "s/SNIPS_NLU_VERSION \".*\"/SNIPS_NLU_VERSION \"$NEW_VERSION\"/g" platforms/c/libsnips_nlu.h 11 | 12 | echo "$NEW_VERSION" > platforms/python/snips_nlu_rust/__version__ 13 | 14 | if [[ "${NEW_VERSION}" == "${NEW_VERSION/-SNAPSHOT/}" ]] 15 | then 16 | perl -p -i -e \ 17 | "s/^snips-nlu-ffi = \{.*\}$/snips-nlu-ffi = { git = \"https:\/\/github.com\/snipsco\/snips-nlu-rs\", tag = \"$NEW_VERSION\" }/g" \ 18 | platforms/python/ffi/Cargo.toml 19 | else 20 | perl -p -i -e \ 21 | "s/^snips-nlu-ffi = \{.*\}$/snips-nlu-ffi = { path = \"..\/..\/..\/ffi\" }/g" \ 22 | platforms/python/ffi/Cargo.toml 23 | 24 | fi 25 | --------------------------------------------------------------------------------
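Editor's note: a minimal usage sketch of the public API re-exported in src/lib.rs above, in the spirit of examples/interactive_parsing_cli.rs. It assumes the crate is imported as snips_nlu_lib, that serde_json is available as a dependency, and that a trained engine directory has been produced by the snips-nlu Python training library; the path and query strings are placeholders.

use snips_nlu_lib::SnipsNluEngine;

fn main() {
    // Load a trained engine from a directory (placeholder path).
    let engine = SnipsNluEngine::from_path("path/to/trained_engine").unwrap();
    // Passing `None, None` leaves the intents whitelist and blacklist unset.
    let result = engine.parse("Make me two cups of coffee", None, None).unwrap();
    println!("{}", serde_json::to_string_pretty(&result).unwrap());
}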