├── .coveragerc ├── .dockerignore ├── .env ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── CHANGELOG.rst ├── CODE_OF_CONDUCT.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── alt_requirements ├── conda-requirements.txt ├── requirements_bare.txt ├── requirements_dev.txt ├── requirements_full.txt ├── requirements_mitie.txt ├── requirements_spacy_sklearn.txt └── requirements_tensorflow_sklearn.txt ├── app.json ├── cloudbuild.yaml ├── data ├── README.md ├── examples │ ├── dialogflow │ │ ├── agent.json │ │ ├── entities │ │ │ ├── cuisine.json │ │ │ ├── cuisine_entries_en.json │ │ │ ├── cuisine_entries_es.json │ │ │ ├── location.json │ │ │ ├── location_entries_en.json │ │ │ └── location_entries_es.json │ │ ├── intents │ │ │ ├── Default Fallback Intent.json │ │ │ ├── affirm.json │ │ │ ├── affirm_usersays_en.json │ │ │ ├── affirm_usersays_es.json │ │ │ ├── goodbye.json │ │ │ ├── goodbye_usersays_en.json │ │ │ ├── goodbye_usersays_es.json │ │ │ ├── hi.json │ │ │ ├── hi_usersays_en.json │ │ │ ├── hi_usersays_es.json │ │ │ ├── inform.json │ │ │ ├── inform_usersays_en.json │ │ │ └── inform_usersays_es.json │ │ └── package.json │ ├── luis │ │ └── demo-restaurants.json │ ├── rasa │ │ ├── demo-rasa.json │ │ ├── demo-rasa.md │ │ ├── demo-rasa_zh.json │ │ ├── demo-rasa_zh_medical.json │ │ └── demo-rasa_zh_movie.json │ └── wit │ │ └── demo-flights.json └── test │ ├── demo-rasa-noents.json │ ├── demo-rasa-small.json │ ├── demo-rasa-zh.json │ ├── dialogflow_en_converted_to_rasa.json │ ├── dialogflow_es_converted_to_rasa.json │ ├── json_converted_to_md.md │ ├── luis_converted_to_rasa.json │ ├── markdown_single_sections │ ├── regex_only.md │ └── synonyms_only.md │ ├── md_converted_to_json.json │ ├── multiple_files_json │ ├── demo-rasa-affirm.json │ ├── demo-rasa-goodbye.json │ ├── demo-rasa-greet.json │ └── demo-rasa-restaurant_search.json │ ├── multiple_files_markdown │ ├── demo-rasa-affirm.md │ ├── demo-rasa-goodbye.md │ ├── demo-rasa-greet.md │ └── demo-rasa-restaurant_search.md │ └── wit_converted_to_rasa.json ├── docker ├── Dockerfile_bare ├── Dockerfile_full ├── Dockerfile_mitie ├── Dockerfile_spacy_sklearn ├── Dockerfile_test └── docker-cloud.yml ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── images │ │ ├── component_lifecycle.png │ │ └── rasa_nlu_intent_gui.png ├── _templates │ └── layout.html ├── changelog.rst ├── closeloop.rst ├── community.rst ├── conf.py ├── config.rst ├── context.rst ├── contribute.rst ├── dataformat.rst ├── entities.rst ├── evaluation.rst ├── faq.rst ├── http.rst ├── index.rst ├── installation.rst ├── key.enc ├── languages.rst ├── license.rst ├── migrating.rst ├── migrations.rst ├── persist.rst ├── pipeline.rst ├── poll.html ├── python.rst └── tutorial.rst ├── entrypoint.sh ├── heroku ├── Procfile └── runtime.txt ├── jieba_userdict └── jieba_userdict.txt ├── rasa_nlu ├── __init__.py ├── classifiers │ ├── __init__.py │ ├── embedding_intent_classifier.py │ ├── keyword_intent_classifier.py │ ├── mitie_intent_classifier.py │ └── sklearn_intent_classifier.py ├── components.py ├── config.py ├── convert.py ├── data_router.py ├── emulators │ ├── __init__.py │ ├── dialogflow.py │ ├── luis.py │ └── wit.py ├── evaluate.py ├── extractors │ ├── __init__.py │ ├── crf_entity_extractor.py │ ├── duckling_extractor.py │ ├── duckling_http_extractor.py │ ├── entity_synonyms.py │ ├── mitie_entity_extractor.py │ └── spacy_entity_extractor.py ├── featurizers │ ├── __init__.py │ ├── 
count_vectors_featurizer.py │ ├── mitie_featurizer.py │ ├── ngram_featurizer.py │ ├── regex_featurizer.py │ └── spacy_featurizer.py ├── model.py ├── persistor.py ├── project.py ├── registry.py ├── run.py ├── schemas │ └── nlu_model.yml ├── server.py ├── tokenizers │ ├── __init__.py │ ├── jieba_tokenizer.py │ ├── mitie_tokenizer.py │ ├── spacy_tokenizer.py │ ├── whitespace_tokenizer.py │ └── yaha_tokenizer.py ├── train.py ├── training_data │ ├── __init__.py │ ├── formats │ │ ├── __init__.py │ │ ├── dialogflow.py │ │ ├── luis.py │ │ ├── markdown.py │ │ ├── rasa.py │ │ ├── readerwriter.py │ │ └── wit.py │ ├── loading.py │ ├── message.py │ ├── training_data.py │ └── util.py ├── utils │ ├── __init__.py │ ├── mitie_utils.py │ └── spacy_utils.py └── version.py ├── requirements.txt ├── sample_configs ├── config_crf.yml ├── config_defaults.yml ├── config_embedding.yml ├── config_jieba_mitie.yml ├── config_jieba_mitie_sklearn.json ├── config_jieba_mitie_sklearn.yml ├── config_jieba_mitie_sklearn_plus_dict_path.yml ├── config_mitie.yml ├── config_mitie_sklearn.yml ├── config_spacy.yml ├── config_spacy_duckling.yml ├── config_train_server_json.yml ├── config_train_server_md.yml └── config_yaha_mitie_sklearn.json ├── setup.cfg ├── setup.py ├── test_models ├── test_model_mitie │ └── model_20170628-002704 │ │ ├── entity_extractor.dat │ │ ├── entity_synonyms.json │ │ ├── intent_classifier.dat │ │ ├── metadata.json │ │ └── training_data.json ├── test_model_mitie_sklearn │ └── model_20170628-002712 │ │ ├── entity_extractor.dat │ │ ├── entity_synonyms.json │ │ ├── intent_classifier.pkl │ │ ├── metadata.json │ │ └── training_data.json └── test_model_spacy_sklearn │ └── model_20170628-002705 │ ├── crf_model.pkl │ ├── entity_synonyms.json │ ├── intent_classifier.pkl │ ├── metadata.json │ └── training_data.json └── tests ├── __init__.py ├── base ├── __init__.py ├── test_components.py ├── test_config.py ├── test_data_router.py ├── test_emulators.py ├── test_evaluation.py ├── test_extractors.py ├── test_featurizers.py ├── test_interpreter.py ├── test_multitenancy.py ├── test_persistor.py ├── test_project.py ├── test_server.py ├── test_synonyms.py ├── test_tokenizers.py ├── test_training_data.py └── test_utils.py ├── conftest.py ├── training ├── __init__.py └── test_train.py └── utilities.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | def __repr__ 5 | raise NotImplementedError 6 | if __name__ == .__main__.: 7 | def create_argument_parser 8 | if typing.TYPE_CHECKING -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | docker* 2 | docs 3 | .git* 4 | **/*.pyc 5 | **/__pycache__ 6 | 7 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | TIMES=2 -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * -text -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: crownpku # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | 
patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: #https://www.paypal.me/crownpku 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **Rasa NLU version**: 4 | 5 | **Operating system** (windows, osx, ...): 6 | 7 | **Content of model configuration file**: 8 | ```yml 9 | 10 | ``` 11 | 12 | **Issue**: 13 | 14 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **Proposed changes**: 2 | - ... 3 | 4 | **Status (please check what you already did)**: 5 | - [ ] made PR ready for code review 6 | - [ ] added some tests for the functionality 7 | - [ ] updated the documentation 8 | - [ ] updated the changelog 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *pyc 3 | dist/ 4 | data/* 5 | !data/examples 6 | !data/test 7 | !data/README.md 8 | docs/_build 9 | server/ 10 | scala/ 11 | mongodb/ 12 | .cache/ 13 | build/ 14 | *.egg-info/ 15 | jnk/ 16 | logs/ 17 | tmp/ 18 | profile.* 19 | *.sqlite 20 | lastmile_ai/learn/plots/ 21 | *npy 22 | *# 23 | /config.json 24 | *log.json 25 | .coverage 26 | .coveralls.yml 27 | .idea/ 28 | *.iml 29 | out/ 30 | .vscode/ 31 | tmp_training_data.json 32 | .DS_Store 33 | models/ 34 | projects/ 35 | test_models/model_* 36 | .ipynb_checkpoints/ 37 | models/ 38 | nohup.out 39 | test_projects/test_project_* 40 | rasa_nlu/tmbo_test.py 41 | .mypy_cache/ 42 | *.tar.gz 43 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: required 3 | group: deprecated-2017Q2 4 | services: 5 | - docker 6 | cache: 7 | directories: 8 | - $HOME/.cache/pip 9 | - /tmp/cached/ 10 | python: 11 | - '2.7' 12 | - '3.5' 13 | - '3.6' 14 | env: 15 | # needed to fix issues with boto during testing: 16 | # https://github.com/travis-ci/travis-ci/issues/7940 17 | global: BOTO_CONFIG=/dev/null 18 | install: 19 | - pip install git+https://github.com/tmbo/MITIE.git 20 | - pip install -r alt_requirements/requirements_dev.txt 21 | - pip install -e . 22 | - pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz 23 | --no-cache-dir > jnk 24 | - python -m spacy link en_core_web_md en 25 | - pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz 26 | --no-cache-dir > jnk 27 | - python -m spacy link de_core_news_sm de 28 | - if [[ ! 
-f /tmp/cached/total_word_feature_extractor.dat ]]; then wget --quiet -P /tmp/cached/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat; 29 | fi 30 | - mv /tmp/cached/total_word_feature_extractor.dat data/total_word_feature_extractor.dat 31 | - pip list 32 | before_script: 33 | - mkdir $HOME/tmp 34 | - export TMPDIR=$HOME/tmp 35 | script: 36 | - py.test --pep8 -m pep8 37 | - py.test tests/base --cov rasa_nlu -v --cov-append 38 | - py.test tests/training --cov rasa_nlu -v --cov-append 39 | after_success: 40 | - coveralls 41 | jobs: 42 | include: 43 | - stage: docs 44 | if: fork = false 45 | install: 46 | - pip install sphinx==1.5.2 sphinx-autobuild==0.7.1 sphinxcontrib-versioning==2.2.1 sphinxcontrib-programoutput==0.11 47 | nbsphinx==0.2.18 48 | - pip install -e git://github.com/RasaHQ/sphinx_rtd_theme.git#egg=sphinx_rtd_theme 49 | - pip install -e . 50 | script: 51 | - eval "$(ssh-agent -s)"; touch docs/key; chmod 0600 docs/key 52 | - openssl aes-256-cbc -K $encrypted_4a8a3f4b9c17_key -iv $encrypted_4a8a3f4b9c17_iv -in docs/key.enc -out docs/key -d && ssh-add docs/key 53 | - git config --global user.email "builds@travis-ci.com" 54 | - git config --global user.name "Travis CI" 55 | - git remote set-url --push origin "git@github.com:$TRAVIS_REPO_SLUG" 56 | - export ${!TRAVIS*} 57 | - sphinx-versioning push docs docs . 58 | - stage: deploy 59 | install: skip 60 | script: skip 61 | deploy: 62 | provider: pypi 63 | user: amn41 64 | # server: https://test.pypi.org/legacy/ 65 | distributions: "sdist bdist_wheel" 66 | on: 67 | branch: master 68 | tags: true 69 | password: 70 | secure: K3JhIpxBBfu8SC8voAsIvgU9pdND9PayQi8Ep4Whg+RPKgnLWMzbFe2FfSTyxuEIkJGx4S6h0qORGz4ro6b/tCy72ruEYxLrx3vt8uNtWdYXSRnW+Knqk4QKn2q+WehmfSxhkvu2PQ3LACGWN13Nnc4OdlY9u843d0dSjD9INlAs/+m6X3Me0zdACmwd0V0l4U2hNMjJyvOPznrQj4HrMIGWfuags4NLySVkpnYMMzz5lnamUZKUUfyChAKTUPXuoO8s9U0Zxj2duOy+2yu9hcJwomFwBLiWR6nKZmEtzYrfgHFDkRtNyuJtmQn3pR4BzbMV5L6Td7DAey3fRYss8JxVZ+3mwjsRzbbMDRpqqI8b7L0KBFnWfS5qOecB6T9hT2SVQuGHqj4Y/CAHqzscBhiOlhKev65JXIc1JIJACKWaHVYASKeU24zprlcalkRsqXmUv/rvSgP1UQSEsE726hxr0gs/gyJVRSmg7dxm/BrFTVa4Pucpy0QW3ABfc7miaz9LuNzsY+7OBxmsOhPDZpQVez9TNr4agdh6enRTK2cg0zDqjSfzjXBPwXRlcsR67u1JZPqjN0cpT44xKUvuwzDFgcZjK74tDx9A7cV6yS325cKIz8KQ08saBIyrbDtbv/i9ry1Dvkxj+k3t+i7kyuzjzMdhj2yDF9WTAGTdOhY= 71 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at tom@rasa.ai or alan@rasa.ai. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.rst requirements.txt 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test lint 2 | 3 | TEST_PATH=./ 4 | 5 | help: 6 | @echo " clean" 7 | @echo " Remove python artifacts and build artifacts." 
8 | @echo " lint" 9 | @echo " Check style with flake8." 10 | @echo " test" 11 | @echo " Run py.test" 12 | @echo " check-readme" 13 | @echo " Check if the readme can be converted from md to rst for pypi" 14 | 15 | clean: 16 | find . -name '*.pyc' -exec rm -f {} + 17 | find . -name '*.pyo' -exec rm -f {} + 18 | find . -name '*~' -exec rm -f {} + 19 | rm -rf build/ 20 | rm -rf dist/ 21 | rm -rf *.egg-info 22 | rm -rf docs/_build 23 | 24 | lint: 25 | py.test --pep8 -m pep8 26 | 27 | test: clean 28 | py.test tests --verbose --pep8 --color=yes $(TEST_PATH) 29 | 30 | livedocs: 31 | cd docs && make livehtml 32 | 33 | check-readme: 34 | # if this runs through we can be sure the readme is properly shown on pypi 35 | python setup.py check --restructuredtext --strict 36 | -------------------------------------------------------------------------------- /alt_requirements/conda-requirements.txt: -------------------------------------------------------------------------------- 1 | scipy==1.10.0 2 | scikit-learn==0.19.1 3 | -------------------------------------------------------------------------------- /alt_requirements/requirements_bare.txt: -------------------------------------------------------------------------------- 1 | gevent==23.9.1 2 | klein==17.10.0 3 | hyperlink==17.3.1 4 | boto3==1.5.20 5 | typing==3.6.2 6 | future==0.18.3 7 | six==1.11.0 8 | jsonschema==2.6.0 9 | matplotlib==2.1.0 10 | requests==2.31.0 11 | tqdm==4.19.5 12 | numpy==1.22.0 13 | simplejson==3.13.2 14 | cloudpickle==0.5.2 15 | msgpack-python==0.5.4 16 | packaging==17.1 17 | pyyaml==5.4 18 | coloredlogs==9.0 19 | -------------------------------------------------------------------------------- /alt_requirements/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | 2 | # mitie 3 | git+https://github.com/mit-nlp/MITIE.git#egg=mitie 4 | # spacy 5 | spacy==1.8.2 6 | # sklearn 7 | scikit-learn==0.18.1 8 | scipy==1.10.0 9 | matplotlib==1.5.3 10 | # duckling 11 | duckling==1.7.1 12 | # sklearn_crfsuite 13 | sklearn-crfsuite==0.3.5 14 | # jieba 15 | jieba==0.38 16 | # cloudpickle 17 | cloudpickle==0.2.2 18 | 19 | -r requirements_full.txt 20 | 21 | 22 | # test 23 | python-coveralls==2.9.1 24 | pytest-pep8==1.0.6 25 | pytest-services==1.2.1 26 | pytest-cov==2.5.1 27 | pytest-twisted==1.6 28 | pytest==3.3.2 29 | treq==22.1.0 30 | moto==1.2.0 31 | mock==2.0.0 32 | # other 33 | google-cloud-storage==1.7.0 34 | azure-storage-blob==1.0.0 35 | 36 | # docs 37 | sphinx==1.5.2 38 | sphinx-autobuild==0.7.1 39 | sphinxcontrib-versioning==2.2.1 40 | sphinxcontrib-programoutput==0.11 41 | nbsphinx==0.2.18 42 | -e git://github.com/RasaHQ/sphinx_rtd_theme.git#egg=sphinx_rtd_theme 43 | -------------------------------------------------------------------------------- /alt_requirements/requirements_full.txt: -------------------------------------------------------------------------------- 1 | # Minimum Instal Requirements 2 | -r requirements_bare.txt 3 | 4 | # Spacy Requirements 5 | -r requirements_spacy_sklearn.txt 6 | 7 | # Tensorflow Requirements 8 | -r requirements_tensorflow_sklearn.txt 9 | 10 | # MITIE Requirements 11 | -r requirements_mitie.txt 12 | 13 | duckling==1.8.0 14 | Jpype1==0.6.2 15 | jieba==0.39 16 | -------------------------------------------------------------------------------- /alt_requirements/requirements_mitie.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | 
git+https://github.com/mit-nlp/MITIE.git#egg=mitie 5 | -------------------------------------------------------------------------------- /alt_requirements/requirements_spacy_sklearn.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | spacy==2.0.5 5 | scikit-learn==0.19.1 6 | scipy==1.10.0 7 | sklearn-crfsuite==0.3.6 -------------------------------------------------------------------------------- /alt_requirements/requirements_tensorflow_sklearn.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | scikit-learn==0.19.1 5 | tensorflow==2.11.1 -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu", 3 | "description": "a service for interpreting natural language", 4 | "image": "heroku/python", 5 | "repository": "https://github.com/RasaHQ/rasa_nlu", 6 | "keywords": ["python" ], 7 | "addons": [], 8 | "scripts": { 9 | "postdeploy": "echo '{}' > config.json;mkdir logs;python -m spacy download en" 10 | }, 11 | "buildpacks": [ 12 | { 13 | "url": "https://github.com/kennethreitz/conda-buildpack.git" 14 | } 15 | ], 16 | "env" : { 17 | "RASA_TOKEN": { 18 | "generator": "secret", 19 | "description": "token for validating requests" 20 | }, 21 | "RASA_PIPELINE": { 22 | "description": "which pipeline to use", 23 | "value": "spacy_sklearn" 24 | }, 25 | "RASA_PATH": { 26 | "description": "where to save projects and their models", 27 | "value": "/app/data" 28 | }, 29 | "RASA_LOGDIR": { 30 | "description": "where to save logs", 31 | "value": "/app/logs" 32 | }, 33 | "RASA_MITIE_FILE": { 34 | "description": "file containing mitie feature extractor", 35 | "value": "${RASA_PATH}/total_word_feature_extractor.dat" 36 | }, 37 | "RASA_SERVER_MODEL_DIRS": { 38 | "description": "dir containing model which should be used to process requests", 39 | "required": false 40 | }, 41 | "AWS_SECRET_ACCESS_KEY": { 42 | "description": "secret key for S3 access", 43 | "required": false 44 | }, 45 | "AWS_ACCESS_KEY_ID": { 46 | "description": "key id for S3 access", 47 | "required": false 48 | }, 49 | "BUCKET_NAME": { 50 | "description": "name of s3 bucket", 51 | "value": "rasanlu" 52 | }, 53 | "AWS_REGION": { 54 | "description": "aws region of S3 bucket", 55 | "value": "eu-west-1" 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | timeout: "20m" 2 | steps: 3 | - name: 'gcr.io/cloud-builders/docker' 4 | id: 'docker-build' 5 | args: ['build', '--file', './docker/Dockerfile_full', '-t', '$_IMAGE_REPO:$TAG_NAME', '.'] 6 | images: [ '$_IMAGE_REPO:$TAG_NAME' ] 7 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | These are some example training data files for a simple bot in the restaurant domain. 2 | They are in the format of the services rasa NLU can emulate, e.g. when you download an export 3 | of your app from one of these services it should look like one of these files. 
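
For orientation, here is a minimal loading sketch (not one of the repository's files): it assumes the `load_data` helper exported from the `rasa_nlu/training_data/` package shown in the tree above, and the `training_examples` attribute of the resulting `TrainingData` object; the file path is one of the example files listed below.

```python
# Hypothetical usage sketch -- not a file from this repository.
# Assumes the `load_data` helper exposed by rasa_nlu/training_data/
# (see rasa_nlu/training_data/loading.py in the tree above).
from rasa_nlu.training_data import load_data

# demo-rasa.json is one of the example training data files described below.
training_data = load_data("data/examples/rasa/demo-rasa.json")

# `training_examples` is assumed to hold the labelled example messages.
print(len(training_data.training_examples))
```

The same loader should accept the other formats listed here, since the `rasa_nlu/training_data/formats/` package in the tree contains readers for the Dialogflow, LUIS, wit, and Markdown variants.
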
4 | 5 | 6 | [examples/rasa](examples/rasa): examples in the native rasa NLU format 7 | 8 | [examples/luis](examples/luis): in LUIS format 9 | 10 | [examples/wit](examples/wit): in wit format 11 | 12 | [examples/api](examples/api): this is a dir and in Dialogflow format 13 | -------------------------------------------------------------------------------- /data/examples/dialogflow/agent.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "", 3 | "language": "en", 4 | "googleAssistant": { 5 | "googleAssistantCompatible": false, 6 | "project": "rasanlu-development", 7 | "welcomeIntentSignInRequired": false, 8 | "startIntents": [], 9 | "systemIntents": [], 10 | "endIntentIds": [], 11 | "oAuthLinking": { 12 | "required": false, 13 | "grantType": "AUTH_CODE_GRANT" 14 | }, 15 | "voiceType": "MALE_1", 16 | "capabilities": [], 17 | "protocolVersion": "V2" 18 | }, 19 | "defaultTimezone": "Asia/Hong_Kong", 20 | "webhook": { 21 | "available": false, 22 | "useForDomains": false, 23 | "cloudFunctionsEnabled": false, 24 | "cloudFunctionsInitialized": false 25 | }, 26 | "isPrivate": true, 27 | "customClassifierMode": "use.after", 28 | "mlMinConfidence": 0.3, 29 | "supportedLanguages": [ 30 | "es" 31 | ] 32 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "11c77228-4a02-4db8-a398-b286fe8098d2", 3 | "name": "cuisine", 4 | "isOverridable": true, 5 | "isEnum": false, 6 | "automatedExpansion": false 7 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine_entries_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "mexican", 4 | "synonyms": [ 5 | "mexican", 6 | "mexico" 7 | ] 8 | }, 9 | { 10 | "value": "chinese", 11 | "synonyms": [ 12 | "chinese", 13 | "china" 14 | ] 15 | }, 16 | { 17 | "value": "indian", 18 | "synonyms": [ 19 | "indian", 20 | "india" 21 | ] 22 | } 23 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine_entries_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "mexicano", 4 | "synonyms": [ 5 | "mexicano", 6 | "mexicana", 7 | "méxico" 8 | ] 9 | }, 10 | { 11 | "value": "chino", 12 | "synonyms": [ 13 | "chino", 14 | "china", 15 | "chinos" 16 | ] 17 | }, 18 | { 19 | "value": "indio", 20 | "synonyms": [ 21 | "indio", 22 | "india" 23 | ] 24 | } 25 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "8ee88034-01d3-49d4-bb58-531a705b963b", 3 | "name": "location", 4 | "isOverridable": true, 5 | "isEnum": false, 6 | "automatedExpansion": false 7 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location_entries_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "centre", 4 | "synonyms": [ 5 | "centre" 6 | ] 7 | }, 8 | { 9 | "value": "west", 10 | "synonyms": [ 11 | "west" 12 | ] 13 | }, 14 | { 15 | "value": "central", 16 | "synonyms": [ 17 | "central" 18 | ] 19 | }, 20 | { 21 | "value": 
"north", 22 | "synonyms": [ 23 | "north" 24 | ] 25 | } 26 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location_entries_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "centro", 4 | "synonyms": [ 5 | "centro", 6 | "centrar" 7 | ] 8 | }, 9 | { 10 | "value": "oeste", 11 | "synonyms": [ 12 | "oeste", 13 | "occidente" 14 | ] 15 | }, 16 | { 17 | "value": "central", 18 | "synonyms": [ 19 | "central", 20 | "céntrico" 21 | ] 22 | }, 23 | { 24 | "value": "norte", 25 | "synonyms": [ 26 | "norte" 27 | ] 28 | } 29 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/Default Fallback Intent.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "27b800fb-3b69-4723-932d-ca53eb849138", 3 | "name": "Default Fallback Intent", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "action": "input.unknown", 10 | "affectedContexts": [], 11 | "parameters": [], 12 | "messages": [ 13 | { 14 | "type": 0, 15 | "lang": "es", 16 | "speech": [ 17 | "Ups, no he entendido a que te refieres.", 18 | "¿Podrías repetirlo, por favor?", 19 | "¿Disculpa?", 20 | "¿Decías?", 21 | "¿Cómo?" 22 | ] 23 | }, 24 | { 25 | "type": 0, 26 | "lang": "en", 27 | "speech": [ 28 | "I didn\u0027t get that. Can you say it again?", 29 | "I missed what you said. Say it again?", 30 | "Sorry, could you say that again?", 31 | "Sorry, can you say that again?", 32 | "Can you say that again?", 33 | "Sorry, I didn\u0027t get that.", 34 | "Sorry, what was that?", 35 | "One more time?", 36 | "What was that?", 37 | "Say that again?", 38 | "I didn\u0027t get that.", 39 | "I missed that." 
40 | ] 41 | } 42 | ], 43 | "defaultResponsePlatforms": {}, 44 | "speech": [] 45 | } 46 | ], 47 | "priority": 500000, 48 | "webhookUsed": false, 49 | "webhookForSlotFilling": false, 50 | "lastUpdate": 1507539905, 51 | "fallbackIntent": true, 52 | "events": [] 53 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "c2e82a05-3980-4f74-b0d5-7ee0e1297284", 3 | "name": "affirm", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "affectedContexts": [], 10 | "parameters": [], 11 | "messages": [ 12 | { 13 | "type": 0, 14 | "lang": "es", 15 | "speech": "Me alegro de ayudarte, compañero :)" 16 | }, 17 | { 18 | "type": 0, 19 | "lang": "en", 20 | "speech": "Glad I help you, mate :)" 21 | } 22 | ], 23 | "defaultResponsePlatforms": {}, 24 | "speech": [] 25 | } 26 | ], 27 | "priority": 500000, 28 | "webhookUsed": false, 29 | "webhookForSlotFilling": false, 30 | "lastUpdate": 1507540481, 31 | "fallbackIntent": false, 32 | "events": [] 33 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "74dc9ae2-335c-448e-8e02-f37225051102", 4 | "data": [ 5 | { 6 | "text": "yes", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "425eacad-7c28-471a-8a4b-58f5079ec1c6", 16 | "data": [ 17 | { 18 | "text": "yep", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "034520c5-bc84-4f09-bd74-625d10fa6499", 28 | "data": [ 29 | { 30 | "text": "yeah", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "ee97d4c8-1d44-4f8c-a657-33a2c4c1c869", 40 | "data": [ 41 | { 42 | "text": "indeed", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "6d5b05c4-5cd6-43b5-af52-764b3b1259e7", 52 | "data": [ 53 | { 54 | "text": "that\u0027s right", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | }, 62 | { 63 | "id": "0ef60526-405a-40ec-955f-3961600ae7dd", 64 | "data": [ 65 | { 66 | "text": "ok", 67 | "userDefined": false 68 | } 69 | ], 70 | "isTemplate": false, 71 | "count": 0, 72 | "updated": 0 73 | }, 74 | { 75 | "id": "b15de01a-db24-4516-bab0-88eda8de1c16", 76 | "data": [ 77 | { 78 | "text": "great", 79 | "userDefined": false 80 | } 81 | ], 82 | "isTemplate": false, 83 | "count": 0, 84 | "updated": 0 85 | } 86 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "9dc52d2a-dbf2-44e7-bc55-0eee5a7a8a14", 4 | "data": [ 5 | { 6 | "text": "sí", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540481 13 | }, 14 | { 15 | "id": "9dg52d2a-dbf2-44e7-bc55-0eee5a6a8a14", 16 | "data": [ 17 | { 18 | "text": "si", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540481 25 | }, 26 | { 27 | "id": 
"9dg52d2a-dbf2-44e7-bc55-0eee5a6a8a24", 28 | "data": [ 29 | { 30 | "text": "Sí", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540481 37 | }, 38 | { 39 | "id": "7a11df78-3b06-48c3-9aa4-9f779c23fb0b", 40 | "data": [ 41 | { 42 | "text": "de verdad", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540481 49 | }, 50 | { 51 | "id": "e6238a3e-3dcd-4932-9034-1a05f037d4e3", 52 | "data": [ 53 | { 54 | "text": "está bien", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540481 61 | }, 62 | { 63 | "id": "64cc393a-f9c9-4521-9052-543f99fcd97e", 64 | "data": [ 65 | { 66 | "text": "muy bien", 67 | "userDefined": false 68 | } 69 | ], 70 | "isTemplate": false, 71 | "count": 0, 72 | "updated": 1507540481 73 | }, 74 | { 75 | "id": "0f4a932a-c929-47d4-8cb2-5095882f40a0", 76 | "data": [ 77 | { 78 | "text": "estupendo", 79 | "userDefined": false 80 | } 81 | ], 82 | "isTemplate": false, 83 | "count": 0, 84 | "updated": 1507540481 85 | } 86 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "a90df8dd-f5bd-45dd-b8cf-12cc4b5cb800", 3 | "name": "goodbye", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "affectedContexts": [], 10 | "parameters": [], 11 | "messages": [ 12 | { 13 | "type": 0, 14 | "lang": "es", 15 | "speech": "¡Nos vemos! Disfrutar" 16 | }, 17 | { 18 | "type": 0, 19 | "lang": "en", 20 | "speech": "See ya! Enjoy" 21 | } 22 | ], 23 | "defaultResponsePlatforms": {}, 24 | "speech": [] 25 | } 26 | ], 27 | "priority": 500000, 28 | "webhookUsed": false, 29 | "webhookForSlotFilling": false, 30 | "lastUpdate": 1507540635, 31 | "fallbackIntent": false, 32 | "events": [] 33 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1f094fbb-199a-40cd-af8f-3978fcebc027", 4 | "data": [ 5 | { 6 | "text": "bye", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "b25f004f-425e-4ff3-b4db-36fa46772fc9", 16 | "data": [ 17 | { 18 | "text": "goodbye", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "f4e435f4-c88a-4998-b2c0-ef94565327ae", 28 | "data": [ 29 | { 30 | "text": "good bye", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "328a011a-08ba-4bd0-beea-6ab08859dd89", 40 | "data": [ 41 | { 42 | "text": "stop", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "b8d9e693-6383-4be9-a98f-38a60890fa7a", 52 | "data": [ 53 | { 54 | "text": "end", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "dde37345-fa41-4310-9a7f-74f74ca7a925", 4 | 
"data": [ 5 | { 6 | "text": "a usted adiós", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540635 13 | }, 14 | { 15 | "id": "86811b69-cee1-4ae0-8944-ace8eb4badc2", 16 | "data": [ 17 | { 18 | "text": "despedida", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540635 25 | }, 26 | { 27 | "id": "78bbd720-b50b-4cb9-9dce-eee0fef5aa74", 28 | "data": [ 29 | { 30 | "text": "adiós", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540635 37 | }, 38 | { 39 | "id": "74597c31-b649-49cc-bd3b-9a4ed3487070", 40 | "data": [ 41 | { 42 | "text": "suspender", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540635 49 | }, 50 | { 51 | "id": "7b42db30-d815-46fd-a7f5-79b9180c7c6b", 52 | "data": [ 53 | { 54 | "text": "fin", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540635 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "3e7ea801-9d08-479c-ada2-27ce467ca326", 3 | "name": "hi", 4 | "auto": true, 5 | "contexts": [], 6 | "userSays": [ 7 | { 8 | "id": "6a29da7f-3926-4b68-aa6d-dbbfe1c6b79d", 9 | "data": [ 10 | { 11 | "text": "hi" 12 | } 13 | ], 14 | "isTemplate": false, 15 | "count": 0 16 | }, 17 | { 18 | "id": "a83ef752-c2fe-4d45-abd4-35bc381f6d96", 19 | "data": [ 20 | { 21 | "text": "hello" 22 | } 23 | ], 24 | "isTemplate": false, 25 | "count": 0 26 | }, 27 | { 28 | "id": "91c4267d-5e30-4d45-9ce3-d0b21836c372", 29 | "data": [ 30 | { 31 | "text": "hey there" 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0 36 | }, 37 | { 38 | "id": "4b41d389-8bf7-4976-97ea-3b39a3e883ab", 39 | "data": [ 40 | { 41 | "text": "howdy" 42 | } 43 | ], 44 | "isTemplate": false, 45 | "count": 0 46 | }, 47 | { 48 | "id": "d819385a-a9c8-4dd0-9787-9a47c53156f3", 49 | "data": [ 50 | { 51 | "text": "hey" 52 | } 53 | ], 54 | "isTemplate": false, 55 | "count": 0 56 | } 57 | ], 58 | "responses": [ 59 | { 60 | "resetContexts": false, 61 | "action": "greet", 62 | "affectedContexts": [], 63 | "parameters": [], 64 | "messages": [ 65 | { 66 | "type": 0, 67 | "speech": "hey! how can I help you?" 
68 | } 69 | ] 70 | } 71 | ], 72 | "priority": 500000, 73 | "webhookUsed": false, 74 | "fallbackIntent": false 75 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "462fb0f5-d97a-4a95-96ab-91f49f289676", 4 | "data": [ 5 | { 6 | "text": "hey", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "651c6730-61e8-467b-a174-aca4e0ed66af", 16 | "data": [ 17 | { 18 | "text": "howdy", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "bab1998d-d54f-4a5f-9ed2-c7e8b24f37fc", 28 | "data": [ 29 | { 30 | "text": "hey there", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "9dfc509d-4f0e-4723-af4a-10ceef2fbf91", 40 | "data": [ 41 | { 42 | "text": "hello", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "a2271775-488e-4223-9ff4-458cfe4d2ba8", 52 | "data": [ 53 | { 54 | "text": "hi", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "40c60a12-1079-4e2c-a7c3-3498ab00de30", 4 | "data": [ 5 | { 6 | "text": "hello", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540781 13 | }, 14 | { 15 | "id": "96019dea-5810-4ce0-9f69-16c2ce012603", 16 | "data": [ 17 | { 18 | "text": "Hola amigo", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540781 25 | }, 26 | { 27 | "id": "298a54d1-c80c-4542-b41f-2cb549db814c", 28 | "data": [ 29 | { 30 | "text": "Bueno", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540781 37 | }, 38 | { 39 | "id": "0f490459-85df-4865-abd0-70f87f62890e", 40 | "data": [ 41 | { 42 | "text": "Caramba", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540781 49 | }, 50 | { 51 | "id": "1bf90b9b-fe34-42ae-96ea-21beaa3fca4f", 52 | "data": [ 53 | { 54 | "text": "Hola", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540781 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "fd566317-b15d-4685-9158-63028b6fb5bf", 3 | "name": "inform", 4 | "auto": true, 5 | "contexts": [], 6 | "userSays": [ 7 | { 8 | "id": "d5562a99-f039-4bc7-a1bd-53d6cc811206", 9 | "data": [ 10 | { 11 | "text": "central", 12 | "alias": "location", 13 | "meta": "@location", 14 | "userDefined": false 15 | }, 16 | { 17 | "text": " " 18 | }, 19 | { 20 | "text": "indian", 21 | "alias": "cuisine", 22 | "meta": "@cuisine", 23 | "userDefined": true 24 | }, 25 | { 26 | "text": " restaurant" 27 | } 28 | ], 29 | "isTemplate": false, 30 | "count": 0 31 | }, 32 | { 33 | "id": 
"5e043d29-a4ce-4642-a191-e287d397b02f", 34 | "data": [ 35 | { 36 | "text": "anywhere in the " 37 | }, 38 | { 39 | "text": "west", 40 | "alias": "location", 41 | "meta": "@location", 42 | "userDefined": false 43 | } 44 | ], 45 | "isTemplate": false, 46 | "count": 0 47 | }, 48 | { 49 | "id": "540cbb16-4c62-4bc7-b3f2-8f1107e65471", 50 | "data": [ 51 | { 52 | "text": "search for restaurants" 53 | } 54 | ], 55 | "isTemplate": false, 56 | "count": 0 57 | }, 58 | { 59 | "id": "cb096268-a8e5-49d5-8771-60bb1a511151", 60 | "data": [ 61 | { 62 | "text": "i am looking for an " 63 | }, 64 | { 65 | "text": "indian", 66 | "alias": "cuisine", 67 | "meta": "@cuisine", 68 | "userDefined": true 69 | }, 70 | { 71 | "text": " spot" 72 | } 73 | ], 74 | "isTemplate": false, 75 | "count": 0 76 | }, 77 | { 78 | "id": "2cf1b035-62f1-40cd-867c-af92cbbaf5c7", 79 | "data": [ 80 | { 81 | "text": "show me " 82 | }, 83 | { 84 | "text": "chinese", 85 | "alias": "cuisine", 86 | "meta": "@cuisine", 87 | "userDefined": true 88 | }, 89 | { 90 | "text": " restaurants" 91 | } 92 | ], 93 | "isTemplate": false, 94 | "count": 0 95 | }, 96 | { 97 | "id": "51f618be-e5a9-4007-a0d5-d6f12475a9b1", 98 | "data": [ 99 | { 100 | "text": "i\u0027m looking for a place in the " 101 | }, 102 | { 103 | "text": "north", 104 | "alias": "location", 105 | "meta": "@location", 106 | "userDefined": true 107 | }, 108 | { 109 | "text": " of " 110 | } 111 | ], 112 | "isTemplate": false, 113 | "count": 0 114 | }, 115 | { 116 | "id": "20a723ce-1efc-4c03-84ec-ee17c3219329", 117 | "data": [ 118 | { 119 | "text": "i\u0027m looking for a place to eat" 120 | } 121 | ], 122 | "isTemplate": false, 123 | "count": 0 124 | } 125 | ], 126 | "responses": [ 127 | { 128 | "resetContexts": false, 129 | "affectedContexts": [], 130 | "parameters": [ 131 | { 132 | "dataType": "@location", 133 | "name": "location", 134 | "value": "$location", 135 | "isList": true 136 | }, 137 | { 138 | "dataType": "@cuisine", 139 | "name": "cuisine", 140 | "value": "$cuisine", 141 | "isList": false 142 | } 143 | ], 144 | "messages": [ 145 | { 146 | "type": 0, 147 | "speech": "Here is a great spot I am sure you\u0027ll like, pal!" 
148 | } 149 | ] 150 | } 151 | ], 152 | "priority": 500000, 153 | "webhookUsed": false, 154 | "fallbackIntent": false 155 | } 156 | -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e623ff79-8f24-40bf-a6ed-5a885d9af6c8", 4 | "data": [ 5 | { 6 | "text": "i\u0027m looking for a place to eat", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "a3e1901a-6c84-402c-a5a4-e2ec05307aa9", 16 | "data": [ 17 | { 18 | "text": "i\u0027m looking for a place in the ", 19 | "userDefined": false 20 | }, 21 | { 22 | "text": "north", 23 | "alias": "location", 24 | "meta": "@location", 25 | "userDefined": false 26 | }, 27 | { 28 | "text": " of", 29 | "userDefined": false 30 | } 31 | ], 32 | "isTemplate": false, 33 | "count": 0, 34 | "updated": 0 35 | }, 36 | { 37 | "id": "fbf1e1b3-da86-4bbf-98b4-85e09f14c7b2", 38 | "data": [ 39 | { 40 | "text": "show me ", 41 | "userDefined": false 42 | }, 43 | { 44 | "text": "chinese", 45 | "alias": "cuisine", 46 | "meta": "@cuisine", 47 | "userDefined": false 48 | }, 49 | { 50 | "text": " restaurants", 51 | "userDefined": false 52 | } 53 | ], 54 | "isTemplate": false, 55 | "count": 0, 56 | "updated": 0 57 | }, 58 | { 59 | "id": "860619cb-6c78-41b9-882d-d105a51e4377", 60 | "data": [ 61 | { 62 | "text": "i am looking for an ", 63 | "userDefined": false 64 | }, 65 | { 66 | "text": "indian", 67 | "alias": "cuisine", 68 | "meta": "@cuisine", 69 | "userDefined": false 70 | }, 71 | { 72 | "text": " spot", 73 | "userDefined": false 74 | } 75 | ], 76 | "isTemplate": false, 77 | "count": 0, 78 | "updated": 0 79 | }, 80 | { 81 | "id": "a8b9fa75-19db-49ef-963b-50d316a14aa2", 82 | "data": [ 83 | { 84 | "text": "search for restaurants", 85 | "userDefined": false 86 | } 87 | ], 88 | "isTemplate": false, 89 | "count": 0, 90 | "updated": 0 91 | }, 92 | { 93 | "id": "c91a0223-e109-4d32-aab0-4008fb0a9f35", 94 | "data": [ 95 | { 96 | "text": "anywhere in the ", 97 | "userDefined": false 98 | }, 99 | { 100 | "text": "west", 101 | "alias": "location", 102 | "meta": "@location", 103 | "userDefined": false 104 | } 105 | ], 106 | "isTemplate": false, 107 | "count": 0, 108 | "updated": 0 109 | }, 110 | { 111 | "id": "34c28215-f492-44d6-88a9-779ff59cb301", 112 | "data": [ 113 | { 114 | "text": "central", 115 | "alias": "location", 116 | "meta": "@location", 117 | "userDefined": false 118 | }, 119 | { 120 | "text": " ", 121 | "userDefined": false 122 | }, 123 | { 124 | "text": "indian", 125 | "alias": "cuisine", 126 | "meta": "@cuisine", 127 | "userDefined": false 128 | }, 129 | { 130 | "text": " restaurant", 131 | "userDefined": false 132 | } 133 | ], 134 | "isTemplate": false, 135 | "count": 0, 136 | "updated": 0 137 | } 138 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "b3ecd39d-4eec-435d-a0fe-6cfd29412ad7", 4 | "data": [ 5 | { 6 | "text": "estoy buscando un lugar para comer", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507541018 13 | }, 14 | { 15 | "id": "5a25e429-a00a-4418-a48b-7d019eca3ac6", 16 | "data": [ 17 | { 18 | "text": "busco un lugar en el ", 19 | "userDefined": false 20 | }, 
21 | { 22 | "text": "norte", 23 | "alias": "location", 24 | "meta": "@location", 25 | "userDefined": false 26 | } 27 | ], 28 | "isTemplate": false, 29 | "count": 0, 30 | "updated": 1507541018 31 | }, 32 | { 33 | "id": "8b6fcd21-8e95-46ff-a841-54c59c650571", 34 | "data": [ 35 | { 36 | "text": "muéstrame los restaurantes ", 37 | "userDefined": false 38 | }, 39 | { 40 | "text": "chinos", 41 | "alias": "cuisine", 42 | "meta": "@cuisine", 43 | "userDefined": true 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 1, 48 | "updated": 1507541018 49 | }, 50 | { 51 | "id": "86992625-d848-47de-8220-f9a7d5ddf63b", 52 | "data": [ 53 | { 54 | "text": "estoy buscando un lugar ", 55 | "userDefined": false 56 | }, 57 | { 58 | "text": "indio", 59 | "alias": "cuisine", 60 | "meta": "@cuisine", 61 | "userDefined": false 62 | } 63 | ], 64 | "isTemplate": false, 65 | "count": 0, 66 | "updated": 1507541018 67 | }, 68 | { 69 | "id": "2c5ce034-f89b-4570-a7eb-7bee714902df", 70 | "data": [ 71 | { 72 | "text": "buscar restaurantes", 73 | "userDefined": false 74 | } 75 | ], 76 | "isTemplate": false, 77 | "count": 0, 78 | "updated": 1507540897 79 | }, 80 | { 81 | "id": "93ce3697-b53f-4e96-bea1-9de96b098ec4", 82 | "data": [ 83 | { 84 | "text": "en cualquier parte del ", 85 | "userDefined": false 86 | }, 87 | { 88 | "text": "oeste", 89 | "alias": "location", 90 | "meta": "@location", 91 | "userDefined": false 92 | } 93 | ], 94 | "isTemplate": false, 95 | "count": 0, 96 | "updated": 1507540897 97 | }, 98 | { 99 | "id": "6190aae9-a23f-4d8a-9a50-84a9d971d3f0", 100 | "data": [ 101 | { 102 | "text": "restaurante ", 103 | "userDefined": false 104 | }, 105 | { 106 | "text": "central", 107 | "alias": "location", 108 | "meta": "@location", 109 | "userDefined": false 110 | }, 111 | { 112 | "text": " de la ", 113 | "userDefined": false 114 | }, 115 | { 116 | "text": "India", 117 | "alias": "cuisine", 118 | "meta": "@cuisine", 119 | "userDefined": false 120 | } 121 | ], 122 | "isTemplate": false, 123 | "count": 0, 124 | "updated": 1507540897 125 | } 126 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.0" 3 | } -------------------------------------------------------------------------------- /data/examples/rasa/demo-rasa.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | 14 | ## intent:goodbye 15 | - bye 16 | - goodbye 17 | - good bye 18 | - stop 19 | - end 20 | - farewell 21 | - Bye bye 22 | - have a good one 23 | 24 | ## intent:greet 25 | - hey 26 | - howdy 27 | - hey there 28 | - hello 29 | - hi 30 | - good morning 31 | - good evening 32 | - dear sir 33 | 34 | ## intent:restaurant_search 35 | - i'm looking for a place to eat 36 | - I want to grab lunch 37 | - I am searching for a dinner spot 38 | - i'm looking for a place in the [north](location) of town 39 | - show me [chinese](cuisine) restaurants 40 | - show me [chines](cuisine:chinese) restaurants in the [north](location) 41 | - show me a [mexican](cuisine) place in the [centre](location) 42 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 43 | - search for restaurants 44 | - anywhere in the [west](location) 45 | - anywhere near [18328](location) 
46 | - I am looking for [asian fusion](cuisine) food 47 | - I am looking a restaurant in [29432](location) 48 | - I am looking for [mexican indian fusion](cuisine) 49 | - [central](location) [indian](cuisine) restaurant 50 | 51 | ## synonym:chinese 52 | + Chines 53 | * Chinese 54 | 55 | ## synonym:vegetarian 56 | - vegg 57 | - veggie 58 | 59 | ## regex:zipcode 60 | - [0-9]{5} 61 | 62 | ## regex:greet 63 | - hey[^\s]* -------------------------------------------------------------------------------- /data/examples/wit/demo-flights.json: -------------------------------------------------------------------------------- 1 | { 2 | "data" : [ 3 | { 4 | "text" : "i want to go from berlin to tokyo tomorrow", 5 | "entities" : [ 6 | { 7 | "entity" : "location", 8 | "value" : "\"berlin\"", 9 | "role" : "from", 10 | "start" : 18, 11 | "end" : 24 12 | }, 13 | { 14 | "entity" : "intent", 15 | "value" : "\"flight_booking\"", 16 | "start" : 0, 17 | "end" : 42 18 | }, 19 | { 20 | "entity" : "location", 21 | "value" : "\"tokyo\"", 22 | "role" : "to", 23 | "start" : 28, 24 | "end" : 33 25 | }, 26 | { 27 | "entity" : "datetime", 28 | "value" : "\"2016-05-29T00:00:00.000-07:00\"", 29 | "start" : 34, 30 | "end" : 42 31 | } 32 | ] 33 | }, 34 | { 35 | "text" : "i'm looking for a flight from london to amsterdam next monday", 36 | "entities" : [ 37 | { 38 | "entity" : "location", 39 | "value" : "\"london\"", 40 | "role" : "from", 41 | "start" : 30, 42 | "end" : 36 43 | }, 44 | { 45 | "entity" : "location", 46 | "value" : "\"amsterdam\"", 47 | "role" : "to", 48 | "start" : 40, 49 | "end" : 49 50 | }, 51 | { 52 | "entity" : "datetime", 53 | "value" : "\"2016-05-30T00:00:00.000-07:00\"", 54 | "start" : 50, 55 | "end" : 61 56 | } 57 | ] 58 | }, 59 | { 60 | "text" : "i want to fly to berlin", 61 | "entities" : [ 62 | { 63 | "entity" : "location", 64 | "value" : "\"berlin\"", 65 | "role" : "from", 66 | "start" : 17, 67 | "end" : 23 68 | } 69 | ] 70 | }, 71 | { 72 | "text" : "i want to fly from london", 73 | "entities" : [ 74 | { 75 | "entity" : "location", 76 | "value" : "\"london\"", 77 | "role" : "from", 78 | "start" : 19, 79 | "end" : 25 80 | } 81 | ] 82 | } 83 | ] 84 | } -------------------------------------------------------------------------------- /data/test/demo-rasa-small.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "hey", 6 | "intent": "greet", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "dear sir", 11 | "intent": "greet", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "i'm looking for a place to eat", 16 | "intent": "restaurant_search", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "i'm looking for a place in the north of town", 21 | "intent": "restaurant_search", 22 | "entities": [ 23 | { 24 | "start": 31, 25 | "end": 36, 26 | "value": "north", 27 | "entity": "location" 28 | } 29 | ] 30 | }, 31 | { 32 | "text": "show me a mexican place in the centre", 33 | "intent": "restaurant_search", 34 | "entities": [ 35 | { 36 | "start": 31, 37 | "end": 37, 38 | "value": "centre", 39 | "entity": "location" 40 | }, 41 | { 42 | "start": 10, 43 | "end": 17, 44 | "value": "mexican", 45 | "entity": "cuisine" 46 | } 47 | ] 48 | } 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /data/test/dialogflow_en_converted_to_rasa.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "entity_synonyms": 
[ 4 | { 5 | "value": "mexican", 6 | "synonyms": ["mexico"] 7 | }, 8 | { 9 | "value": "chinese", 10 | "synonyms": ["china"] 11 | }, 12 | { 13 | "value": "indian", 14 | "synonyms": ["india"] 15 | } 16 | ], 17 | "common_examples": [ 18 | { 19 | "text": "central indian restaurant", 20 | "intent": "inform", 21 | "entities": [ 22 | { 23 | "start": 0, 24 | "end": 7, 25 | "value": "central", 26 | "entity": "location" 27 | }, 28 | { 29 | "start": 8, 30 | "end": 14, 31 | "value": "indian", 32 | "entity": "cuisine" 33 | } 34 | ] 35 | }, 36 | { 37 | "text": "anywhere in the west", 38 | "intent": "inform", 39 | "entities": [ 40 | { 41 | "start": 16, 42 | "end": 20, 43 | "value": "west", 44 | "entity": "location" 45 | } 46 | ] 47 | }, 48 | { 49 | "text": "i am looking for an indian spot", 50 | "intent": "inform", 51 | "entities": [ 52 | { 53 | "start": 20, 54 | "end": 26, 55 | "value": "indian", 56 | "entity": "cuisine" 57 | } 58 | ] 59 | }, 60 | { 61 | "text": "show me chinese restaurants", 62 | "intent": "inform", 63 | "entities": [ 64 | { 65 | "start": 8, 66 | "end": 15, 67 | "value": "chinese", 68 | "entity": "cuisine" 69 | } 70 | ] 71 | }, 72 | { 73 | "text": "i'm looking for a place in the north of ", 74 | "intent": "inform", 75 | "entities": [ 76 | { 77 | "start": 31, 78 | "end": 36, 79 | "value": "north", 80 | "entity": "location" 81 | } 82 | ] 83 | }, 84 | { 85 | "text": "great", 86 | "intent": "affirm", 87 | "entities": [] 88 | }, 89 | { 90 | "text": "ok", 91 | "intent": "affirm", 92 | "entities": [] 93 | }, 94 | { 95 | "text": "that's right", 96 | "intent": "affirm", 97 | "entities": [] 98 | }, 99 | { 100 | "text": "indeed", 101 | "intent": "affirm", 102 | "entities": [] 103 | }, 104 | { 105 | "text": "yeah", 106 | "intent": "affirm", 107 | "entities": [] 108 | }, 109 | { 110 | "text": "yep", 111 | "intent": "affirm", 112 | "entities": [] 113 | }, 114 | { 115 | "text": "yes", 116 | "intent": "affirm", 117 | "entities": [] 118 | }, 119 | { 120 | "text": "end", 121 | "intent": "goodbye", 122 | "entities": [] 123 | }, 124 | { 125 | "text": "stop", 126 | "intent": "goodbye", 127 | "entities": [] 128 | }, 129 | { 130 | "text": "good bye", 131 | "intent": "goodbye", 132 | "entities": [] 133 | }, 134 | { 135 | "text": "goodbye", 136 | "intent": "goodbye", 137 | "entities": [] 138 | }, 139 | { 140 | "text": "bye", 141 | "intent": "goodbye", 142 | "entities": [] 143 | }, 144 | { 145 | "text": "hi", 146 | "intent": "hi", 147 | "entities": [] 148 | }, 149 | { 150 | "text": "hello", 151 | "intent": "hi", 152 | "entities": [] 153 | }, 154 | { 155 | "text": "hey there", 156 | "intent": "hi", 157 | "entities": [] 158 | }, 159 | { 160 | "text": "howdy", 161 | "intent": "hi", 162 | "entities": [] 163 | }, 164 | { 165 | "text": "hey", 166 | "intent": "hi", 167 | "entities": [] 168 | }, 169 | { 170 | "text": "search for restaurants", 171 | "intent": "inform", 172 | "entities": [] 173 | }, 174 | { 175 | "text": "i'm looking for a place to eat", 176 | "intent": "inform", 177 | "entities": [] 178 | } 179 | ] 180 | } 181 | } -------------------------------------------------------------------------------- /data/test/json_converted_to_md.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | 14 | ## intent:goodbye 15 | - bye 16 | - goodbye 17 | - good bye 18 | - stop 19 | - end 20 | - farewell 21 
| - Bye bye 22 | - have a good one 23 | 24 | ## intent:greet 25 | - hey 26 | - howdy 27 | - hey there 28 | - hello 29 | - hi 30 | - good morning 31 | - good evening 32 | - dear sir 33 | 34 | ## intent:restaurant_search 35 | - i'm looking for a place to eat 36 | - I want to grab lunch 37 | - I am searching for a dinner spot 38 | - i'm looking for a place in the [north](location) of town 39 | - show me [chinese](cuisine) restaurants 40 | - show me [chines](cuisine:chinese) restaurants 41 | - show me a [mexican](cuisine) place in the [centre](location) 42 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 43 | - search for restaurants 44 | - anywhere in the [west](location) 45 | - anywhere near [18328](location) 46 | - I am looking for [asian fusion](cuisine) food 47 | - I am looking a restaurant in [29432](location) 48 | - I am looking for [mexican indian fusion](cuisine) 49 | - [central](location) [indian](cuisine) restaurant 50 | 51 | ## synonym:chinese 52 | - Chines 53 | - chines 54 | - Chinese 55 | 56 | ## synonym:vegetarian 57 | - vegg 58 | - veggie 59 | -------------------------------------------------------------------------------- /data/test/markdown_single_sections/regex_only.md: -------------------------------------------------------------------------------- 1 | ## regex:greet 2 | - hey[^\s]* -------------------------------------------------------------------------------- /data/test/markdown_single_sections/synonyms_only.md: -------------------------------------------------------------------------------- 1 | ## synonym:chinese 2 | - Chines 3 | - Chinese -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-affirm.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "yes", 6 | "intent": "affirm", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "yep", 11 | "intent": "affirm", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "yeah", 16 | "intent": "affirm", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "indeed", 21 | "intent": "affirm", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "that's right", 26 | "intent": "affirm", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "ok", 31 | "intent": "affirm", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "great", 36 | "intent": "affirm", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "right, thank you", 41 | "intent": "affirm", 42 | "entities": [] 43 | }, 44 | { 45 | "text": "correct", 46 | "intent": "affirm", 47 | "entities": [] 48 | }, 49 | { 50 | "text": "great choice", 51 | "intent": "affirm", 52 | "entities": [] 53 | }, 54 | { 55 | "text": "sounds really good", 56 | "intent": "affirm", 57 | "entities": [] 58 | } 59 | ] 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-goodbye.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "bye", 6 | "intent": "goodbye", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "goodbye", 11 | "intent": "goodbye", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "good bye", 16 | "intent": "goodbye", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "stop", 21 | "intent": "goodbye", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "end", 26 | "intent": "goodbye", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "farewell", 31 | 
"intent": "goodbye", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "Bye bye", 36 | "intent": "goodbye", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "have a good one", 41 | "intent": "goodbye", 42 | "entities": [] 43 | } 44 | ] 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-greet.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "regex_features": [ 4 | { 5 | "name": "zipcode", 6 | "pattern": "[0-9]{5}" 7 | }], 8 | "common_examples": [ 9 | { 10 | "text": "hey", 11 | "intent": "greet", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "howdy", 16 | "intent": "greet", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "hey there", 21 | "intent": "greet", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "hello", 26 | "intent": "greet", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "hi", 31 | "intent": "greet", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "good morning", 36 | "intent": "greet", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "good evening", 41 | "intent": "greet", 42 | "entities": [] 43 | }, 44 | { 45 | "text": "dear sir", 46 | "intent": "greet", 47 | "entities": [] 48 | } 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-affirm.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-goodbye.md: -------------------------------------------------------------------------------- 1 | ## intent:goodbye 2 | - bye 3 | - goodbye 4 | - good bye 5 | - stop 6 | - end 7 | - farewell 8 | - Bye bye 9 | - have a good one 10 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-greet.md: -------------------------------------------------------------------------------- 1 | ## intent:greet 2 | - hey 3 | - howdy 4 | - hey there 5 | - hello 6 | - hi 7 | - good morning 8 | - good evening 9 | - dear sir 10 | 11 | ## regex:greet 12 | - hey[^\s]* -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-restaurant_search.md: -------------------------------------------------------------------------------- 1 | ## intent:restaurant_search 2 | - i'm looking for a place to eat 3 | - I want to grab lunch 4 | - I am searching for a dinner spot 5 | - i'm looking for a place in the [north](location) of town 6 | - show me [chinese](cuisine) restaurants 7 | - show me [chines](cuisine:chinese) restaurants in the [north](location) 8 | - show me a [mexican](cuisine) place in the [centre](location) 9 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 10 | - search for restaurants 11 | - anywhere in the [west](location) 12 | - anywhere near [18328](location) 13 | - I am looking for [asian fusion](cuisine) food 14 | - I am looking a restaurant in [29432](location) 15 | - I am looking for [mexican indian fusion](cuisine) 16 | - [central](location) [indian](cuisine) restaurant 17 | 18 | ## synonym:chinese 19 | - Chines 20 | - Chinese 21 | 22 | ## 
synonym:vegetarian 23 | - vegg 24 | - veggie 25 | 26 | ## regex:zipcode 27 | - [0-9]{5} 28 | -------------------------------------------------------------------------------- /data/test/wit_converted_to_rasa.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "i want to go from berlin to tokyo tomorrow", 6 | "intent": "flight_booking", 7 | "entities": [ 8 | { 9 | "start": 18, 10 | "role": "from", 11 | "end": 24, 12 | "value": "berlin", 13 | "entity": "location" 14 | }, 15 | { 16 | "start": 28, 17 | "role": "to", 18 | "end": 33, 19 | "value": "tokyo", 20 | "entity": "location" 21 | }, 22 | { 23 | "start": 34, 24 | "end": 42, 25 | "value": "2016-05-29T00:00:00.000-07:00", 26 | "entity": "datetime" 27 | } 28 | ] 29 | }, 30 | { 31 | "text": "i'm looking for a flight from london to amsterdam next monday", 32 | "entities": [ 33 | { 34 | "start": 30, 35 | "role": "from", 36 | "end": 36, 37 | "value": "london", 38 | "entity": "location" 39 | }, 40 | { 41 | "start": 40, 42 | "role": "to", 43 | "end": 49, 44 | "value": "amsterdam", 45 | "entity": "location" 46 | }, 47 | { 48 | "start": 50, 49 | "end": 61, 50 | "value": "2016-05-30T00:00:00.000-07:00", 51 | "entity": "datetime" 52 | } 53 | ] 54 | }, 55 | { 56 | "text": "i want to fly to berlin", 57 | "entities": [ 58 | { 59 | "start": 17, 60 | "role": "from", 61 | "end": 23, 62 | "value": "berlin", 63 | "entity": "location" 64 | } 65 | ] 66 | }, 67 | { 68 | "text": "i want to fly from london", 69 | "entities": [ 70 | { 71 | "start": 19, 72 | "role": "from", 73 | "end": 25, 74 | "value": "london", 75 | "entity": "location" 76 | } 77 | ] 78 | } 79 | ] 80 | } 81 | } -------------------------------------------------------------------------------- /docker/Dockerfile_bare: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | COPY . ${RASA_NLU_HOME} 18 | 19 | RUN pip install -r alt_requirements/requirements_bare.txt 20 | 21 | RUN pip install -e . 
22 | 23 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 24 | 25 | EXPOSE 5000 26 | 27 | ENTRYPOINT ["./entrypoint.sh"] 28 | CMD ["help"] 29 | -------------------------------------------------------------------------------- /docker/Dockerfile_full: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | # use bash always 18 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 19 | 20 | 21 | COPY . ${RASA_NLU_HOME} 22 | 23 | ## install java stuff 24 | 25 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 26 | 27 | RUN apt-get update && apt-get install -y --no-install-recommends \ 28 | bzip2 \ 29 | unzip \ 30 | xz-utils \ 31 | && rm -rf /var/lib/apt/lists/* 32 | 33 | # Default to UTF-8 file.encoding 34 | ENV LANG C.UTF-8 35 | 36 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 37 | # based on whether the JDK or only the JRE is installed 38 | RUN { \ 39 | echo '#!/bin/sh'; \ 40 | echo 'set -e'; \ 41 | echo; \ 42 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 43 | } > /usr/local/bin/docker-java-home \ 44 | && chmod +x /usr/local/bin/docker-java-home 45 | 46 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 47 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 48 | ENV JAVA_HOME /docker-java-home 49 | 50 | ENV JAVA_VERSION 8u141 51 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 52 | 53 | # see https://bugs.debian.org/775775 54 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 55 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 56 | 57 | RUN set -ex; \ 58 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 59 | if [ ! -d /usr/share/man/man1 ]; then \ 60 | mkdir -p /usr/share/man/man1; \ 61 | fi; \ 62 | apt-get update; \ 63 | apt-get install -y -t jessie-backports\ 64 | openjdk-8-jdk \ 65 | ca-certificates-java \ 66 | ; \ 67 | rm -rf /var/lib/apt/lists/*; \ 68 | # verify that "docker-java-home" returns what we expect 69 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 70 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 71 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 72 | # ... and verify that it actually worked for one of the alternatives we care about 73 | update-alternatives --query java | grep -q 'Status: manual' 74 | 75 | # see CA_CERTIFICATES_JAVA_VERSION notes above 76 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 77 | 78 | ## done java 79 | 80 | 81 | RUN pip install -r alt_requirements/requirements_full.txt 82 | 83 | RUN pip install -e . 
84 | 85 | RUN apt-get update -qq \ 86 | && apt-get install -y --no-install-recommends wget \ 87 | && wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat \ 88 | && apt-get remove -y wget \ 89 | && apt-get autoremove -y 90 | 91 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 92 | && python -m spacy link en_core_web_md en \ 93 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 94 | && python -m spacy link de_core_news_sm de 95 | 96 | COPY sample_configs/config_spacy_duckling.yml ${RASA_NLU_HOME}/config.yml 97 | 98 | #VOLUME ["/app/projects", "/app/logs", "/app/data"] 99 | 100 | EXPOSE 5000 101 | 102 | ENTRYPOINT ["./entrypoint.sh"] 103 | CMD ["start", "-c", "config.yml", "--path", "/app/projects"] 104 | -------------------------------------------------------------------------------- /docker/Dockerfile_mitie: -------------------------------------------------------------------------------- 1 | FROM python:2.7-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python2.7/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | 16 | # use bash always 17 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 18 | 19 | ## install java stuff 20 | 21 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 22 | 23 | RUN apt-get update && apt-get install -y --no-install-recommends \ 24 | bzip2 \ 25 | unzip \ 26 | xz-utils \ 27 | && rm -rf /var/lib/apt/lists/* 28 | 29 | # Default to UTF-8 file.encoding 30 | ENV LANG C.UTF-8 31 | 32 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 33 | # based on whether the JDK or only the JRE is installed 34 | RUN { \ 35 | echo '#!/bin/sh'; \ 36 | echo 'set -e'; \ 37 | echo; \ 38 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 39 | } > /usr/local/bin/docker-java-home \ 40 | && chmod +x /usr/local/bin/docker-java-home 41 | 42 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 43 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 44 | ENV JAVA_HOME /docker-java-home 45 | 46 | ENV JAVA_VERSION 8u141 47 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 48 | 49 | # see https://bugs.debian.org/775775 50 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 51 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 52 | 53 | RUN set -ex; \ 54 | \ 55 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 56 | if [ ! 
-d /usr/share/man/man1 ]; then \ 57 | mkdir -p /usr/share/man/man1; \ 58 | fi; \ 59 | \ 60 | apt-get update; \ 61 | apt-get install -y -t jessie-backports\ 62 | openjdk-8-jdk \ 63 | ca-certificates-java \ 64 | ; \ 65 | rm -rf /var/lib/apt/lists/*; \ 66 | \ 67 | # verify that "docker-java-home" returns what we expect 68 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 69 | \ 70 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 71 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 72 | # ... and verify that it actually worked for one of the alternatives we care about 73 | update-alternatives --query java | grep -q 'Status: manual' 74 | 75 | # see CA_CERTIFICATES_JAVA_VERSION notes above 76 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 77 | 78 | ## done java 79 | 80 | 81 | WORKDIR ${RASA_NLU_HOME} 82 | 83 | COPY . ${RASA_NLU_HOME} 84 | 85 | RUN pip install -r alt_requirements/requirements_mitie.txt 86 | 87 | RUN pip install -e . 88 | 89 | RUN apt-get update -qq \ 90 | && apt-get install -y --no-install-recommends wget \ 91 | && wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat \ 92 | && apt-get remove -y wget \ 93 | && apt-get autoremove -y 94 | 95 | COPY sample_configs/config_mitie.json ${RASA_NLU_HOME}/config.json 96 | 97 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 98 | 99 | EXPOSE 5000 100 | 101 | ENTRYPOINT ["./entrypoint.sh"] 102 | CMD ["start", "-c", "config.json"] 103 | -------------------------------------------------------------------------------- /docker/Dockerfile_spacy_sklearn: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | COPY . 
${RASA_NLU_HOME} 18 | 19 | 20 | # use bash always 21 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 22 | 23 | ## install java stuff 24 | 25 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 26 | 27 | RUN apt-get update && apt-get install -y --no-install-recommends \ 28 | bzip2 \ 29 | unzip \ 30 | xz-utils \ 31 | && rm -rf /var/lib/apt/lists/* 32 | 33 | # Default to UTF-8 file.encoding 34 | ENV LANG C.UTF-8 35 | 36 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 37 | # based on whether the JDK or only the JRE is installed 38 | RUN { \ 39 | echo '#!/bin/sh'; \ 40 | echo 'set -e'; \ 41 | echo; \ 42 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 43 | } > /usr/local/bin/docker-java-home \ 44 | && chmod +x /usr/local/bin/docker-java-home 45 | 46 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 47 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 48 | ENV JAVA_HOME /docker-java-home 49 | 50 | ENV JAVA_VERSION 8u141 51 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 52 | 53 | # see https://bugs.debian.org/775775 54 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 55 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 56 | 57 | RUN set -ex; \ 58 | \ 59 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 60 | if [ ! -d /usr/share/man/man1 ]; then \ 61 | mkdir -p /usr/share/man/man1; \ 62 | fi; \ 63 | \ 64 | apt-get update; \ 65 | apt-get install -y -t jessie-backports\ 66 | openjdk-8-jdk \ 67 | ca-certificates-java \ 68 | ; \ 69 | rm -rf /var/lib/apt/lists/*; \ 70 | \ 71 | # verify that "docker-java-home" returns what we expect 72 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 73 | \ 74 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 75 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 76 | # ... and verify that it actually worked for one of the alternatives we care about 77 | update-alternatives --query java | grep -q 'Status: manual' 78 | 79 | # see CA_CERTIFICATES_JAVA_VERSION notes above 80 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 81 | 82 | ## done java 83 | 84 | RUN pip install -r alt_requirements/requirements_spacy_sklearn.txt 85 | 86 | RUN pip install -e . 
87 | 88 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 89 | && python -m spacy link en_core_web_md en \ 90 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 91 | && python -m spacy link de_core_news_sm de 92 | 93 | COPY sample_configs/config_spacy.yml ${RASA_NLU_HOME}/config.yml 94 | 95 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 96 | 97 | EXPOSE 5000 98 | 99 | ENTRYPOINT ["./entrypoint.sh"] 100 | CMD ["start", "-c", "config.yml", "--path", "/app/projects"] 101 | -------------------------------------------------------------------------------- /docker/Dockerfile_test: -------------------------------------------------------------------------------- 1 | # Dockerfile to build a whole instance of rasa and run the rasa `pytest` 2 | # (created to test the changes needed for docker automation) 3 | # 4 | # (so far) only used manually, via: 5 | # `docker build -f docker/Dockerfile_test .` (from project root) 6 | # `docker run -it [id-output-from-above]` 7 | 8 | FROM python:3.6-slim 9 | 10 | ENV RASA_NLU_DOCKER="YES" \ 11 | RASA_NLU_HOME=/app \ 12 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 13 | 14 | # Run updates, install basics and cleanup 15 | # - build-essential: Compile specific dependencies 16 | # - git-core: Checkout git repos 17 | RUN apt-get update -qq \ 18 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl vim\ 19 | && apt-get clean \ 20 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 21 | 22 | RUN apt-get update -qq \ 23 | && apt-get install -y --no-install-recommends wget 24 | 25 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 26 | && python -m spacy link en_core_web_md en \ 27 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 28 | && python -m spacy link de_core_news_sm de 29 | 30 | 31 | WORKDIR ${RASA_NLU_HOME} 32 | 33 | # use bash always 34 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 35 | 36 | ## install java stuff 37 | 38 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 39 | 40 | RUN apt-get update && apt-get install -y --no-install-recommends \ 41 | bzip2 \ 42 | unzip \ 43 | xz-utils \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Default to UTF-8 file.encoding 47 | ENV LANG C.UTF-8 48 | 49 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 50 | # based on whether the JDK or only the JRE is installed 51 | RUN { \ 52 | echo '#!/bin/sh'; \ 53 | echo 'set -e'; \ 54 | echo; \ 55 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 56 | } > /usr/local/bin/docker-java-home \ 57 | && chmod +x /usr/local/bin/docker-java-home 58 | 59 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 60 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 61 | ENV JAVA_HOME /docker-java-home 62 | 63 | ENV JAVA_VERSION 8u141 64 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 65 | 66 | # see https://bugs.debian.org/775775 67 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 68 | 
ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 69 | 70 | RUN set -ex; \ 71 | \ 72 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 73 | if [ ! -d /usr/share/man/man1 ]; then \ 74 | mkdir -p /usr/share/man/man1; \ 75 | fi; \ 76 | \ 77 | apt-get update; \ 78 | apt-get install -y -t jessie-backports\ 79 | openjdk-8-jdk \ 80 | ca-certificates-java \ 81 | ; \ 82 | rm -rf /var/lib/apt/lists/*; \ 83 | \ 84 | # verify that "docker-java-home" returns what we expect 85 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 86 | \ 87 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 88 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 89 | # ... and verify that it actually worked for one of the alternatives we care about 90 | update-alternatives --query java | grep -q 'Status: manual' 91 | 92 | # see CA_CERTIFICATES_JAVA_VERSION notes above 93 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 94 | 95 | ## done java 96 | 97 | 98 | COPY . ${RASA_NLU_HOME} 99 | 100 | RUN wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat 101 | 102 | RUN pip install -r alt_requirements/requirements_dev.txt 103 | 104 | RUN pip install -e . 105 | 106 | RUN sed -i -e 's/backend : tkagg/backend : PDF/' /usr/local/lib/python3.6/site-packages/matplotlib/mpl-data/matplotlibrc 107 | 108 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 109 | 110 | EXPOSE 5000 111 | 112 | ENTRYPOINT ["/usr/local/bin/py.test"] 113 | CMD ["_pytest", "--cov", "rasa_nlu", "--pep8", "-v"] 114 | 115 | -------------------------------------------------------------------------------- /docker/docker-cloud.yml: -------------------------------------------------------------------------------- 1 | rasanlu: 2 | image: rasa/rasa_nlu:latest-spacy 3 | ports: 4 | - "5000:5000" 5 | volumes: 6 | - "/rasa-app-data/projects:/app/projects" 7 | - "/rasa-app-data/logs:/app/logs" 8 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .rating-container { 2 | background-color: rgba(0, 0, 16, 0.8); 3 | padding: 1rem; 4 | text-align: center; 5 | margin-bottom: 2rem; 6 | } 7 | 8 | .rating-text p { 9 | color: white; 10 | font-size: 1.3rem; 11 | padding: 0.3rem; 12 | } 13 | 14 | .submit { 15 | text-align: center; 16 | } 17 | 18 | svg { 19 | fill: rgb(242, 242, 242); 20 | height: 3.6rem; 21 | width: 3.6rem; 22 | margin: 0.2rem; 23 | } 24 | 25 | label { 26 | color: white; 27 | text-align: center; 28 | display: inline-block; 29 | margin: 0 1rem 0 1rem; 30 | } 31 | 32 | #radios label { 33 | position: relative; 34 | } 35 | 36 | input[type="radio"] { 37 | position: absolute; 38 | opacity: 0; 39 | } 40 | 41 | input[type="radio"] + svg { 42 | -webkit-transition: all 0.2s; 43 | transition: all 0.2s; 44 | } 45 | 46 | input + svg { 47 | cursor: pointer; 48 | } 49 | 50 | input[class="great"]:hover + svg, 51 | input[class="great"]:checked + svg, 52 | input[class="great"]:focus + svg { 53 | fill: rgb(0, 204, 79); 54 | } 55 | 56 | input[class="didnt-work"]:hover + svg, 57 | input[class="didnt-work"]:checked + svg, 58 | input[class="didnt-work"]:focus + svg { 59 | fill: rgb(255, 0, 0); 60 | } 61 | 62 | input[class="didnt-finish"]:hover + svg, 63 | 
input[class="didnt-finish"]:checked + svg, 64 | input[class="didnt-finish"]:focus + svg { 65 | fill: rgb(255, 255, 0); 66 | } 67 | 68 | .wy-table-responsive table td, 69 | .wy-table-responsive table th { 70 | white-space: normal !important; 71 | } 72 | 73 | /* 74 | fixes wrong line height in code snippets. (line height between code lines 75 | and shown line numbers is different so that the line numbers and the code 76 | lines don't align without that fix) 77 | */ 78 | .rst-content .highlight > pre { 79 | line-height: 18px; 80 | } 81 | 82 | /* 83 | fixes overlapping "Choose Version" bar in the navigation hiding content items 84 | adds enough padding so the version chooser doesn't overlap with any 85 | navigation items 86 | */ 87 | .wy-nav-side { 88 | padding-bottom: 3.2em; 89 | } 90 | -------------------------------------------------------------------------------- /docs/_static/images/component_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/_static/images/component_lifecycle.png -------------------------------------------------------------------------------- /docs/_static/images/rasa_nlu_intent_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/_static/images/rasa_nlu_intent_gui.png -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% set css_files = css_files + ['_static/css/custom.css'] %} 4 | 5 | {% block footer %} 6 | {{ super() }} 7 | 8 | 9 | 16 | 17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGELOG.rst -------------------------------------------------------------------------------- /docs/closeloop.rst: -------------------------------------------------------------------------------- 1 | .. _section_closeloop: 2 | 3 | Improving your models from feedback 4 | =================================== 5 | 6 | When the rasa_nlu server is running, it keeps track of all the 7 | predictions it's made and saves these to a log file. 8 | By default log files are placed in ``logs/``. The files in this 9 | directory contain one json object per line. 10 | You can fix any incorrect predictions and add them to your 11 | training set to improve your parser. 12 | After adding these to your training data, but before 13 | retraining your model, it is strongly recommended that you use the 14 | visualizer to spot any errors, see 15 | :ref:`Visualizing training data `. 16 | -------------------------------------------------------------------------------- /docs/community.rst: -------------------------------------------------------------------------------- 1 | .. _section_community: 2 | 3 | Community Contributions 4 | ======================= 5 | 6 | .. note:: 7 | This is an (incomplete) list of external resources created by the Rasa community. 8 | We list them here because they can help you learn about the Rasa Stack, but they are 9 | not officially endorsed by Rasa and we cannot promise that they will be kept up-to-date 10 | as the project evolves. 
11 | 12 | 13 | Community Written Documentation 14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | 17 | - A three part tutorial on using Rasa NLU in combination with Node-RED to create a basic chat bot and integrate it with Slack and Twilio. 18 | - `Part 1 `_ - Installation, Education, and Model Training 19 | - `Part 2 `_ - Back end fulfillment using Node-RED 20 | - `Part 3 `_ - A Complete Chatbot on Slack and Twilio 21 | - Documentation on using Rasa NLU with Docker 22 | - `Using Rasa NLU with Docker `_ - The easiest way to get started working with Rasa 23 | - `Failing Gracefully with Rasa NLU `_ 24 | 25 | Community Open Source Tools/Software 26 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 27 | 28 | Below is a list of tools and applications written around or for Rasa NLU using a permissive license. 29 | 30 | - Postgres backed UI for interacting with Rasa NLU 31 | - `Rasa UI `_ 32 | 33 | - A tool for generating training examples from a list of entities 34 | - `Chatito `_ 35 | 36 | - A custom API and UI on top of Rasa NLU for ease of use 37 | - `Articulate `_ 38 | 39 | Video Tutorials 40 | ^^^^^^^^^^^^^^^ 41 | 42 | - Talk about the Rasa Stack at `PyData `_ 43 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | .. _section_configuration: 2 | 3 | Configuration 4 | ============= 5 | 6 | You can provide options to Rasa NLU through: 7 | 8 | - a yaml-formatted config file 9 | - environment variables 10 | - command line arguments 11 | 12 | Environment variables override options in your config file, 13 | and command line args will override any options specified elsewhere. 14 | Environment variables are capitalised and prefixed with ``RASA_``, 15 | so the option ``pipeline`` is specified with the ``RASA_PIPELINE`` env var. 16 | 17 | Default 18 | ------- 19 | Here is an example model configuration: 20 | 21 | .. literalinclude:: ../sample_configs/config_crf.yml 22 | :language: yaml 23 | 24 | As you can see, there are a couple of top-level configuration keys, like 25 | ``language`` and ``pipeline`` - but most of the configuration is component 26 | specific. 27 | 28 | Explanations for the configuration keys of the different components are part 29 | of the :ref:`section_pipeline`. 30 | 31 | Options 32 | ------- 33 | A short explanation and examples for each configuration value. 34 | 35 | pipeline 36 | ~~~~~~~~ 37 | 38 | :Type: ``str`` or ``[dict]`` 39 | :Examples: 40 | using a pipeline template (predefined set of components with default 41 | parameters): 42 | 43 | .. code-block:: yaml 44 | 45 | pipeline: "spacy_sklearn" 46 | 47 | or alternatively specifying the components and parameters: 48 | 49 | .. code-block:: yaml 50 | 51 | pipeline: 52 | - name: "nlp_spacy" 53 | model: "en" # parameter of the spacy component 54 | - name: "ner_synonyms" 55 | 56 | :Description: 57 | The pipeline used for training. It can either be a template 58 | (passing a string) or a list of components (array) with their 59 | configuration values. For all available templates, 60 | see :ref:`section_pipeline`. The component-specific parameters 61 | are listed there as well. 62 | 63 | language 64 | ~~~~~~~~ 65 | 66 | :Type: ``str`` 67 | :Examples: 68 | 69 | .. code-block:: yaml 70 | 71 | language: "en" 72 | 73 | :Description: 74 | Language the model is trained in. Underlying word vectors 75 | will be loaded by using this language. There is more info 76 | about available languages in :ref:`section_languages`. 
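As a minimal sketch of how such a configuration is consumed (the calls mirror the Python API described in :ref:`section_python`, and it assumes the dependencies of the ``spacy_sklearn`` pipeline are installed), you can load one of the sample config files shipped in ``sample_configs/`` and train a model directly from Python:

.. code-block:: python

    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.training_data import load_data

    # the yaml file supplies the ``language`` and ``pipeline`` keys described above
    cfg = config.load("sample_configs/config_spacy.yml")

    # train the configured pipeline on the demo training data shipped with the repo
    training_data = load_data("data/examples/rasa/demo-rasa.json")
    trainer = Trainer(cfg)
    trainer.train(training_data)

    # returns the directory the trained model was persisted to
    model_directory = trainer.persist("./projects/default/")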
77 | -------------------------------------------------------------------------------- /docs/context.rst: -------------------------------------------------------------------------------- 1 | .. _section_context: 2 | 3 | Context-aware Dialogue 4 | ====================== 5 | 6 | Rasa NLU allows you to turn natural language into structured data, 7 | but this might not be enough if you want to build a bot that handles what 8 | has been said in context and adjusts the flow of the conversation 9 | accordingly. Rasa's open-source solution to handle contextual dialogue is 10 | `Rasa Core `_, but there are other tools 11 | out there such as `Dialogflow `_ (not open-sourced). 12 | 13 | Rasa Core uses machine learning to predict the evolution of a conversation, 14 | and does away with the need for tedious and poorly-scaling ``if/else`` logic. 15 | It also allows you to implement custom actions in response to the 16 | user message, such as saying something back, modifying a database, calling an 17 | API or handing over to a human. It is by design the natural companion of 18 | Rasa NLU if you want to build conversational bots. -------------------------------------------------------------------------------- /docs/contribute.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions are very much encouraged! Please create an issue before doing any work to avoid disappointment. 5 | 6 | We created a tag that should get you started quickly if you are searching for 7 | `interesting topics to get started `_. 8 | 9 | 10 | Python Conventions 11 | ^^^^^^^^^^^^^^^^^^ 12 | 13 | Python code should follow the pep-8 spec. 14 | 15 | Python 2 and 3 Cross Compatibility 16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 17 | 18 | To ensure cross compatibility between Python 2 and 3 we prioritize Python 3 conventions. 19 | Keep in mind that: 20 | 21 | - all string literals are unicode strings 22 | - division generates floating point numbers. Use ``//`` for truncated division 23 | - some built-ins, e.g. ``map`` and ``filter`` return iterators in Python 3. If you want to make use of them import the Python 3 version of them from ``builtins``. Otherwise use list comprehensions, which work uniformly across versions 24 | - use ``io.open`` instead of the builtin ``open`` when working with files 25 | - The following imports from ``__future__`` are mandatory in every python file: ``unicode_literals``, ``print_function``, ``division``, and ``absolute_import`` 26 | 27 | Please refer to this `cheat sheet `_ to learn how to write different constructs compatible with Python 2 and 3. 28 | 29 | Code of conduct 30 | ^^^^^^^^^^^^^^^ 31 | 32 | Rasa NLU adheres to the `Contributor Covenant Code of Conduct `_. 33 | By participating, you are expected to uphold this code. 34 | 35 | Documentation 36 | ^^^^^^^^^^^^^ 37 | Everything should be properly documented. To locally test the documentation you need to install 38 | 39 | .. code-block:: bash 40 | 41 | brew install sphinx 42 | pip install sphinx_rtd_theme 43 | 44 | After that, you can compile and view the documentation using: 45 | 46 | .. code-block:: bash 47 | 48 | cd docs 49 | make html 50 | cd _build/html 51 | python -m SimpleHTTPServer 8000 . 52 | # python 3: python -m http.server 53 | 54 | The documentation will be running on http://localhost:8000/. 55 | 56 | Code snippets that are part of the documentation can be tested using 57 | 58 | .. 
code-block:: bash 59 | 60 | make doctest 61 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | .. _section_faq: 2 | 3 | Frequently Asked Questions 4 | ========================== 5 | 6 | How many training examples do I need? 7 | ------------------------------------- 8 | Unfortunately, there is no cookie-cutter answer to this question. It depends on your intents and your entities. 9 | 10 | If you have intents that are easily confusable, you will need more training data. Accordingly, as you add more 11 | intents, you also want to add more training examples for each intent. If you quickly write 20-30 unique expressions for 12 | each intent, you should be good for the beginning. 13 | 14 | The same holds true for entities. The number of training examples you will need depends on how closely related your different entity types are and how clearly 15 | entities are distinguishable from non-entities in your use case. 16 | 17 | To assess your model's performance, :ref:`run the server and manually test some messages ` 18 | , or use the :ref:`evaluation script `. 19 | 20 | 21 | 22 | Does it run with python 3? 23 | -------------------------- 24 | Yes, it does. Rasa NLU supports python 2.7 as well as python 3.5 and 3.6. If there are any issues with a specific python version, feel free to create an issue or directly provide a fix. 25 | 26 | Which languages are supported? 27 | ------------------------------ 28 | There is a list containing all officially supported languages :ref:`here `. Nevertheless, there are 29 | others working on adding more languages; feel free to have a look at the `github issues `_ 30 | section or the `gitter chat `_. 31 | 32 | .. _section_faq_version: 33 | 34 | Which version of Rasa NLU am I running? 35 | --------------------------------------- 36 | To find out which Rasa version you are running, you can execute 37 | 38 | .. code-block:: bash 39 | 40 | python -c "import rasa_nlu; print(rasa_nlu.__version__);" 41 | 42 | If you are using a virtual environment to run your python code, make sure you are using the correct python to execute the above code. 43 | 44 | Why am I getting an ``UndefinedMetricWarning``? 45 | ----------------------------------------------- 46 | The complete warning is: ``UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.`` 47 | The warning is a result of a lack of training data. During training, the dataset is split multiple times; if there are too few training samples for any of the intents, some of the splits may not contain any examples for that intent. 48 | 49 | Hence, the solution is to add more training samples. As this is only a warning, training will still succeed, but the resulting model's predictions might be weak on the intents where you are lacking training data. 50 | 51 | 52 | I have an issue, can you help me? 53 | --------------------------------- 54 | We'd love to help you. If you are unsure whether your issue is related to your setup, you should state your problem in the `gitter chat `_. 55 | If you found an issue with the framework, please file a report on `github issues `_ 56 | including all the information needed to reproduce the problem. 57 | 58 | .. 
toctree:: 59 | :maxdepth: 1 60 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Language Understanding with Rasa NLU 3 | ==================================== 4 | 5 | .. note:: 6 | This is the documentation for version |release| of Rasa NLU. Make sure you select 7 | the appropriate version of the documentation for your local installation! 8 | 9 | 10 | Rasa NLU is an open-source tool for intent classification and entity extraction. For example, taking a sentence like 11 | 12 | .. code-block:: console 13 | 14 | "I am looking for a Mexican restaurant in the center of town" 15 | 16 | and returning structured data like 17 | 18 | .. code-block:: json 19 | 20 | { 21 | "intent": "search_restaurant", 22 | "entities": { 23 | "cuisine" : "Mexican", 24 | "location" : "center" 25 | } 26 | } 27 | 28 | 29 | The intended audience is mainly people developing bots. 30 | You can use Rasa as a drop-in replacement for `wit `_ , `LUIS `_ , or `Dialogflow `_; the only change in your code is to send requests to ``localhost`` instead (see :ref:`section_migration` for details). 31 | 32 | Why might you use Rasa instead of one of those services? 33 | 34 | - you don't have to hand over your data to FB/MSFT/GOOG 35 | - you don't have to make an ``https`` call every time. 36 | - you can tune models to work well on your particular use case. 37 | 38 | These points are laid out in more detail in a `blog post `_ . 39 | 40 | 41 | The quickest quickstart in the west 42 | ----------------------------------- 43 | 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | $ python -m rasa_nlu.server -e wit & 49 | $ curl 'http://localhost:5000/parse?q=hello' 50 | [{"_text": "hello", "confidence": 1.0, "entities": {}, "intent": "greet"}] 51 | 52 | 53 | There you go! You just parsed some text. Next step, do the :ref:`section_tutorial`. 54 | 55 | .. note:: This demo uses a very limited ML model. To apply Rasa NLU to your use case, you need to train your own model! Follow the tutorial to learn how to apply rasa_nlu to your data. 56 | 57 | About 58 | ----- 59 | 60 | You can think of Rasa NLU as a set of high-level APIs for building your own language parser using existing NLP and ML libraries. 61 | The setup process is designed to be as simple as possible. If you're currently using wit, LUIS, or Dialogflow, you just: 62 | 63 | 1. download your app data from wit or LUIS and feed it into Rasa NLU 64 | 2. run Rasa NLU on your machine and switch the URL of your wit/LUIS/Dialogflow API calls to ``localhost:5000/parse``. 65 | 66 | Rasa NLU is written in Python, but you can use it from any language through :ref:`section_http`. 67 | If your project *is* written in Python you can simply import the relevant classes. 68 | 69 | Rasa is a set of tools for building more advanced bots, developed by `Rasa 70 | `_. This is the natural language understanding module. To 71 | build conversational chatbots, you can interface Rasa NLU with libraries 72 | that steer the flow of the conversation - more on this in :ref:`section_context`. 73 | 74 | .. toctree:: 75 | :maxdepth: 1 76 | :caption: Getting Started 77 | 78 | installation 79 | tutorial 80 | 81 | .. 
toctree:: 82 | :maxdepth: 1 83 | :caption: User Documentation 84 | 85 | config 86 | migrating 87 | dataformat 88 | http 89 | python 90 | entities 91 | closeloop 92 | persist 93 | languages 94 | pipeline 95 | evaluation 96 | context 97 | faq 98 | migrations 99 | license 100 | 101 | .. toctree:: 102 | :maxdepth: 1 103 | :caption: Resources 104 | 105 | community 106 | 107 | .. toctree:: 108 | :maxdepth: 1 109 | :caption: Developer Documentation 110 | 111 | contribute 112 | changelog 113 | -------------------------------------------------------------------------------- /docs/key.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/key.enc -------------------------------------------------------------------------------- /docs/languages.rst: -------------------------------------------------------------------------------- 1 | .. _section_languages: 2 | 3 | Language Support 4 | ================ 5 | 6 | Rasa NLU supports a number of different languages. Exactly which ones depends on 7 | the backend you are using, and the features you require. 8 | 9 | Firstly, the ``tensorflow_embedding`` pipeline in principle supports any language, 10 | but only performs intent classification. 11 | In addition, with the spaCy backend you can now load fastText vectors, which are available 12 | for `hundreds of languages `_. 13 | 14 | For both intent and entity recognition, the following languages and backend combinations 15 | are tested and available: 16 | 17 | ============= ============================== 18 | backend supported languages 19 | ============= ============================== 20 | spacy-sklearn english (``en``), 21 | german (``de``), 22 | spanish (``es``), 23 | portuguese (``pt``), 24 | italian (``it``), 25 | dutch (``nl``), 26 | french (``fr``) 27 | MITIE english (``en``) 28 | Jieba-MITIE chinese (``zh``) :ref:`* ` 29 | ============= ============================== 30 | 31 | These languages can be set as part of the :ref:`section_configuration`. 32 | 33 | Adding a new language 34 | --------------------- 35 | We want to make the process of adding new languages as simple as possible to increase the number of 36 | supported languages. Nevertheless, to use a language you either need a trained word representation or 37 | you need to train that representation on your own using a large corpus of text data in that language. 38 | 39 | These are the steps necessary to add a new language: 40 | 41 | spacy-sklearn 42 | ^^^^^^^^^^^^^ 43 | 44 | spaCy already provides a really good documentation page about `Adding languages `_. 45 | This will help you train a tokenizer and vocabulary for a new language in spaCy. 46 | 47 | As described in the documentation, you need to register your language using ``set_lang_class()``, which will 48 | allow Rasa NLU to load and use your new language by passing in your language identifier as the ``language`` :ref:`section_configuration` option. 49 | 50 | MITIE 51 | ^^^^^ 52 | 53 | 1. Get a ~clean language corpus (a Wikipedia dump works) as a set of text files 54 | 2. Build and run `MITIE Wordrep Tool`_ on your corpus. This can take several hours/days depending on your dataset and your workstation. You'll need something like 128GB of RAM for wordrep to run - yes, that's a lot; try to extend your swap. 55 | 3. Set the path of your new ``total_word_feature_extractor.dat`` as the value of the *mitie_file* parameter in ``config_mitie.json`` 56 | 57 | .. 
_jieba: 58 | 59 | Jieba-MITIE 60 | ^^^^^^^^^^^ 61 | 62 | Some notes about using the Jieba tokenizer together with MITIE on Chinese 63 | language data: To use it, you need a proper MITIE feature extractor, e.g. 64 | ``data/total_word_feature_extractor_zh.dat``. It should be trained 65 | from a Chinese corpus using the MITIE wordrep tools 66 | (takes 2-3 days for training). 67 | 68 | For training, please build the 69 | `MITIE Wordrep Tool`_. 70 | Note that the Chinese corpus should be tokenized before feeding 71 | it into the tool for training. A close-domain corpus that best matches 72 | your use case works best. 73 | 74 | Detailed instructions on how to train the model yourself, as well as a model 75 | trained from a Chinese Wikipedia Dump and Baidu Baike, can be found in `crownpku `_ 's 76 | `blogpost `_. 77 | 78 | .. _`MITIE Wordrep Tool`: https://github.com/mit-nlp/MITIE/tree/master/tools/wordrep -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | 2 | License 3 | ======= 4 | 5 | 6 | .. literalinclude:: ../LICENSE.txt -------------------------------------------------------------------------------- /docs/migrating.rst: -------------------------------------------------------------------------------- 1 | .. _section_migration: 2 | 3 | Migrating an existing app 4 | ========================= 5 | 6 | Rasa NLU is designed to make migrating from wit/LUIS/Dialogflow as simple as possible. 7 | The TLDR instructions for migrating are: 8 | 9 | - download an export of your app data from wit/LUIS/Dialogflow 10 | - follow the :ref:`tutorial`, using your downloaded data instead of ``demo-rasa.json`` 11 | 12 | 13 | Banana Peels 14 | ------------ 15 | 16 | Just some specific things to watch out for with each of the services you might want to migrate from. 17 | 18 | wit.ai 19 | ^^^^^^ 20 | 21 | Wit used to handle ``intents`` natively. 22 | Now they are somewhat obfuscated. 23 | To create an ``intent`` in wit you have to create an ``entity`` which spans the entire text. 24 | The file you want from your download is called ``expressions.json``. 25 | 26 | LUIS.ai 27 | ^^^^^^^ 28 | 29 | Nothing special here. Downloading the data and importing it into Rasa NLU should work without issues. 30 | 31 | Dialogflow 32 | ^^^^^^^^^^ 33 | 34 | Dialogflow exports generate multiple files rather than just one. 35 | Put them all in a directory (see ``data/examples/dialogflow`` in the repo) 36 | and pass that path to the trainer. 37 | 38 | 39 | 40 | Emulation 41 | --------- 42 | 43 | To make Rasa NLU easy to try out with existing projects, 44 | the server can `emulate` wit, LUIS, or Dialogflow. 45 | In native mode, a request / response looks like this: 46 | 47 | .. code-block:: console 48 | 49 | $ curl -XPOST localhost:5000/parse -d '{"q":"I am looking for Chinese food"}' | python -mjson.tool 50 | { 51 | "text": "I am looking for Chinese food", 52 | "intent": "restaurant_search", 53 | "confidence": 0.4794813722432127, 54 | "entities": [ 55 | { 56 | "start": 17, 57 | "end": 24, 58 | "value": "chinese", 59 | "entity": "cuisine" 60 | } 61 | ] 62 | } 63 | 64 | 65 | If we run in ``wit`` mode 66 | (e.g. ``python -m rasa_nlu.server --emulate wit --path projects``), 67 | 68 | we instead have to make a GET request 69 | 70 | .. 
code-block:: console 71 | 72 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 73 | [ 74 | { 75 | "_text": "hello", 76 | "confidence": 0.4794813722432127, 77 | "entities": {}, 78 | "intent": "greet" 79 | } 80 | ] 81 | 82 | similarly for LUIS, but with a slightly different response format 83 | 84 | 85 | .. code-block:: console 86 | 87 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 88 | { 89 | "entities": [], 90 | "query": "hello", 91 | "topScoringIntent": { 92 | "intent": "inform", 93 | "score": 0.4794813722432127 94 | } 95 | } 96 | 97 | and finally for Dialogflow 98 | 99 | .. code-block:: console 100 | 101 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 102 | { 103 | "id": "ffd7ede3-b62f-11e6-b292-98fe944ee8c2", 104 | "result": { 105 | "action": null, 106 | "actionIncomplete": null, 107 | "contexts": [], 108 | "fulfillment": {}, 109 | "metadata": { 110 | "intentId": "ffdbd6f3-b62f-11e6-8504-98fe944ee8c2", 111 | "intentName": "greet", 112 | "webhookUsed": "false" 113 | }, 114 | "parameters": {}, 115 | "resolvedQuery": "hello", 116 | "score": null, 117 | "source": "agent" 118 | }, 119 | "sessionId": "ffdbd814-b62f-11e6-93b2-98fe944ee8c2", 120 | "status": { 121 | "code": 200, 122 | "errorType": "success" 123 | }, 124 | "timestamp": "2016-11-29T12:33:15.369411" 125 | } 126 | -------------------------------------------------------------------------------- /docs/persist.rst: -------------------------------------------------------------------------------- 1 | .. _section_persistence: 2 | 3 | Model Persistence 4 | ================= 5 | 6 | 7 | Rasa NLU supports using `S3 `_ and 8 | `GCS `_ to save your models. 9 | 10 | * Amazon S3 Storage 11 | S3 is supported using the ``boto3`` module which you can 12 | install with ``pip install boto3``. 13 | 14 | Start the Rasa NLU server with ``storage`` option set to 15 | ``aws``. Get your S3 credentials and set the following 16 | environment variables: 17 | 18 | - ``AWS_SECRET_ACCESS_KEY`` 19 | - ``AWS_ACCESS_KEY_ID`` 20 | - ``AWS_DEFAULT_REGION`` 21 | - ``BUCKET_NAME`` 22 | - ``AWS_ENDPOINT_URL`` 23 | 24 | If there is no bucket with the name ``BUCKET_NAME`` Rasa will create it. 25 | 26 | * Google Cloud Storage 27 | GCS is supported using the ``google-cloud-storage`` package 28 | which you can install with ``pip install google-cloud-storage`` 29 | 30 | Start the Rasa NLU server with ``storage`` option set to ``gcs``. 31 | 32 | When running on google app engine and compute engine, the auth 33 | credentials are already set up. For running locally or elsewhere, 34 | checkout their 35 | `client repo `_ 36 | for details on setting up authentication. It involves creating 37 | a service account key file from google cloud console, 38 | and setting the ``GOOGLE_APPLICATION_CREDENTIALS`` environment 39 | variable to the path of that key file. 40 | 41 | * Azure Storage 42 | Azure is supported using the ``azure-storage-blob`` package 43 | which you can install with ``pip install azure-storage-blob`` 44 | 45 | Start the Rasa NLU server with ``storage`` option set to ``azure``. 46 | 47 | The following environment variables must be set: 48 | 49 | - ``AZURE_CONTAINER`` 50 | - ``AZURE_ACCOUNT_NAME`` 51 | - ``AZURE_ACCOUNT_KEY`` 52 | 53 | If there is no container with the name ``AZURE_CONTAINER`` Rasa will create it. 54 | 55 | Models are gzipped before saving to cloud. 
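As a minimal sketch of the Amazon S3 setup described above, the required environment variables can also be set from Python before starting the server with the ``storage`` option set to ``aws``; every value below is a placeholder you would replace with your own credentials:

.. code-block:: python

    import os

    # credentials and bucket for the S3 persistor -- placeholder values only
    os.environ["AWS_ACCESS_KEY_ID"] = "<your-access-key-id>"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "<your-secret-access-key>"
    os.environ["AWS_DEFAULT_REGION"] = "<your-region>"
    os.environ["BUCKET_NAME"] = "<your-bucket>"  # created by Rasa if it does not exist
    os.environ["AWS_ENDPOINT_URL"] = "<your-s3-endpoint-url>"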
56 | -------------------------------------------------------------------------------- /docs/poll.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | How was the tutorial? Click to Vote 4 | 5 | 6 | 7 | 8 | 9 | Great! 10 | 11 | 12 | 13 | 14 | 15 | Didn't Work 16 | 17 | 18 | 19 | 20 | 21 | Didn't Finish 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/python.rst: -------------------------------------------------------------------------------- 1 | .. _section_python: 2 | 3 | Using Rasa NLU from python 4 | ========================== 5 | Apart from running Rasa NLU as a HTTP server you can use it directly in your python program. 6 | Rasa NLU supports both Python 2 and 3. 7 | 8 | Training Time 9 | ------------- 10 | For creating your models, you can follow the same instructions as non-python users. 11 | Or, you can train directly in python with a script like the following (using spacy): 12 | 13 | .. testcode:: 14 | 15 | from rasa_nlu.training_data import load_data 16 | from rasa_nlu.config import RasaNLUModelConfig 17 | from rasa_nlu.model import Trainer 18 | from rasa_nlu import config 19 | 20 | training_data = load_data('data/examples/rasa/demo-rasa.json') 21 | trainer = Trainer(config.load("sample_configs/config_spacy.yml")) 22 | trainer.train(training_data) 23 | model_directory = trainer.persist('./projects/default/') # Returns the directory the model is stored in 24 | 25 | Prediction Time 26 | --------------- 27 | 28 | You can call Rasa NLU directly from your python script. To do so, you need to load the metadata of 29 | your model and instantiate an interpreter. The ``metadata.json`` in your model dir contains the 30 | necessary info to recover your model: 31 | 32 | .. testcode:: 33 | 34 | from rasa_nlu.model import Metadata, Interpreter 35 | 36 | # where `model_directory points to the folder the model is persisted in 37 | interpreter = Interpreter.load(model_directory) 38 | 39 | You can then use the loaded interpreter to parse text: 40 | 41 | .. testcode:: 42 | 43 | interpreter.parse(u"The text I want to understand") 44 | 45 | which returns the same ``dict`` as the HTTP api would (without emulation). 46 | 47 | If multiple models are created, it is reasonable to share components between the different models. E.g. 48 | the ``'nlp_spacy'`` component, which is used by every pipeline that wants to have access to the spacy word vectors, 49 | can be cached to avoid storing the large word vectors more than once in main memory. To use the caching, 50 | a ``ComponentBuilder`` should be passed when loading and training models. 51 | 52 | Here is a short example on how to create a component builder, that can be reused to train and run multiple models, to train a model: 53 | 54 | .. testcode:: 55 | 56 | from rasa_nlu.training_data import load_data 57 | from rasa_nlu import config 58 | from rasa_nlu.components import ComponentBuilder 59 | from rasa_nlu.model import Trainer 60 | 61 | builder = ComponentBuilder(use_cache=True) # will cache components between pipelines (where possible) 62 | 63 | training_data = load_data('data/examples/rasa/demo-rasa.json') 64 | trainer = Trainer(config.load("sample_configs/config_spacy.yml"), builder) 65 | trainer.train(training_data) 66 | model_directory = trainer.persist('./projects/default/') # Returns the directory the model is stored in 67 | 68 | The same builder can be used to load a model (can be a totally different one). 
The builder only caches components that are safe to be shared between models. Here is a short example on how to use the builder when loading models: 69 | 70 | .. testcode:: 71 | 72 | from rasa_nlu.model import Metadata, Interpreter 73 | from rasa_nlu import config 74 | 75 | # For simplicity we will load the same model twice, usually you would want to use the metadata of 76 | # different models 77 | 78 | interpreter = Interpreter.load(model_directory, builder) # to use the builder, pass it as an arg when loading the model 79 | # the clone will share resources with the first model, as long as the same builder is passed! 80 | interpreter_clone = Interpreter.load(model_directory, builder) 81 | 82 | 83 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | function print_help { 6 | echo "Available options:" 7 | echo " start commands (rasa cmd line arguments) - Start RasaNLU server" 8 | echo " download {mitie, spacy en, spacy de} - Download packages for mitie or spacy (english or german)" 9 | echo " start -h - Print RasaNLU help" 10 | echo " help - Print this help" 11 | echo " run - Run an arbitrary command inside the container" 12 | } 13 | 14 | function download_package { 15 | case $1 in 16 | mitie) 17 | echo "Downloading mitie model..." 18 | wget https://github.com/mit-nlp/MITIE/releases/download/v0.4/MITIE-models-v0.2.tar.bz2 19 | tar jxf MITIE-models-v0.2.tar.bz2 20 | ;; 21 | spacy) 22 | case $2 in 23 | en|de) 24 | echo "Downloading spacy.$2 model..." 25 | python -m spacy download "$2" 26 | echo "Done." 27 | ;; 28 | *) 29 | echo "Error. Rasa_nlu supports only english and german models for the time being" 30 | print_help 31 | exit 1 32 | ;; 33 | esac 34 | ;; 35 | *) 36 | echo "Error: invalid package specified." 
37 | echo 38 | print_help 39 | ;; 40 | esac 41 | } 42 | 43 | case ${1} in 44 | start) 45 | exec python -m rasa_nlu.server "${@:2}" 46 | ;; 47 | run) 48 | exec "${@:2}" 49 | ;; 50 | download) 51 | download_package ${@:2} 52 | ;; 53 | *) 54 | print_help 55 | ;; 56 | esac 57 | 58 | 59 | -------------------------------------------------------------------------------- /heroku/Procfile: -------------------------------------------------------------------------------- 1 | web: python setup.py install --force; python -m rasa_nlu.server -P $PORT -------------------------------------------------------------------------------- /heroku/runtime.txt: -------------------------------------------------------------------------------- 1 | python-2.7.11 2 | -------------------------------------------------------------------------------- /jieba_userdict/jieba_userdict.txt: -------------------------------------------------------------------------------- 1 | 创新办 3 i 2 | 云计算 5 3 | 凱特琳 nz 4 | 台中 -------------------------------------------------------------------------------- /rasa_nlu/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | import rasa_nlu.version 9 | 10 | logging.getLogger(__name__).addHandler(logging.NullHandler()) 11 | 12 | __version__ = rasa_nlu.version.__version__ 13 | -------------------------------------------------------------------------------- /rasa_nlu/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | # How many intents are at max put into the output intent 7 | # ranking, everything else will be cut off 8 | INTENT_RANKING_LENGTH = 10 9 | -------------------------------------------------------------------------------- /rasa_nlu/classifiers/keyword_intent_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import map 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Text 9 | 10 | from rasa_nlu.components import Component 11 | from rasa_nlu.training_data import Message 12 | 13 | 14 | class KeywordIntentClassifier(Component): 15 | 16 | name = "intent_classifier_keyword" 17 | 18 | provides = ["intent"] 19 | 20 | his = ["hello", "hi", "hey"] 21 | 22 | byes = ["bye", "goodbye"] 23 | 24 | def process(self, message, **kwargs): 25 | # type: (Message, **Any) -> None 26 | 27 | intent = {"name": self.parse(message.text), "confidence": 1.0} 28 | message.set("intent", intent, 29 | add_to_output=True) 30 | 31 | def parse(self, text): 32 | # type: (Text) -> Text 33 | 34 | _text = text.lower() 35 | 36 | def is_present(x): 37 | return x in _text 38 | 39 | if any(map(is_present, self.his)): 40 | return "greet" 41 | elif any(map(is_present, self.byes)): 42 | return "goodbye" 43 | else: 44 | return None 45 | -------------------------------------------------------------------------------- /rasa_nlu/convert.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import argparse 7 | 8 | from rasa_nlu import training_data 9 | from rasa_nlu.utils import write_to_file 10 | 11 | 12 | def create_argument_parser(): 13 | parser = argparse.ArgumentParser( 14 | description='Convert training data formats into one another') 15 | 16 | parser.add_argument('-d', '--data_file', 17 | required=True, 18 | help='file or dir containing training data') 19 | 20 | parser.add_argument('-o', '--out_file', 21 | required=True, 22 | help='file where to save training data in rasa format') 23 | 24 | parser.add_argument('-l', '--language', 25 | default='en', 26 | help='language of the data') 27 | 28 | parser.add_argument('-f', '--format', 29 | required=True, 30 | choices=['json', 'md'], 31 | help="Output format the training data should be " 32 | "converted into.") 33 | return parser 34 | 35 | 36 | def convert_training_data(data_file, out_file, output_format, language): 37 | td = training_data.load_data(data_file, language) 38 | 39 | if output_format == 'md': 40 | output = td.as_markdown() 41 | else: 42 | output = td.as_json(indent=2) 43 | 44 | write_to_file(out_file, output) 45 | 46 | 47 | if __name__ == "__main__": 48 | parser = create_argument_parser() 49 | args = parser.parse_args() 50 | 51 | convert_training_data(args.data_file, 52 | args.out_file, 53 | args.format, 54 | args.language) 55 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import object 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Optional 9 | from typing import Text 10 | 11 | 12 | class NoEmulator(object): 13 | def __init__(self): 14 | # type: () -> None 15 | 16 | self.name = None # type: Optional[Text] 17 | 18 | def normalise_request_json(self, data): 19 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 20 | 21 | _data = {} 22 | _data["text"] = data["q"][0] if type(data["q"]) == list else data["q"] 23 | 24 | if not data.get("project"): 25 | _data["project"] = "default" 26 | elif type(data["project"]) == list: 27 | _data["project"] = data["project"][0] 28 | else: 29 | _data["project"] = data["project"] 30 | 31 | if data.get("model"): 32 | _data["model"] = data["model"][0] if type(data["model"]) == list else data["model"] 33 | 34 | _data['time'] = data["time"] if "time" in data else None 35 | return _data 36 | 37 | def normalise_response_json(self, data): 38 | # type: (Dict[Text, Any]) -> Any 39 | """Transform data to target format.""" 40 | 41 | return data 42 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/dialogflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import str 6 | import uuid 7 | from datetime import datetime 8 | 9 | from typing import Any 10 | from typing import Dict 11 | from typing import Text 12 | from typing import List 13 | 14 | from rasa_nlu.emulators import NoEmulator 15 | 16 | 17 | class 
DialogflowEmulator(NoEmulator): 18 | def __init__(self): 19 | # type: () -> None 20 | 21 | super(DialogflowEmulator, self).__init__() 22 | self.name = 'api' 23 | 24 | def normalise_response_json(self, data): 25 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 26 | """Transform data to Dialogflow format.""" 27 | 28 | # populate entities dict 29 | entities = { 30 | entity_type: [] 31 | for entity_type in set([x["entity"] for x in data["entities"]])} # type: Dict[Text, List[Text]] 32 | 33 | for entity in data["entities"]: 34 | entities[entity["entity"]].append(entity["value"]) 35 | 36 | return { 37 | "id": str(uuid.uuid1()), 38 | "timestamp": datetime.now().isoformat(), 39 | "result": { 40 | "source": "agent", 41 | "resolvedQuery": data["text"], 42 | "action": None, 43 | "actionIncomplete": None, 44 | "parameters": entities, 45 | "contexts": [], 46 | "metadata": { 47 | "intentId": str(uuid.uuid1()), 48 | "webhookUsed": "false", 49 | "intentName": data["intent"] 50 | }, 51 | "fulfillment": {}, 52 | "score": None, 53 | }, 54 | "status": { 55 | "code": 200, 56 | "errorType": "success" 57 | }, 58 | "sessionId": str(uuid.uuid1()) 59 | } 60 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/luis.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Text 9 | 10 | from rasa_nlu.emulators import NoEmulator 11 | 12 | 13 | class LUISEmulator(NoEmulator): 14 | def __init__(self): 15 | # type: () -> None 16 | 17 | super(LUISEmulator, self).__init__() 18 | self.name = 'luis' 19 | 20 | def _top_intent(self, data): 21 | if data.get("intent"): 22 | return { 23 | "intent": data["intent"]["name"], 24 | "score": data["intent"]["confidence"] 25 | } 26 | else: 27 | return None 28 | 29 | def _ranking(self, data): 30 | if data.get("intent_ranking"): 31 | return [{"intent": el["name"], "score": el["confidence"]} for el in data["intent_ranking"]] 32 | else: 33 | top = self._top_intent(data) 34 | return [top] if top else [] 35 | 36 | def normalise_response_json(self, data): 37 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 38 | """Transform data to luis.ai format.""" 39 | 40 | top_intent = self._top_intent(data) 41 | ranking = self._ranking(data) 42 | return { 43 | "query": data["text"], 44 | "topScoringIntent": top_intent, 45 | "intents": ranking, 46 | "entities": [ 47 | { 48 | "entity": e["value"], 49 | "type": e["entity"], 50 | "startIndex": None, 51 | "endIndex": None, 52 | "score": None 53 | } for e in data["entities"] 54 | ] if "entities" in data else [] 55 | } 56 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/wit.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import List 9 | from typing import Text 10 | 11 | from rasa_nlu.emulators import NoEmulator 12 | 13 | 14 | class WitEmulator(NoEmulator): 15 | def __init__(self): 16 | # type: () -> None 17 | 18 | super(WitEmulator, self).__init__() 19 | self.name = "wit" 20 | 21 | def normalise_response_json(self, data): 22 
| # type: (Dict[Text, Any]) -> List[Dict[Text, Any]] 23 | """Transform data to wit.ai format.""" 24 | 25 | entities = {} 26 | for entity in data["entities"]: 27 | entities[entity["entity"]] = { 28 | "confidence": None, 29 | "type": "value", 30 | "value": entity["value"], 31 | "start": entity["start"], 32 | "end": entity["end"] 33 | } 34 | 35 | return [ 36 | { 37 | "_text": data["text"], 38 | "confidence": data["intent"]['confidence'], 39 | "intent": data["intent"]['name'], 40 | "entities": entities 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /rasa_nlu/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import List 9 | from typing import Text 10 | 11 | from rasa_nlu.components import Component 12 | from rasa_nlu.training_data import Message 13 | 14 | 15 | class EntityExtractor(Component): 16 | def add_extractor_name(self, entities): 17 | # type: (List[Dict[Text, Any]]) -> List[Dict[Text, Any]] 18 | for entity in entities: 19 | entity["extractor"] = self.name 20 | return entities 21 | 22 | def add_processor_name(self, entity): 23 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 24 | if "processors" in entity: 25 | entity["processors"].append(self.name) 26 | else: 27 | entity["processors"] = [self.name] 28 | 29 | return entity 30 | 31 | @staticmethod 32 | def find_entity(ent, text, tokens): 33 | offsets = [token.offset for token in tokens] 34 | ends = [token.end for token in tokens] 35 | 36 | if ent["start"] not in offsets: 37 | message = ("Invalid entity {} in example '{}': " 38 | "entities must span whole tokens. " 39 | "Wrong entity start.".format(ent, text)) 40 | raise ValueError(message) 41 | 42 | if ent["end"] not in ends: 43 | message = ("Invalid entity {} in example '{}': " 44 | "entities must span whole tokens. " 45 | "Wrong entity end.".format(ent, text)) 46 | raise ValueError(message) 47 | 48 | start = offsets.index(ent["start"]) 49 | end = ends.index(ent["end"]) + 1 50 | return start, end 51 | 52 | def filter_trainable_entities(self, entity_examples): 53 | # type: (List[Message]) -> List[Message] 54 | """Filters out untrainable entity annotations. 55 | 56 | Creates a copy of entity_examples in which entities that have 57 | `extractor` set to something other than self.name (e.g. 
'ner_crf') 58 | are removed.""" 59 | 60 | filtered = [] 61 | for message in entity_examples: 62 | entities = [] 63 | for ent in message.get("entities", []): 64 | extractor = ent.get("extractor") 65 | if not extractor or extractor == self.name: 66 | entities.append(ent) 67 | data = message.data.copy() 68 | data['entities'] = entities 69 | filtered.append( 70 | Message(text=message.text, 71 | data=data, 72 | output_properties=message.output_properties, 73 | time=message.time)) 74 | 75 | return filtered 76 | -------------------------------------------------------------------------------- /rasa_nlu/extractors/spacy_entity_extractor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import typing 7 | from typing import Any 8 | from typing import Dict 9 | from typing import List 10 | from typing import Text 11 | 12 | from rasa_nlu.extractors import EntityExtractor 13 | from rasa_nlu.training_data import Message 14 | 15 | if typing.TYPE_CHECKING: 16 | from spacy.tokens.doc import Doc 17 | 18 | 19 | class SpacyEntityExtractor(EntityExtractor): 20 | name = "ner_spacy" 21 | 22 | provides = ["entities"] 23 | 24 | requires = ["spacy_nlp"] 25 | 26 | def process(self, message, **kwargs): 27 | # type: (Message, **Any) -> None 28 | 29 | # can't use the existing doc here (spacy_doc on the message) 30 | # because tokens are lower cased which is bad for NER 31 | spacy_nlp = kwargs.get("spacy_nlp", None) 32 | doc = spacy_nlp(message.text) 33 | extracted = self.add_extractor_name(self.extract_entities(doc)) 34 | message.set("entities", 35 | message.get("entities", []) + extracted, 36 | add_to_output=True) 37 | 38 | @staticmethod 39 | def extract_entities(doc): 40 | # type: (Doc) -> List[Dict[Text, Any]] 41 | 42 | entities = [ 43 | { 44 | "entity": ent.label_, 45 | "value": ent.text, 46 | "start": ent.start_char, 47 | "confidence": None, 48 | "end": ent.end_char 49 | } 50 | for ent in doc.ents] 51 | return entities 52 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | 8 | from rasa_nlu.components import Component 9 | 10 | 11 | class Featurizer(Component): 12 | 13 | @staticmethod 14 | def _combine_with_existing_text_features(message, 15 | additional_features): 16 | if message.get("text_features") is not None: 17 | return np.hstack((message.get("text_features"), 18 | additional_features)) 19 | else: 20 | return additional_features 21 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/mitie_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import typing 8 | from typing import Any 9 | from typing import List 10 | from typing import Text 11 | 12 | from rasa_nlu.config import RasaNLUModelConfig 13 | from rasa_nlu.featurizers import Featurizer 14 | from rasa_nlu.tokenizers import Token 
15 | from rasa_nlu.training_data import Message 16 | from rasa_nlu.training_data import TrainingData 17 | 18 | if typing.TYPE_CHECKING: 19 | import mitie 20 | from builtins import str 21 | 22 | 23 | class MitieFeaturizer(Featurizer): 24 | name = "intent_featurizer_mitie" 25 | 26 | provides = ["text_features"] 27 | 28 | requires = ["tokens", "mitie_feature_extractor"] 29 | 30 | @classmethod 31 | def required_packages(cls): 32 | # type: () -> List[Text] 33 | return ["mitie", "numpy"] 34 | 35 | def ndim(self, feature_extractor): 36 | # type: (mitie.total_word_feature_extractor) -> int 37 | 38 | return feature_extractor.num_dimensions 39 | 40 | def train(self, training_data, config, **kwargs): 41 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 42 | 43 | mitie_feature_extractor = self._mitie_feature_extractor(**kwargs) 44 | for example in training_data.intent_examples: 45 | features = self.features_for_tokens(example.get("tokens"), 46 | mitie_feature_extractor) 47 | example.set("text_features", 48 | self._combine_with_existing_text_features( 49 | example, features)) 50 | 51 | def process(self, message, **kwargs): 52 | # type: (Message, **Any) -> None 53 | 54 | mitie_feature_extractor = self._mitie_feature_extractor(**kwargs) 55 | features = self.features_for_tokens(message.get("tokens"), 56 | mitie_feature_extractor) 57 | message.set("text_features", 58 | self._combine_with_existing_text_features(message, 59 | features)) 60 | 61 | def _mitie_feature_extractor(self, **kwargs): 62 | mitie_feature_extractor = kwargs.get("mitie_feature_extractor") 63 | if not mitie_feature_extractor: 64 | raise Exception("Failed to train 'intent_featurizer_mitie'. " 65 | "Missing a proper MITIE feature extractor. " 66 | "Make sure this component is preceded by " 67 | "the 'nlp_mitie' component in the pipeline " 68 | "configuration.") 69 | return mitie_feature_extractor 70 | 71 | def features_for_tokens(self, tokens, feature_extractor): 72 | # type: (List[Token], mitie.total_word_feature_extractor) -> np.ndarray 73 | 74 | vec = np.zeros(self.ndim(feature_extractor)) 75 | for token in tokens: 76 | vec += feature_extractor.get_feature_vector(token.text) 77 | if tokens: 78 | return vec / len(tokens) 79 | else: 80 | return vec 81 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/regex_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | import os 8 | import re 9 | import warnings 10 | 11 | import typing 12 | from typing import Any, Dict, List, Optional, Text 13 | 14 | from rasa_nlu import utils 15 | from rasa_nlu.config import RasaNLUModelConfig 16 | from rasa_nlu.featurizers import Featurizer 17 | from rasa_nlu.training_data import Message 18 | from rasa_nlu.training_data import TrainingData 19 | 20 | import numpy as np 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | if typing.TYPE_CHECKING: 25 | from rasa_nlu.model import Metadata 26 | 27 | 28 | REGEX_FEATURIZER_FILE_NAME = "regex_featurizer.json" 29 | 30 | 31 | class RegexFeaturizer(Featurizer): 32 | name = "intent_entity_featurizer_regex" 33 | 34 | provides = ["text_features"] 35 | 36 | requires = ["tokens"] 37 | 38 | def __init__(self, component_config=None, known_patterns=None): 39 | super(RegexFeaturizer, self).__init__(component_config) 40 | 41 | 
self.known_patterns = known_patterns if known_patterns else [] 42 | 43 | def train(self, training_data, config, **kwargs): 44 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 45 | 46 | for example in training_data.regex_features: 47 | self.known_patterns.append(example) 48 | 49 | for example in training_data.training_examples: 50 | updated = self._text_features_with_regex(example) 51 | example.set("text_features", updated) 52 | 53 | def process(self, message, **kwargs): 54 | # type: (Message, **Any) -> None 55 | 56 | updated = self._text_features_with_regex(message) 57 | message.set("text_features", updated) 58 | 59 | def _text_features_with_regex(self, message): 60 | if self.known_patterns is not None: 61 | extras = self.features_for_patterns(message) 62 | return self._combine_with_existing_text_features(message, extras) 63 | else: 64 | return message.get("text_features") 65 | 66 | def features_for_patterns(self, message): 67 | """Checks which known patterns match the message. 68 | 69 | Given a sentence, returns a vector of {1,0} values indicating which 70 | regexes did match. Furthermore, if the 71 | message is tokenized, the function will mark the matching regex on 72 | the tokens that are part of the match.""" 73 | 74 | found = [] 75 | for i, exp in enumerate(self.known_patterns): 76 | match = re.search(exp["pattern"], message.text) 77 | if match is not None: 78 | for t in message.get("tokens", []): 79 | if t.offset < match.end() and t.end > match.start(): 80 | t.set("pattern", i) 81 | found.append(1.0) 82 | else: 83 | found.append(0.0) 84 | return np.array(found) 85 | 86 | @classmethod 87 | def load(cls, 88 | model_dir=None, # type: Optional[Text] 89 | model_metadata=None, # type: Optional[Metadata] 90 | cached_component=None, # type: Optional[RegexFeaturizer] 91 | **kwargs # type: **Any 92 | ): 93 | # type: (...) -> RegexFeaturizer 94 | 95 | meta = model_metadata.for_component(cls.name) 96 | file_name = meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME) 97 | regex_file = os.path.join(model_dir, file_name) 98 | 99 | if os.path.exists(regex_file): 100 | known_patterns = utils.read_json_file(regex_file) 101 | return RegexFeaturizer(meta, known_patterns=known_patterns) 102 | else: 103 | return RegexFeaturizer(meta) 104 | 105 | def persist(self, model_dir): 106 | # type: (Text) -> Optional[Dict[Text, Any]] 107 | """Persist this model into the passed directory. 
108 | 109 | Return the metadata necessary to load the model again.""" 110 | 111 | if self.known_patterns: 112 | regex_file = os.path.join(model_dir, REGEX_FEATURIZER_FILE_NAME) 113 | utils.write_json_to_file(regex_file, self.known_patterns, indent=4) 114 | 115 | return {"regex_file": REGEX_FEATURIZER_FILE_NAME} 116 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/spacy_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import typing 8 | from typing import Any 9 | 10 | from rasa_nlu.featurizers import Featurizer 11 | from rasa_nlu.training_data import Message 12 | from rasa_nlu.training_data import TrainingData 13 | 14 | if typing.TYPE_CHECKING: 15 | from spacy.language import Language 16 | from spacy.tokens import Doc 17 | 18 | 19 | def ndim(spacy_nlp): 20 | """Number of features used to represent a document / sentence.""" 21 | # type: Language -> int 22 | return spacy_nlp.vocab.vectors_length 23 | 24 | 25 | def features_for_doc(doc): 26 | """Feature vector for a single document / sentence.""" 27 | # type: Doc -> np.ndarray 28 | return doc.vector 29 | 30 | 31 | class SpacyFeaturizer(Featurizer): 32 | name = "intent_featurizer_spacy" 33 | 34 | provides = ["text_features"] 35 | 36 | requires = ["spacy_doc"] 37 | 38 | def train(self, training_data, config, **kwargs): 39 | # type: (TrainingData) -> None 40 | 41 | for example in training_data.intent_examples: 42 | self._set_spacy_features(example) 43 | 44 | def process(self, message, **kwargs): 45 | # type: (Message, **Any) -> None 46 | 47 | self._set_spacy_features(message) 48 | 49 | def _set_spacy_features(self, message): 50 | """Adds the spacy word vectors to the messages text features.""" 51 | 52 | fs = features_for_doc(message.get("spacy_doc")) 53 | features = self._combine_with_existing_text_features(message, fs) 54 | message.set("text_features", features) 55 | -------------------------------------------------------------------------------- /rasa_nlu/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import json 7 | import logging 8 | 9 | import six 10 | from builtins import input 11 | 12 | from rasa_nlu import utils 13 | from rasa_nlu.model import Interpreter 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def create_argument_parser(): 19 | import argparse 20 | parser = argparse.ArgumentParser( 21 | description='run a Rasa NLU model locally on the command line ' 22 | 'for manual testing') 23 | 24 | parser.add_argument('-m', '--model', required=True, 25 | help="path to model") 26 | 27 | utils.add_logging_option_arguments(parser, default=logging.INFO) 28 | 29 | return parser 30 | 31 | 32 | def run_cmdline(model_path, component_builder=None): 33 | interpreter = Interpreter.load(model_path, component_builder) 34 | 35 | logger.info("NLU model loaded. 
Type a message and " 36 | "press enter to parse it.") 37 | while True: 38 | text = input().strip() 39 | if six.PY2: 40 | # in python 2 input doesn't return unicode values 41 | text = text.decode("utf-8") 42 | r = interpreter.parse(text) 43 | print(json.dumps(r, indent=2)) 44 | logger.info("Next message:") 45 | 46 | 47 | if __name__ == '__main__': 48 | cmdline_args = create_argument_parser().parse_args() 49 | 50 | utils.configure_colored_logging(cmdline_args.loglevel) 51 | 52 | run_cmdline(cmdline_args.model) 53 | -------------------------------------------------------------------------------- /rasa_nlu/schemas/nlu_model.yml: -------------------------------------------------------------------------------- 1 | allowempty: True 2 | mapping: 3 | language: 4 | type: !!python/str "str" 5 | pipeline: 6 | type: !!python/str "any" 7 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import object 6 | 7 | 8 | class Tokenizer(object): 9 | pass 10 | 11 | 12 | class Token(object): 13 | def __init__(self, text, offset, data=None): 14 | self.offset = offset 15 | self.text = text 16 | self.end = offset + len(text) 17 | self.data = data if data else {} 18 | 19 | def set(self, prop, info): 20 | self.data[prop] = info 21 | 22 | def get(self, prop, default=None): 23 | return self.data.get(prop, default) 24 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/mitie_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import str 6 | import re 7 | 8 | from typing import Any 9 | from typing import Dict 10 | from typing import List 11 | from typing import Text 12 | from typing import Tuple 13 | 14 | from rasa_nlu.config import RasaNLUModelConfig 15 | from rasa_nlu.tokenizers import Token 16 | from rasa_nlu.tokenizers import Tokenizer 17 | from rasa_nlu.components import Component 18 | from rasa_nlu.training_data import Message 19 | from rasa_nlu.training_data import TrainingData 20 | 21 | 22 | class MitieTokenizer(Tokenizer, Component): 23 | name = "tokenizer_mitie" 24 | 25 | provides = ["tokens"] 26 | 27 | @classmethod 28 | def required_packages(cls): 29 | # type: () -> List[Text] 30 | return ["mitie"] 31 | 32 | def train(self, training_data, config, **kwargs): 33 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 34 | 35 | for example in training_data.training_examples: 36 | example.set("tokens", self.tokenize(example.text)) 37 | 38 | def process(self, message, **kwargs): 39 | # type: (Message, **Any) -> None 40 | 41 | message.set("tokens", self.tokenize(message.text)) 42 | 43 | def _token_from_offset(self, text, offset, encoded_sentence): 44 | return Token(text.decode('utf-8'), 45 | self._byte_to_char_offset(encoded_sentence, offset)) 46 | 47 | def tokenize(self, text): 48 | # type: (Text) -> List[Token] 49 | import mitie 50 | 51 | encoded_sentence = text.encode('utf-8') 52 | tokenized = mitie.tokenize_with_offsets(encoded_sentence) 53 | tokens = [self._token_from_offset(token, offset, encoded_sentence) 54 | 
for token, offset in tokenized] 55 | return tokens 56 | 57 | @staticmethod 58 | def _byte_to_char_offset(text, byte_offset): 59 | return len(text[:byte_offset].decode('utf-8')) 60 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/spacy_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import typing 7 | from typing import Any, List 8 | 9 | from rasa_nlu.components import Component 10 | from rasa_nlu.config import RasaNLUModelConfig 11 | from rasa_nlu.tokenizers import Tokenizer, Token 12 | from rasa_nlu.training_data import Message 13 | from rasa_nlu.training_data import TrainingData 14 | 15 | if typing.TYPE_CHECKING: 16 | from spacy.tokens.doc import Doc 17 | 18 | 19 | class SpacyTokenizer(Tokenizer, Component): 20 | name = "tokenizer_spacy" 21 | 22 | provides = ["tokens"] 23 | 24 | def train(self, training_data, config, **kwargs): 25 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 26 | 27 | for example in training_data.training_examples: 28 | example.set("tokens", self.tokenize(example.get("spacy_doc"))) 29 | 30 | def process(self, message, **kwargs): 31 | # type: (Message, **Any) -> None 32 | 33 | message.set("tokens", self.tokenize(message.get("spacy_doc"))) 34 | 35 | def tokenize(self, doc): 36 | # type: (Doc) -> List[Token] 37 | 38 | return [Token(t.text, t.idx) for t in doc] 39 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/whitespace_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from typing import Any, List, Text 7 | 8 | from rasa_nlu.components import Component 9 | from rasa_nlu.config import RasaNLUModelConfig 10 | from rasa_nlu.tokenizers import Tokenizer, Token 11 | from rasa_nlu.training_data import Message 12 | from rasa_nlu.training_data import TrainingData 13 | 14 | 15 | class WhitespaceTokenizer(Tokenizer, Component): 16 | name = "tokenizer_whitespace" 17 | 18 | provides = ["tokens"] 19 | 20 | def train(self, training_data, config, **kwargs): 21 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 22 | 23 | for example in training_data.training_examples: 24 | example.set("tokens", self.tokenize(example.text)) 25 | 26 | def process(self, message, **kwargs): 27 | # type: (Message, **Any) -> None 28 | 29 | message.set("tokens", self.tokenize(message.text)) 30 | 31 | def tokenize(self, text): 32 | # type: (Text) -> List[Token] 33 | 34 | words = text.split() 35 | running_offset = 0 36 | tokens = [] 37 | for word in words: 38 | word_offset = text.index(word, running_offset) 39 | word_len = len(word) 40 | running_offset = word_offset + word_len 41 | tokens.append(Token(word, word_offset)) 42 | return tokens 43 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/yaha_tokenizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Nov 23 14:54:35 2017 4 | 5 | @author: user 6 | """ 7 | 8 | from __future__ import unicode_literals 9 | from __future__ import print_function 10 | from 
__future__ import division 11 | from __future__ import absolute_import 12 | 13 | from typing import Any 14 | from typing import Dict 15 | from typing import List 16 | from typing import Text 17 | 18 | from rasa_nlu.config import RasaNLUConfig 19 | from rasa_nlu.tokenizers import Tokenizer, Token 20 | from rasa_nlu.components import Component 21 | from rasa_nlu.training_data import Message 22 | from rasa_nlu.training_data import TrainingData 23 | 24 | import sys 25 | from yaha import Cuttor 26 | 27 | reload(sys) 28 | sys.setdefaultencoding('utf-8') 29 | 30 | class YahaTokenizer(Tokenizer, Component): 31 | 32 | 33 | name = "tokenizer_yaha" 34 | 35 | provides = ["tokens"] 36 | 37 | cuttor = Cuttor() 38 | 39 | def __init__(self): 40 | pass 41 | 42 | 43 | @classmethod 44 | def required_packages(cls): 45 | # type: () -> List[Text] 46 | return ["yaha"] 47 | 48 | def train(self, training_data, config, **kwargs): 49 | # type: (TrainingData, RasaNLUConfig, **Any) -> None 50 | if config['language'] != 'zh': 51 | raise Exception("tokenizer_yaha is only used for Chinese. Check your configure json file.") 52 | 53 | for example in training_data.training_examples: 54 | example.set("tokens", self.tokenize(example.text)) 55 | 56 | def process(self, message, **kwargs): 57 | # type: (Message, **Any) -> None 58 | 59 | message.set("tokens", self.tokenize(message.text)) 60 | 61 | def tokenize(self, text): 62 | # type: (Text) -> List[Token] 63 | tokenized = self.cuttor.tokenize(text.decode('utf-8'), search=True) 64 | tokens = [Token(word, start) for (word, start, end) in tokenized] 65 | 66 | return tokens 67 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from rasa_nlu.training_data.message import Message 9 | from rasa_nlu.training_data.training_data import TrainingData 10 | from rasa_nlu.training_data.loading import load_data 11 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from rasa_nlu.training_data.formats.dialogflow import DialogflowReader 7 | from rasa_nlu.training_data.formats.luis import LuisReader 8 | from rasa_nlu.training_data.formats.wit import WitReader 9 | from rasa_nlu.training_data.formats.markdown import MarkdownWriter, MarkdownReader 10 | from rasa_nlu.training_data.formats.rasa import RasaReader, RasaWriter 11 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/dialogflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | import os 8 | 9 | from rasa_nlu.training_data import Message, TrainingData 10 | from rasa_nlu.training_data.formats.readerwriter import TrainingDataReader 11 | from rasa_nlu import utils 12 | from 
rasa_nlu.training_data.util import transform_entity_synonyms 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | DIALOGFLOW_PACKAGE = "dialogflow_package" 17 | DIALOGFLOW_AGENT = "dialogflow_agent" 18 | DIALOGFLOW_INTENT = "dialogflow_intent" 19 | DIALOGFLOW_INTENT_EXAMPLES = "dialogflow_intent_examples" 20 | DIALOGFLOW_ENTITIES = "dialogflow_entities" 21 | DIALOGFLOW_ENTITY_ENTRIES = "dialogflow_entity_entries" 22 | 23 | 24 | class DialogflowReader(TrainingDataReader): 25 | def read(self, fn, **kwargs): 26 | # type: ([Text]) -> TrainingData 27 | """Loads training data stored in the Dialogflow data format.""" 28 | 29 | language = kwargs["language"] 30 | fformat = kwargs["fformat"] 31 | 32 | if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}: 33 | raise ValueError("fformat must be either {}, or {}".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES)) 34 | 35 | root_js = utils.read_json_file(fn) 36 | examples_js = self._read_examples_js(fn, language, fformat) 37 | 38 | if not examples_js: 39 | logger.warning("No training examples found for dialogflow file {}!".format(fn)) 40 | return TrainingData() 41 | elif fformat == DIALOGFLOW_INTENT: 42 | return self._read_intent(root_js, examples_js) 43 | elif fformat == DIALOGFLOW_ENTITIES: 44 | return self._read_entities(examples_js) 45 | 46 | def _read_intent(self, intent_js, examples_js): 47 | """Reads the intent and examples from respective jsons.""" 48 | intent = intent_js.get("name") 49 | 50 | training_examples = [] 51 | for ex in examples_js: 52 | text, entities = self._join_text_chunks(ex['data']) 53 | training_examples.append(Message.build(text, intent, entities)) 54 | 55 | return TrainingData(training_examples) 56 | 57 | def _join_text_chunks(self, chunks): 58 | """Combines text chunks and extracts entities.""" 59 | utterance = "" 60 | entities = [] 61 | for chunk in chunks: 62 | entity = self._extract_entity(chunk, len(utterance)) 63 | if entity: 64 | entities.append(entity) 65 | utterance += chunk["text"] 66 | 67 | return utterance, entities 68 | 69 | def _extract_entity(self, chunk, current_offset): 70 | """Extract an entity from a chunk if present.""" 71 | entity = None 72 | if "meta" in chunk or "alias" in chunk: 73 | start = current_offset 74 | text = chunk['text'] 75 | end = start + len(text) 76 | entity_type = chunk.get("alias", chunk["meta"]) 77 | if entity_type != u'@sys.ignore': 78 | entity = utils.build_entity(start, end, text, entity_type) 79 | 80 | return entity 81 | 82 | def _read_entities(self, examples_js): 83 | entity_synonyms = transform_entity_synonyms(examples_js) 84 | return TrainingData([], entity_synonyms) 85 | 86 | def _read_examples_js(self, fn, language, fformat): 87 | """Infer and load the example file based on the root filename and root format.""" 88 | examples_type = "usersays" if fformat == DIALOGFLOW_INTENT else "entries" 89 | examples_fn_ending = "_{}_{}.json".format(examples_type, language) 90 | examples_fn = fn.replace(".json", examples_fn_ending) 91 | if os.path.isfile(examples_fn): 92 | return utils.read_json_file(examples_fn) 93 | else: 94 | return None 95 | 96 | def reads(self, s, **kwargs): 97 | raise NotImplementedError 98 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/luis.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | 
import logging 7 | 8 | from rasa_nlu.training_data import Message, TrainingData 9 | from rasa_nlu.training_data.formats.readerwriter import JsonTrainingDataReader 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class LuisReader(JsonTrainingDataReader): 15 | 16 | def read_from_json(self, js, **kwargs): 17 | # type: (Text, Any) -> TrainingData 18 | """Loads training data stored in the LUIS.ai data format.""" 19 | 20 | training_examples = [] 21 | regex_features = [] 22 | 23 | # Simple check to ensure we support this luis data schema version 24 | if not js["luis_schema_version"].startswith("2"): 25 | raise Exception("Invalid luis data schema version {}, should be 2.x.x. " 26 | "Make sure to use the latest luis version " 27 | "(e.g. by downloading your data again)." 28 | "".format(js["luis_schema_version"])) 29 | 30 | for r in js.get("regex_features", []): 31 | if r.get("activated", False): 32 | regex_features.append({"name": r.get("name"), 33 | "pattern": r.get("pattern")}) 34 | 35 | for s in js["utterances"]: 36 | text = s.get("text") 37 | intent = s.get("intent") 38 | entities = [] 39 | for e in s.get("entities") or []: 40 | start, end = e["startPos"], e["endPos"] + 1 41 | val = text[start:end] 42 | entities.append({"entity": e["entity"], 43 | "value": val, 44 | "start": start, 45 | "end": end}) 46 | 47 | data = {"entities": entities} 48 | if intent: 49 | data["intent"] = intent 50 | training_examples.append(Message(text, data)) 51 | return TrainingData(training_examples, regex_features=regex_features) 52 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/readerwriter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import json 7 | from rasa_nlu import utils 8 | 9 | 10 | class TrainingDataReader(object): 11 | def read(self, filename, **kwargs): 12 | """Reads TrainingData from a file.""" 13 | return self.reads(utils.read_file(filename), **kwargs) 14 | 15 | def reads(self, s, **kwargs): 16 | """Reads TrainingData from a string.""" 17 | raise NotImplementedError 18 | 19 | 20 | class TrainingDataWriter(object): 21 | def dump(self, filename, training_data): 22 | """Writes a TrainingData object in markdown format to a file.""" 23 | s = self.dumps(training_data) 24 | utils.write_to_file(filename, s) 25 | 26 | def dumps(self, training_data): 27 | """Turns TrainingData into a string.""" 28 | raise NotImplementedError 29 | 30 | 31 | class JsonTrainingDataReader(TrainingDataReader): 32 | def reads(self, s, **kwargs): 33 | """Transforms string into json object and passes it on.""" 34 | js = json.loads(s) 35 | return self.read_from_json(js, **kwargs) 36 | 37 | def read_from_json(self, js, **kwargs): 38 | """Reads TrainingData from a json object.""" 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/wit.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | from rasa_nlu.training_data import Message, TrainingData 9 | from rasa_nlu.training_data.formats.readerwriter import JsonTrainingDataReader 10 | 11 | 
logger = logging.getLogger(__name__) 12 | 13 | 14 | class WitReader(JsonTrainingDataReader): 15 | 16 | def read_from_json(self, js, **kwargs): 17 | # type: (Text, Any) -> TrainingData 18 | """Loads training data stored in the WIT.ai data format.""" 19 | 20 | training_examples = [] 21 | 22 | for s in js["data"]: 23 | entities = s.get("entities") 24 | if entities is None: 25 | continue 26 | text = s.get("text") 27 | intents = [e["value"] for e in entities if e["entity"] == 'intent'] 28 | intent = intents[0].strip("\"") if intents else None 29 | 30 | entities = [e 31 | for e in entities 32 | if ("start" in e and "end" in e and 33 | e["entity"] != 'intent')] 34 | for e in entities: 35 | # for some reason wit adds additional quotes around entity values 36 | e["value"] = e["value"].strip("\"") 37 | 38 | data = {} 39 | if intent: 40 | data["intent"] = intent 41 | if entities is not None: 42 | data["entities"] = entities 43 | training_examples.append(Message(text, data)) 44 | return TrainingData(training_examples) 45 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/message.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from rasa_nlu.utils import ordered 9 | 10 | 11 | class Message(object): 12 | def __init__(self, text, data=None, output_properties=None, time=None): 13 | self.text = text 14 | self.time = time 15 | self.data = data if data else {} 16 | 17 | if output_properties: 18 | self.output_properties = output_properties 19 | else: 20 | self.output_properties = set() 21 | 22 | def set(self, prop, info, add_to_output=False): 23 | self.data[prop] = info 24 | if add_to_output: 25 | self.output_properties.add(prop) 26 | 27 | def get(self, prop, default=None): 28 | return self.data.get(prop, default) 29 | 30 | def as_dict(self, only_output_properties=False): 31 | if only_output_properties: 32 | d = {key: value 33 | for key, value in self.data.items() 34 | if key in self.output_properties} 35 | else: 36 | d = self.data 37 | return dict(d, text=self.text) 38 | 39 | def __eq__(self, other): 40 | if not isinstance(other, Message): 41 | return False 42 | else: 43 | return ((other.text, ordered(other.data)) == 44 | (self.text, ordered(self.data))) 45 | 46 | def __hash__(self): 47 | return hash((self.text, str(ordered(self.data)))) 48 | 49 | @classmethod 50 | def build(cls, text, intent=None, entities=None): 51 | data = {} 52 | if intent: 53 | data["intent"] = intent 54 | if entities: 55 | data["entities"] = entities 56 | return cls(text, data) 57 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def transform_entity_synonyms(synonyms, known_synonyms=None): 14 | """Transforms the entity synonyms into a text->value dictionary""" 15 | entity_synonyms = known_synonyms if known_synonyms else {} 16 | for s in synonyms: 17 | if "value" in s and "synonyms" in s: 18 | for synonym in s["synonyms"]: 
19 | entity_synonyms[synonym] = s["value"] 20 | return entity_synonyms 21 | 22 | 23 | def check_duplicate_synonym(entity_synonyms, text, syn, context_str=""): 24 | if text in entity_synonyms and entity_synonyms[text] != syn: 25 | logger.warning("Found inconsistent entity synonyms while {0}, overwriting {1}->{2}" 26 | "with {1}->{2} during merge".format(context_str, text, entity_synonyms[text], syn)) 27 | -------------------------------------------------------------------------------- /rasa_nlu/utils/mitie_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | import typing 9 | from builtins import str 10 | from typing import Any 11 | from typing import Dict 12 | from typing import List 13 | from typing import Optional 14 | from typing import Text 15 | 16 | from rasa_nlu.components import Component 17 | from rasa_nlu.config import RasaNLUModelConfig 18 | from rasa_nlu.model import Metadata 19 | 20 | if typing.TYPE_CHECKING: 21 | import mitie 22 | 23 | 24 | class MitieNLP(Component): 25 | name = "nlp_mitie" 26 | 27 | provides = ["mitie_feature_extractor", "mitie_file"] 28 | 29 | defaults = { 30 | # name of the language model to load - this contains 31 | # the MITIE feature extractor 32 | "model": os.path.join("data", "total_word_feature_extractor.dat"), 33 | } 34 | 35 | def __init__(self, 36 | component_config=None, # type: Dict[Text, Any] 37 | extractor=None 38 | ): 39 | # type: (...) -> None 40 | """Construct a new language model from the MITIE framework.""" 41 | 42 | super(MitieNLP, self).__init__(component_config) 43 | 44 | self.extractor = extractor 45 | 46 | @classmethod 47 | def required_packages(cls): 48 | # type: () -> List[Text] 49 | return ["mitie"] 50 | 51 | @classmethod 52 | def create(cls, cfg): 53 | # type: (RasaNLUModelConfig) -> MitieNLP 54 | import mitie 55 | 56 | component_conf = cfg.for_component(cls.name, cls.defaults) 57 | model_file = component_conf.get("model") 58 | if not model_file: 59 | raise Exception("The MITIE component 'nlp_mitie' needs " 60 | "the configuration value for 'model'." 61 | "Please take a look at the " 62 | "documentation in the pipeline section " 63 | "to get more info about this " 64 | "parameter.") 65 | extractor = mitie.total_word_feature_extractor(model_file) 66 | cls.ensure_proper_language_model(extractor) 67 | 68 | return MitieNLP(component_conf, extractor) 69 | 70 | @classmethod 71 | def cache_key(cls, model_metadata): 72 | # type: (Metadata) -> Optional[Text] 73 | 74 | component_meta = model_metadata.for_component(cls.name) 75 | 76 | mitie_file = component_meta.get("model", None) 77 | if mitie_file is not None: 78 | return cls.name + "-" + str(os.path.abspath(mitie_file)) 79 | else: 80 | return None 81 | 82 | def provide_context(self): 83 | # type: () -> Dict[Text, Any] 84 | 85 | return {"mitie_feature_extractor": self.extractor, 86 | "mitie_file": self.component_config.get("model")} 87 | 88 | @staticmethod 89 | def ensure_proper_language_model(extractor): 90 | # type: (Optional[mitie.total_word_feature_extractor]) -> None 91 | 92 | if extractor is None: 93 | raise Exception("Failed to load MITIE feature extractor. 
" 94 | "Loading the model returned 'None'.") 95 | 96 | @classmethod 97 | def load(cls, 98 | model_dir=None, # type: Optional[Text] 99 | model_metadata=None, # type: Optional[Metadata] 100 | cached_component=None, # type: Optional[MitieNLP] 101 | **kwargs # type: **Any 102 | ): 103 | # type: (...) -> MitieNLP 104 | import mitie 105 | 106 | if cached_component: 107 | return cached_component 108 | 109 | component_meta = model_metadata.for_component(cls.name) 110 | mitie_file = component_meta.get("model") 111 | return cls(component_meta, 112 | mitie.total_word_feature_extractor(mitie_file)) 113 | 114 | def persist(self, model_dir): 115 | # type: (Text) -> Dict[Text, Any] 116 | 117 | return { 118 | "mitie_feature_extractor_fingerprint": self.extractor.fingerprint, 119 | "model": self.component_config.get("model") 120 | } 121 | -------------------------------------------------------------------------------- /rasa_nlu/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | __version__ = '0.12.2' 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r alt_requirements/requirements_bare.txt 2 | -------------------------------------------------------------------------------- /sample_configs/config_crf.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_spacy" 5 | model: "en" 6 | - name: "ner_spacy" 7 | - name: "ner_ngrams" 8 | max_number_of_ngrams: 7 9 | - name: "ner_duckling_http" 10 | url: "http://my_url" 11 | dimensions: 12 | - "NUMBER" 13 | - name: "ner_crf" 14 | BILOU_flag: true 15 | features: 16 | # features for word before token 17 | - ["low", "title", "upper", "pos", "pos2"] 18 | # features of token itself 19 | - ["bias", "low", "word3", "word2", "upper", "title", "digit", "pos", "pos2", "pattern"] 20 | # features for word after the token we want to tag 21 | - ["low", "title", "upper", "pos", "pos2"] 22 | max_iterations: 50 23 | L1_c: 1 24 | L2_c: 1e-3 25 | - name: "intent_classifier_sklearn" 26 | C: [1, 2, 5, 10, 20, 100] 27 | kernel: "linear" 28 | -------------------------------------------------------------------------------- /sample_configs/config_defaults.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: [] 4 | 5 | data: 6 | -------------------------------------------------------------------------------- /sample_configs/config_embedding.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "tensorflow_embedding" 4 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_classifier_mitie" 11 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu_test", 3 | "pipeline": ["nlp_mitie", 4 | "tokenizer_jieba", 5 | "ner_mitie", 6 | "ner_synonyms", 7 | "intent_entity_featurizer_regex", 8 | "intent_featurizer_mitie", 9 | "intent_classifier_sklearn"], 10 | "language": "zh", 11 | "mitie_file": "./data/total_word_feature_extractor_zh.dat", 12 | "path" : "./models", 13 | "data" : "./data/examples/rasa/demo-rasa_zh.json" 14 | } 15 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_featurizer_mitie" 11 | - name: "intent_classifier_sklearn" 12 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn_plus_dict_path.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | default_dict: "./default_dict.big" 8 | user_dicts: "./jieba_userdict" 9 | # you can put in file path or directory path as the "user_dicts" value 10 | # user_dicts: "./jieba_userdict/jieba_userdict.txt" 11 | - name: "ner_mitie" 12 | - name: "ner_synonyms" 13 | - name: "intent_entity_featurizer_regex" 14 | - name: "intent_featurizer_mitie" 15 | - name: "intent_classifier_sklearn" 16 | -------------------------------------------------------------------------------- /sample_configs/config_mitie.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor.dat" 6 | - name: "tokenizer_mitie" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_classifier_mitie" 11 | -------------------------------------------------------------------------------- /sample_configs/config_mitie_sklearn.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor.dat" 6 | - name: "tokenizer_mitie" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_featurizer_mitie" 11 | - name: "intent_classifier_sklearn" 12 | -------------------------------------------------------------------------------- /sample_configs/config_spacy.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | -------------------------------------------------------------------------------- /sample_configs/config_spacy_duckling.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | # this is using the spacy sklearn pipeline, adding duckling 5 | # all components will use their default values 6 | - name: "nlp_spacy" 7 | - name: "tokenizer_spacy" 8 | - name: "intent_featurizer_spacy" 9 | - name: "ner_crf" 10 | - name: "ner_synonyms" 11 | - 
name: "intent_classifier_sklearn" 12 | - name: "ner_duckling" 13 | -------------------------------------------------------------------------------- /sample_configs/config_train_server_json.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | 5 | # data contains the same json, as described in the training data section 6 | data: { 7 | "rasa_nlu_data": { 8 | "common_examples": [ 9 | { 10 | "text": "hey", 11 | "intent": "greet", 12 | "entities": [] 13 | } 14 | ] 15 | } 16 | } -------------------------------------------------------------------------------- /sample_configs/config_train_server_md.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | 5 | # data contains the same md, as described in the training data section 6 | data: | 7 | ## intent:affirm 8 | - yes 9 | - yep 10 | 11 | ## intent:goodbye 12 | - bye 13 | - goodbye 14 | -------------------------------------------------------------------------------- /sample_configs/config_yaha_mitie_sklearn.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu_test", 3 | "pipeline": ["nlp_mitie", 4 | "tokenizer_yaha", 5 | "ner_mitie", 6 | "ner_synonyms", 7 | "intent_entity_featurizer_regex", 8 | "intent_featurizer_mitie", 9 | "intent_classifier_sklearn"], 10 | "language": "zh", 11 | "mitie_file": "./data/total_word_feature_extractor_zh.dat", 12 | "path" : "./models", 13 | "data" : "./data/examples/rasa/demo-rasa_zh.json" 14 | } 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # pytest PEP8 configuration 2 | [tool:pytest] 3 | pep8maxlinelength = 120 4 | pep8ignore = 5 | docs/conf.py ALL 6 | *.py W503 7 | *.py E126 8 | 9 | # ignoring W503: line break occurred before a binary operator 10 | # ignoring E126: continuation line over-indented for hanging indent 11 | 12 | [metadata] 13 | description-file = README.md 14 | license_file = LICENSE.txt 15 | 16 | [bdist_wheel] 17 | # this will create a universal wheel for all distributions and py2 & py3 18 | universal=1 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 4 | from setuptools import setup, find_packages 5 | 6 | here = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | # Avoids IDE errors, but actual version is read from version.py 9 | __version__ = None 10 | exec(open('rasa_nlu/version.py').read()) 11 | 12 | # Get the long description from the README file 13 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 14 | long_description = f.read() 15 | 16 | tests_requires = [ 17 | "pytest", 18 | "pytest-pep8", 19 | "pytest-services", 20 | "pytest-cov", 21 | "pytest-twisted<1.6", 22 | "treq" 23 | ] 24 | 25 | install_requires = [ 26 | "pathlib", 27 | "cloudpickle", 28 | "gevent", 29 | "klein", 30 | "boto3", 31 | "packaging", 32 | "typing", 33 | "future", 34 | "six", 35 | "tqdm", 36 | "requests", 37 | "jsonschema", 38 | "matplotlib", 39 | "numpy>=1.13", 40 | "simplejson", 41 | "pyyaml", 42 | "coloredlogs" 43 | ] 44 | 45 | extras_requires = { 46 | 'test': tests_requires, 47 | 'spacy': ["scikit-learn", 48 | "sklearn-crfsuite", 49 | "scipy", 50 | "spacy>2.0", 51 | ], 52 
| 'tensorflow': ["scikit-learn", 53 | "tensorflow", 54 | ], 55 | 'mitie': ["mitie"], 56 | 'jieba': ["jieba"], 57 | #'yaha': ["yaha"], 58 | } 59 | 60 | setup( 61 | name='rasa-nlu', 62 | packages=find_packages(exclude=['contrib', 'docs', 'tests']), 63 | classifiers=[ 64 | "Development Status :: 4 - Beta", 65 | "Intended Audience :: Developers", 66 | "License :: OSI Approved :: Apache Software License", 67 | # supported python versions 68 | "Programming Language :: Python", 69 | "Programming Language :: Python :: 2.7", 70 | "Programming Language :: Python :: 3.5", 71 | "Programming Language :: Python :: 3.6", 72 | "Topic :: Software Development :: Libraries", 73 | ], 74 | version=__version__, 75 | install_requires=install_requires, 76 | tests_require=tests_requires, 77 | extras_require=extras_requires, 78 | include_package_data=True, 79 | description="Rasa NLU a natural language parser for bots", 80 | long_description=long_description, 81 | long_description_content_type="text/markdown", 82 | author='Rasa Technologies GmbH', 83 | author_email='hi@rasa.com', 84 | maintainer="Tom Bocklisch", 85 | maintainer_email="tom@rasa.com", 86 | license='Apache 2.0', 87 | url="https://rasa.com", 88 | keywords="nlp machine-learning machine-learning-library bot bots " 89 | "botkit rasa conversational-agents conversational-ai chatbot" 90 | "chatbot-framework bot-framework", 91 | download_url="https://github.com/RasaHQ/rasa_nlu/archive/{}.tar.gz" 92 | "".format(__version__), 93 | project_urls={ 94 | 'Bug Reports': 'https://github.com/rasahq/rasa_nlu/issues', 95 | 'Source': 'https://github.com/rasahq/rasa_nlu', 96 | }, 97 | ) 98 | 99 | print("\nWelcome to Rasa NLU!") 100 | print("If any questions please visit documentation " 101 | "page https://nlu.rasa.com") 102 | print("or join community chat on https://gitter.im/RasaHQ/rasa_nlu") 103 | -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/entity_extractor.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie/model_20170628-002704/entity_extractor.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/intent_classifier.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie/model_20170628-002704/intent_classifier.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_synonyms": "entity_synonyms.json", 3 | "mitie_file": "data/total_word_feature_extractor.dat", 4 | "pipeline": [ 5 | "nlp_mitie", 6 | "tokenizer_mitie", 7 | "ner_mitie", 8 | "ner_synonyms", 9 | "intent_classifier_mitie" 10 | ], 11 | "trained_at": "20170628-002704", 12 | "training_data": "training_data.json", 13 | "rasa_nlu_version": "0.9.0a4", 14 | "intent_classifier_mitie": 
"intent_classifier.dat", 15 | "entity_extractor_mitie": "entity_extractor.dat", 16 | "mitie_feature_extractor_fingerprint": 10023965992282753551, 17 | "language": "en" 18 | } -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/entity_extractor.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie_sklearn/model_20170628-002712/entity_extractor.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/intent_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie_sklearn/model_20170628-002712/intent_classifier.pkl -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_synonyms": "entity_synonyms.json", 3 | "mitie_file": "data/total_word_feature_extractor.dat", 4 | "pipeline": [ 5 | "nlp_mitie", 6 | "tokenizer_mitie", 7 | "ner_mitie", 8 | "ner_synonyms", 9 | "intent_featurizer_mitie", 10 | "intent_classifier_sklearn" 11 | ], 12 | "trained_at": "20170628-002712", 13 | "intent_classifier_sklearn": "intent_classifier.pkl", 14 | "rasa_nlu_version": "0.9.0a4", 15 | "training_data": "training_data.json", 16 | "entity_extractor_mitie": "entity_extractor.dat", 17 | "mitie_feature_extractor_fingerprint": 10023965992282753551, 18 | "language": "en" 19 | } -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/crf_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_spacy_sklearn/model_20170628-002705/crf_model.pkl -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/intent_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_spacy_sklearn/model_20170628-002705/intent_classifier.pkl -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_extractor_crf": { 3 | "version": 1, 4 | "crf_features": [ 5 | [ 6 | 
"low", 7 | "title", 8 | "upper", 9 | "pos", 10 | "pos2" 11 | ], 12 | [ 13 | "bias", 14 | "low", 15 | "word3", 16 | "word2", 17 | "upper", 18 | "title", 19 | "digit", 20 | "pos", 21 | "pos2" 22 | ], 23 | [ 24 | "low", 25 | "title", 26 | "upper", 27 | "pos", 28 | "pos2" 29 | ] 30 | ], 31 | "model_file": "crf_model.pkl", 32 | "BILOU_flag": true 33 | }, 34 | "entity_synonyms": "entity_synonyms.json", 35 | "trained_at": "20170628-002705", 36 | "pipeline": [ 37 | "nlp_spacy", 38 | "ner_crf", 39 | "ner_synonyms", 40 | "intent_featurizer_spacy", 41 | "intent_classifier_sklearn" 42 | ], 43 | "spacy_model_name": "en", 44 | "training_data": "training_data.json", 45 | "intent_classifier_sklearn": "intent_classifier.pkl", 46 | "rasa_nlu_version": "0.9.0a4", 47 | "language": "en" 48 | } -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/__init__.py -------------------------------------------------------------------------------- /tests/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/base/__init__.py -------------------------------------------------------------------------------- /tests/base/test_components.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import pytest 7 | 8 | from rasa_nlu import registry 9 | from rasa_nlu.components import find_unavailable_packages 10 | from rasa_nlu.model import Metadata 11 | 12 | 13 | @pytest.mark.parametrize("component_class", registry.component_classes) 14 | def test_no_components_with_same_name(component_class): 15 | """The name of the components need to be unique as they will 16 | be referenced by name when defining processing pipelines.""" 17 | 18 | names = [cls.name for cls in registry.component_classes] 19 | assert names.count(component_class.name) == 1, \ 20 | "There is more than one component named {}".format(component_class.name) 21 | 22 | 23 | @pytest.mark.parametrize("pipeline_template", 24 | registry.registered_pipeline_templates) 25 | def test_all_components_in_model_templates_exist(pipeline_template): 26 | """We provide a couple of ready to use pipelines, this test ensures 27 | all components referenced by name in the 28 | pipeline definitions are available.""" 29 | 30 | components = registry.registered_pipeline_templates[pipeline_template] 31 | for component in components: 32 | assert component in registry.registered_components, \ 33 | "Model template contains unknown component." 34 | 35 | 36 | @pytest.mark.parametrize("component_class", registry.component_classes) 37 | def test_all_arguments_can_be_satisfied(component_class): 38 | """Check that `train` method parameters can be filled 39 | filled from the context. Similar to `pipeline_init` test.""" 40 | 41 | # All available context arguments that will ever be generated during train 42 | # it might still happen, that in a certain pipeline 43 | # configuration arguments can not be satisfied! 
44 | provided_properties = {provided 45 | for c in registry.component_classes 46 | for provided in c.provides} 47 | 48 | for req in component_class.requires: 49 | assert req in provided_properties, \ 50 | "No component provides required property." 51 | 52 | 53 | def test_find_unavailable_packages(): 54 | unavailable = find_unavailable_packages(["my_made_up_package_name", "io", 55 | "foo_bar", "foo_bar"]) 56 | assert unavailable == {"my_made_up_package_name", "foo_bar"} 57 | 58 | 59 | def test_builder_create_unknown(component_builder, default_config): 60 | with pytest.raises(Exception) as excinfo: 61 | component_builder.create_component("my_made_up_componment", 62 | default_config) 63 | assert "Unknown component name" in str(excinfo.value) 64 | 65 | 66 | def test_builder_create_by_module_path(component_builder, default_config): 67 | from rasa_nlu.featurizers.regex_featurizer import RegexFeaturizer 68 | 69 | path = "rasa_nlu.featurizers.regex_featurizer.RegexFeaturizer" 70 | component = component_builder.create_component(path, default_config) 71 | assert type(component) == RegexFeaturizer 72 | 73 | 74 | def test_builder_load_unknown(component_builder): 75 | with pytest.raises(Exception) as excinfo: 76 | component_builder.load_component("my_made_up_componment", "", 77 | Metadata({}, None)) 78 | assert "Unknown component name" in str(excinfo.value) 79 | -------------------------------------------------------------------------------- /tests/base/test_config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import tempfile 7 | 8 | import pytest 9 | from typing import Text 10 | 11 | import rasa_nlu 12 | from rasa_nlu import config, utils 13 | from rasa_nlu.config import RasaNLUModelConfig, InvalidConfigError 14 | from rasa_nlu.registry import registered_pipeline_templates 15 | from tests.conftest import CONFIG_DEFAULTS_PATH 16 | from tests.utilities import write_file_config 17 | 18 | defaults = utils.read_yaml_file(CONFIG_DEFAULTS_PATH) 19 | 20 | 21 | def test_default_config(default_config): 22 | assert default_config.as_dict() == defaults 23 | 24 | 25 | def test_blank_config(): 26 | file_config = {} 27 | f = write_file_config(file_config) 28 | final_config = config.load(f.name) 29 | assert final_config.as_dict() == defaults 30 | 31 | 32 | def test_invalid_config_json(): 33 | file_config = """pipeline: [spacy_sklearn""" # invalid yaml 34 | with tempfile.NamedTemporaryFile("w+", suffix="_tmp_config_file.json") as f: 35 | f.write(file_config) 36 | f.flush() 37 | with pytest.raises(rasa_nlu.config.InvalidConfigError): 38 | config.load(f.name) 39 | 40 | 41 | def test_invalid_pipeline_template(): 42 | args = {"pipeline": "my_made_up_name"} 43 | f = write_file_config(args) 44 | with pytest.raises(InvalidConfigError) as execinfo: 45 | config.load(f.name) 46 | assert "unknown pipeline template" in str(execinfo.value) 47 | 48 | 49 | def test_pipeline_looksup_registry(): 50 | pipeline_template = list(registered_pipeline_templates)[0] 51 | args = {"pipeline": pipeline_template} 52 | f = write_file_config(args) 53 | final_config = config.load(f.name) 54 | components = [c.get("name") for c in final_config.pipeline] 55 | assert components == registered_pipeline_templates[pipeline_template] 56 | 57 | 58 | def test_default_config_file(): 59 | final_config = RasaNLUModelConfig() 60 | assert len(final_config) 
> 1 61 | 62 | 63 | def test_set_attr_on_component(default_config): 64 | cfg = config.load("sample_configs/config_spacy.yml") 65 | cfg.set_component_attr("intent_classifier_sklearn", C=324) 66 | 67 | expected = {"C": 324, "name": "intent_classifier_sklearn"} 68 | 69 | assert cfg.for_component("intent_classifier_sklearn") == expected 70 | assert cfg.for_component("tokenizer_spacy") == {"name": "tokenizer_spacy"} 71 | -------------------------------------------------------------------------------- /tests/base/test_data_router.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import mock 7 | 8 | from rasa_nlu import data_router 9 | from rasa_nlu import persistor 10 | 11 | 12 | def test_list_projects_in_cloud_method(): 13 | class UniqueValue(object): 14 | pass 15 | 16 | def mocked_get_persistor(*args, **kwargs): 17 | class MockedClass(object): 18 | def list_projects(self): 19 | return [UniqueValue()] 20 | 21 | def list_models(self, project): 22 | return [UniqueValue()] 23 | 24 | return MockedClass() 25 | 26 | def mocked_data_router_init(self, *args, **kwargs): 27 | self.config = None 28 | 29 | with mock.patch.object(persistor, 'get_persistor', 30 | mocked_get_persistor): 31 | return_value = data_router.DataRouter()._list_projects_in_cloud() 32 | assert isinstance(return_value[0], UniqueValue) 33 | -------------------------------------------------------------------------------- /tests/base/test_interpreter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import rasa_nlu 7 | 8 | import pytest 9 | 10 | from rasa_nlu import registry, training_data 11 | from rasa_nlu.model import Interpreter 12 | from tests import utilities 13 | 14 | 15 | @utilities.slowtest 16 | @pytest.mark.parametrize("pipeline_template", 17 | list(registry.registered_pipeline_templates.keys())) 18 | def test_interpreter(pipeline_template, component_builder, tmpdir): 19 | test_data = "data/examples/rasa/demo-rasa.json" 20 | _conf = utilities.base_test_conf(pipeline_template) 21 | _conf["data"] = test_data 22 | td = training_data.load_data(test_data) 23 | interpreter = utilities.interpreter_for(component_builder, 24 | "data/examples/rasa/demo-rasa.json", 25 | tmpdir.strpath, 26 | _conf) 27 | 28 | texts = ["good bye", "i am looking for an indian spot"] 29 | 30 | for text in texts: 31 | result = interpreter.parse(text, time=None) 32 | assert result['text'] == text 33 | assert (not result['intent']['name'] 34 | or result['intent']['name'] in td.intents) 35 | assert result['intent']['confidence'] >= 0 36 | # Ensure the model doesn't detect entity types that are not present 37 | # Models on our test data set are not stable enough to 38 | # require the exact entities to be found 39 | for entity in result['entities']: 40 | assert entity['entity'] in td.entities 41 | 42 | 43 | @pytest.mark.parametrize("metadata", 44 | [{"rasa_nlu_version": "0.11.0"}, 45 | {"rasa_nlu_version": "0.10.2"}, 46 | {"rasa_nlu_version": "0.12.0a1"}]) 47 | def test_model_not_compatible(metadata): 48 | with pytest.raises(rasa_nlu.model.UnsupportedModelError): 49 | Interpreter.ensure_model_compatibility(metadata) 50 | 51 | 52 | 
@pytest.mark.parametrize("metadata", 53 | [{"rasa_nlu_version": "0.12.0"}, 54 | {"rasa_nlu_version": "0.12.2"}, 55 | {"rasa_nlu_version": "0.12.0a2"}]) 56 | def test_model_is_compatible(metadata): 57 | # should not raise an exception 58 | assert Interpreter.ensure_model_compatibility(metadata) is None 59 | -------------------------------------------------------------------------------- /tests/base/test_persistor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | import mock 9 | import pytest 10 | from moto import mock_s3 11 | 12 | from tests import utilities 13 | from rasa_nlu import persistor, train 14 | 15 | 16 | class Object(object): 17 | pass 18 | 19 | 20 | def test_if_persistor_class_has_list_projects_method(): 21 | with pytest.raises(NotImplementedError): 22 | persistor.Persistor().list_projects() 23 | 24 | 25 | @mock_s3 26 | def test_list_projects_method_in_AWSPersistor(component_builder, tmpdir): 27 | # artificially create a persisted model 28 | _config = utilities.base_test_conf("keyword") 29 | os.environ["BUCKET_NAME"] = 'rasa-test' 30 | os.environ["AWS_DEFAULT_REGION"] = 'us-east-1' 31 | 32 | (trained, _, persisted_path) = train.do_train( 33 | _config, 34 | data="data/test/demo-rasa-small.json", 35 | path=tmpdir.strpath, 36 | project='mytestproject', 37 | storage='aws', 38 | component_builder=component_builder) 39 | 40 | # We need to create the bucket since this is all in Moto's 'virtual' AWS 41 | # account 42 | awspersistor = persistor.AWSPersistor(os.environ["BUCKET_NAME"]) 43 | result = awspersistor.list_projects() 44 | 45 | assert result == ['mytestproject'] 46 | 47 | 48 | @mock_s3 49 | def test_list_projects_method_raise_exeception_in_AWSPersistor(): 50 | os.environ["AWS_DEFAULT_REGION"] = 'us-east-1' 51 | 52 | awspersistor = persistor.AWSPersistor("rasa-test") 53 | result = awspersistor.list_projects() 54 | 55 | assert result == [] 56 | 57 | 58 | def test_list_projects_method_in_GCSPersistor(): 59 | def mocked_init(self, *args, **kwargs): 60 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 61 | self.bucket = Object() 62 | 63 | def mocked_list_blobs(): 64 | filter_result = Object() 65 | filter_result.name = 'blob_name' 66 | return filter_result, 67 | 68 | self.bucket.list_blobs = mocked_list_blobs 69 | 70 | with mock.patch.object(persistor.GCSPersistor, "__init__", mocked_init): 71 | result = persistor.GCSPersistor("").list_projects() 72 | 73 | assert result == ['project'] 74 | 75 | 76 | def test_list_projects_method_raise_exeception_in_GCSPersistor(): 77 | def mocked_init(self, *args, **kwargs): 78 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 79 | self.bucket = Object() 80 | 81 | def mocked_list_blobs(): 82 | raise ValueError 83 | 84 | self.bucket.list_blobs = mocked_list_blobs 85 | 86 | with mock.patch.object(persistor.GCSPersistor, "__init__", mocked_init): 87 | result = persistor.GCSPersistor("").list_projects() 88 | 89 | assert result == [] 90 | 91 | 92 | def test_list_projects_method_in_AzurePersistor(): 93 | def mocked_init(self, *args, **kwargs): 94 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 95 | self.blob_client = Object() 96 | self.container_name = 'test' 97 | 98 | def mocked_list_blobs( 99 | container_name, 100 | prefix=None 101 
| ): 102 | filter_result = Object() 103 | filter_result.name = 'blob_name' 104 | return filter_result, 105 | 106 | self.blob_client.list_blobs = mocked_list_blobs 107 | 108 | with mock.patch.object(persistor.AzurePersistor, "__init__", mocked_init): 109 | result = persistor.AzurePersistor("").list_projects() 110 | 111 | assert result == ['project'] 112 | 113 | 114 | def test_list_projects_method_raise_exeception_in_AzurePersistor(): 115 | def mocked_init(self, *args, **kwargs): 116 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 117 | self.blob_client = Object() 118 | 119 | def mocked_list_blobs( 120 | container_name, 121 | prefix=None 122 | ): 123 | raise ValueError 124 | 125 | self.blob_client.list_blobs = mocked_list_blobs 126 | 127 | with mock.patch.object(persistor.AzurePersistor, "__init__", mocked_init): 128 | result = persistor.AzurePersistor("").list_projects() 129 | 130 | assert result == [] 131 | -------------------------------------------------------------------------------- /tests/base/test_project.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import mock 7 | 8 | from rasa_nlu.project import Project 9 | 10 | 11 | def test_dynamic_load_model_with_exists_model(): 12 | MODEL_NAME = 'model_name' 13 | 14 | def mocked_init(*args, **kwargs): 15 | return None 16 | 17 | with mock.patch.object(Project, "__init__", mocked_init): 18 | project = Project() 19 | 20 | project._models = (MODEL_NAME, ) 21 | 22 | result = project._dynamic_load_model(MODEL_NAME) 23 | 24 | assert result == MODEL_NAME 25 | 26 | 27 | def test_dynamic_load_model_with_refresh_exists_model(): 28 | MODEL_NAME = 'model_name' 29 | 30 | def mocked_init(*args, **kwargs): 31 | return None 32 | 33 | def mocked_search_for_models(self): 34 | self._models = (MODEL_NAME, ) 35 | 36 | with mock.patch.object(Project, "__init__", mocked_init): 37 | with mock.patch.object(Project, '_search_for_models', mocked_search_for_models): 38 | project = Project() 39 | 40 | project._models = () 41 | 42 | result = project._dynamic_load_model(MODEL_NAME) 43 | 44 | assert result == MODEL_NAME 45 | 46 | 47 | def test_dynamic_load_model_with_refresh_not_exists_model(): 48 | LATEST_MODEL_NAME = 'latest_model_name' 49 | 50 | def mocked_init(*args, **kwargs): 51 | return None 52 | 53 | def mocked_search_for_models(self): 54 | pass 55 | 56 | def mocked_latest_project_model(self): 57 | return LATEST_MODEL_NAME 58 | 59 | with mock.patch.object(Project, "__init__", mocked_init): 60 | with mock.patch.object(Project, "_search_for_models", mocked_search_for_models): 61 | with mock.patch.object(Project, "_latest_project_model", mocked_latest_project_model): 62 | project = Project() 63 | 64 | project._models = () 65 | 66 | result = project._dynamic_load_model('model_name') 67 | 68 | assert result == LATEST_MODEL_NAME 69 | 70 | 71 | def test_dynamic_load_model_with_model_is_none(): 72 | LATEST_MODEL_NAME = 'latest_model_name' 73 | 74 | def mocked_init(*args, **kwargs): 75 | return None 76 | 77 | def mocked_search_for_models(self): 78 | pass 79 | 80 | def mocked_latest_project_model(self): 81 | return LATEST_MODEL_NAME 82 | 83 | with mock.patch.object(Project, "__init__", mocked_init): 84 | with mock.patch.object(Project, "_search_for_models", mocked_search_for_models): 85 | with mock.patch.object(Project, 
"_latest_project_model", mocked_latest_project_model): 86 | project = Project() 87 | 88 | project._models = () 89 | 90 | result = project._dynamic_load_model(None) 91 | 92 | assert result == LATEST_MODEL_NAME 93 | -------------------------------------------------------------------------------- /tests/base/test_synonyms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from rasa_nlu.extractors.entity_synonyms import EntitySynonymMapper 7 | 8 | 9 | def test_entity_synonyms(): 10 | entities = [{ 11 | "entity": "test", 12 | "value": "chines", 13 | "start": 0, 14 | "end": 6 15 | }, { 16 | "entity": "test", 17 | "value": "chinese", 18 | "start": 0, 19 | "end": 6 20 | }, { 21 | "entity": "test", 22 | "value": "china", 23 | "start": 0, 24 | "end": 6 25 | }] 26 | ent_synonyms = {"chines": "chinese", "NYC": "New York City"} 27 | EntitySynonymMapper(synonyms=ent_synonyms).replace_synonyms(entities) 28 | assert len(entities) == 3 29 | assert entities[0]["value"] == "chinese" 30 | assert entities[1]["value"] == "chinese" 31 | assert entities[2]["value"] == "china" 32 | -------------------------------------------------------------------------------- /tests/base/test_tokenizers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | 8 | def test_whitespace(): 9 | from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer 10 | tk = WhitespaceTokenizer() 11 | 12 | assert [t.text for t in tk.tokenize("Forecast for lunch")] == \ 13 | ['Forecast', 'for', 'lunch'] 14 | 15 | assert [t.offset for t in tk.tokenize("Forecast for lunch")] == \ 16 | [0, 9, 13] 17 | 18 | assert [t.text for t in tk.tokenize("hey ńöñàśçií how're you?")] == \ 19 | ['hey', 'ńöñàśçií', 'how\'re', 'you?'] 20 | 21 | assert [t.offset for t in tk.tokenize("hey ńöñàśçií how're you?")] == \ 22 | [0, 4, 13, 20] 23 | 24 | 25 | def test_spacy(spacy_nlp): 26 | from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer 27 | tk = SpacyTokenizer() 28 | 29 | text = "Forecast for lunch" 30 | assert [t.text for t in tk.tokenize(spacy_nlp(text))] == \ 31 | ['Forecast', 'for', 'lunch'] 32 | assert [t.offset for t in tk.tokenize(spacy_nlp(text))] == \ 33 | [0, 9, 13] 34 | 35 | text = "hey ńöñàśçií how're you?" 36 | assert [t.text for t in tk.tokenize(spacy_nlp(text))] == \ 37 | ['hey', 'ńöñàśçií', 'how', '\'re', 'you', '?'] 38 | assert [t.offset for t in tk.tokenize(spacy_nlp(text))] == \ 39 | [0, 4, 13, 16, 20, 23] 40 | 41 | 42 | def test_mitie(): 43 | from rasa_nlu.tokenizers.mitie_tokenizer import MitieTokenizer 44 | tk = MitieTokenizer() 45 | 46 | text = "Forecast for lunch" 47 | assert [t.text for t in tk.tokenize(text)] == \ 48 | ['Forecast', 'for', 'lunch'] 49 | assert [t.offset for t in tk.tokenize(text)] == \ 50 | [0, 9, 13] 51 | 52 | text = "hey ńöñàśçií how're you?" 
53 | assert [t.text for t in tk.tokenize(text)] == \ 54 | ['hey', 'ńöñàśçií', 'how', '\'re', 'you', '?'] 55 | assert [t.offset for t in tk.tokenize(text)] == \ 56 | [0, 4, 13, 16, 20, 23] 57 | 58 | 59 | def test_jieba(): 60 | from rasa_nlu.tokenizers.jieba_tokenizer import JiebaTokenizer 61 | tk = JiebaTokenizer() 62 | 63 | assert [t.text for t in tk.tokenize("我想去吃兰州拉面")] == \ 64 | ['我', '想', '去', '吃', '兰州', '拉面'] 65 | 66 | assert [t.offset for t in tk.tokenize("我想去吃兰州拉面")] == \ 67 | [0, 1, 2, 3, 4, 6] 68 | 69 | assert [t.text for t in tk.tokenize("Micheal你好吗?")] == \ 70 | ['Micheal', '你好', '吗', '?'] 71 | 72 | assert [t.offset for t in tk.tokenize("Micheal你好吗?")] == \ 73 | [0, 7, 9, 10] -------------------------------------------------------------------------------- /tests/base/test_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import io 7 | import os 8 | import pickle 9 | import tempfile 10 | 11 | import pytest 12 | 13 | from rasa_nlu import utils 14 | from rasa_nlu.utils import ( 15 | relative_normpath, create_dir, is_url, ordered, is_model_dir, remove_model, 16 | write_json_to_file, write_to_file) 17 | 18 | 19 | @pytest.fixture 20 | def empty_model_dir(scope="function"): 21 | temp_path = tempfile.mkdtemp() 22 | yield temp_path 23 | if os.path.exists(temp_path): 24 | os.rmdir(temp_path) 25 | 26 | 27 | def test_relative_normpath(): 28 | test_file = "/my/test/path/file.txt" 29 | assert relative_normpath(test_file, "/my/test") == "path/file.txt" 30 | assert relative_normpath(None, "/my/test") is None 31 | 32 | 33 | def test_list_files_invalid_resource(): 34 | with pytest.raises(ValueError) as execinfo: 35 | utils.list_files(None) 36 | assert "must be a string type" in str(execinfo.value) 37 | 38 | 39 | def test_list_files_non_existing_dir(): 40 | with pytest.raises(ValueError) as execinfo: 41 | utils.list_files("my/made_up/path") 42 | assert "Could not locate the resource" in str(execinfo.value) 43 | 44 | 45 | def test_list_files_ignores_hidden_files(tmpdir): 46 | # create a hidden file 47 | open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close() 48 | # create a normal file 49 | normal_file = os.path.join(tmpdir.strpath, "normal_file") 50 | open(normal_file, 'a').close() 51 | assert utils.list_files(tmpdir.strpath) == [normal_file] 52 | 53 | 54 | def test_creation_of_existing_dir(tmpdir): 55 | # makes sure there is no exception 56 | assert create_dir(tmpdir.strpath) is None 57 | 58 | 59 | def test_ordered(): 60 | target = {"a": [1, 3, 2], "c": "a", "b": 1} 61 | assert ordered(target) == [('a', [1, 2, 3]), ('b', 1), ('c', 'a')] 62 | 63 | 64 | @pytest.mark.parametrize(("model_dir", "expected"), 65 | [("test_models/test_model_mitie/model_20170628-002704", True), 66 | ("test_models/test_model_mitie_sklearn/model_20170628-002712", True), 67 | ("test_models/test_model_spacy_sklearn/model_20170628-002705", True), 68 | ("test_models/", False), 69 | ("test_models/nonexistent_for_sure_123", False)]) 70 | def test_is_model_dir(model_dir, expected): 71 | assert is_model_dir(model_dir) == expected 72 | 73 | 74 | def test_is_model_dir_empty(empty_model_dir): 75 | assert is_model_dir(empty_model_dir) 76 | 77 | 78 | def test_remove_model_empty(empty_model_dir): 79 | assert remove_model(empty_model_dir) 80 | 81 | 82 | def test_remove_model_with_files(empty_model_dir): 83 | metadata_file = 
"metadata.json" 84 | metadata_content = {"pipeline": "spacy_sklearn", "language": "en"} 85 | metadata_path = os.path.join(empty_model_dir, metadata_file) 86 | write_json_to_file(metadata_path, metadata_content) 87 | 88 | fake_obj = {"Fake", "model"} 89 | fake_obj_path = os.path.join(empty_model_dir, "component.pkl") 90 | with io.open(fake_obj_path, "wb") as f: 91 | pickle.dump(fake_obj, f) 92 | 93 | assert remove_model(empty_model_dir) 94 | 95 | 96 | def test_remove_model_invalid(empty_model_dir): 97 | test_file = "something.else" 98 | test_content = "Some other stuff" 99 | test_file_path = os.path.join(empty_model_dir, test_file) 100 | write_to_file(test_file_path, test_content) 101 | 102 | with pytest.raises(ValueError) as e: 103 | remove_model(empty_model_dir) 104 | 105 | os.remove(test_file_path) 106 | 107 | 108 | def test_is_url(): 109 | assert not is_url('./some/file/path') 110 | assert is_url('https://rasa.com/') 111 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | import pytest 9 | 10 | from rasa_nlu import data_router, config 11 | from rasa_nlu.components import ComponentBuilder 12 | 13 | logging.basicConfig(level="DEBUG") 14 | 15 | CONFIG_DEFAULTS_PATH = "sample_configs/config_defaults.yml" 16 | 17 | DEFAULT_DATA_PATH = "data/examples/rasa/demo-rasa.json" 18 | 19 | # see `rasa_nlu.data_router` for details. avoids deadlock in 20 | # `deferred_from_future` function during tests 21 | data_router.DEFERRED_RUN_IN_REACTOR_THREAD = False 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def component_builder(): 26 | return ComponentBuilder() 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def spacy_nlp(component_builder, default_config): 31 | return component_builder.create_component("nlp_spacy", default_config).nlp 32 | 33 | 34 | @pytest.fixture(scope="session") 35 | def mitie_feature_extractor(component_builder, default_config): 36 | return component_builder.create_component("nlp_mitie", default_config).extractor 37 | 38 | 39 | @pytest.fixture(scope="session") 40 | def default_config(): 41 | return config.load(CONFIG_DEFAULTS_PATH) 42 | -------------------------------------------------------------------------------- /tests/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/training/__init__.py -------------------------------------------------------------------------------- /tests/utilities.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import tempfile 7 | 8 | import pytest 9 | import yaml 10 | from builtins import object 11 | 12 | from rasa_nlu.config import RasaNLUModelConfig 13 | from rasa_nlu.model import Interpreter 14 | from rasa_nlu.train import do_train 15 | 16 | slowtest = pytest.mark.slowtest 17 | 18 | 19 | def base_test_conf(pipeline_template): 20 | # 'response_log': temp_log_file_dir(), 21 | # 'port': 5022, 22 | # "path": tempfile.mkdtemp(), 23 | # "data": 
"./data/test/demo-rasa-small.json" 24 | 25 | return RasaNLUModelConfig({"pipeline": pipeline_template}) 26 | 27 | 28 | def write_file_config(file_config): 29 | with tempfile.NamedTemporaryFile("w+", 30 | suffix="_tmp_config_file.yml", 31 | delete=False) as f: 32 | f.write(yaml.safe_dump(file_config)) 33 | f.flush() 34 | return f 35 | 36 | 37 | def interpreter_for(component_builder, data, path, config): 38 | (trained, _, path) = do_train(config, data, path, 39 | component_builder=component_builder) 40 | interpreter = Interpreter.load(path, component_builder) 41 | return interpreter 42 | 43 | 44 | def temp_log_file_dir(): 45 | return tempfile.mkdtemp(suffix="_rasa_nlu_logs") 46 | 47 | 48 | class ResponseTest(object): 49 | def __init__(self, endpoint, expected_response, payload=None): 50 | self.endpoint = endpoint 51 | self.expected_response = expected_response 52 | self.payload = payload 53 | --------------------------------------------------------------------------------