├── .coveragerc ├── .dockerignore ├── .env ├── .gitattributes ├── .github ├── FUNDING.yml ├── ISSUE_TEMPLATE.md └── PULL_REQUEST_TEMPLATE.md ├── .gitignore ├── .travis.yml ├── CHANGELOG.rst ├── CODE_OF_CONDUCT.md ├── LICENSE.txt ├── MANIFEST.in ├── Makefile ├── README.md ├── alt_requirements ├── conda-requirements.txt ├── requirements_bare.txt ├── requirements_dev.txt ├── requirements_full.txt ├── requirements_mitie.txt ├── requirements_spacy_sklearn.txt └── requirements_tensorflow_sklearn.txt ├── app.json ├── cloudbuild.yaml ├── data ├── README.md ├── examples │ ├── dialogflow │ │ ├── agent.json │ │ ├── entities │ │ │ ├── cuisine.json │ │ │ ├── cuisine_entries_en.json │ │ │ ├── cuisine_entries_es.json │ │ │ ├── location.json │ │ │ ├── location_entries_en.json │ │ │ └── location_entries_es.json │ │ ├── intents │ │ │ ├── Default Fallback Intent.json │ │ │ ├── affirm.json │ │ │ ├── affirm_usersays_en.json │ │ │ ├── affirm_usersays_es.json │ │ │ ├── goodbye.json │ │ │ ├── goodbye_usersays_en.json │ │ │ ├── goodbye_usersays_es.json │ │ │ ├── hi.json │ │ │ ├── hi_usersays_en.json │ │ │ ├── hi_usersays_es.json │ │ │ ├── inform.json │ │ │ ├── inform_usersays_en.json │ │ │ └── inform_usersays_es.json │ │ └── package.json │ ├── luis │ │ └── demo-restaurants.json │ ├── rasa │ │ ├── demo-rasa.json │ │ ├── demo-rasa.md │ │ ├── demo-rasa_zh.json │ │ ├── demo-rasa_zh_medical.json │ │ └── demo-rasa_zh_movie.json │ └── wit │ │ └── demo-flights.json └── test │ ├── demo-rasa-noents.json │ ├── demo-rasa-small.json │ ├── demo-rasa-zh.json │ ├── dialogflow_en_converted_to_rasa.json │ ├── dialogflow_es_converted_to_rasa.json │ ├── json_converted_to_md.md │ ├── luis_converted_to_rasa.json │ ├── markdown_single_sections │ ├── regex_only.md │ └── synonyms_only.md │ ├── md_converted_to_json.json │ ├── multiple_files_json │ ├── demo-rasa-affirm.json │ ├── demo-rasa-goodbye.json │ ├── demo-rasa-greet.json │ └── demo-rasa-restaurant_search.json │ ├── multiple_files_markdown │ ├── demo-rasa-affirm.md │ ├── demo-rasa-goodbye.md │ ├── demo-rasa-greet.md │ └── demo-rasa-restaurant_search.md │ └── wit_converted_to_rasa.json ├── docker ├── Dockerfile_bare ├── Dockerfile_full ├── Dockerfile_mitie ├── Dockerfile_spacy_sklearn ├── Dockerfile_test └── docker-cloud.yml ├── docs ├── Makefile ├── _static │ ├── css │ │ └── custom.css │ └── images │ │ ├── component_lifecycle.png │ │ └── rasa_nlu_intent_gui.png ├── _templates │ └── layout.html ├── changelog.rst ├── closeloop.rst ├── community.rst ├── conf.py ├── config.rst ├── context.rst ├── contribute.rst ├── dataformat.rst ├── entities.rst ├── evaluation.rst ├── faq.rst ├── http.rst ├── index.rst ├── installation.rst ├── key.enc ├── languages.rst ├── license.rst ├── migrating.rst ├── migrations.rst ├── persist.rst ├── pipeline.rst ├── poll.html ├── python.rst └── tutorial.rst ├── entrypoint.sh ├── heroku ├── Procfile └── runtime.txt ├── jieba_userdict └── jieba_userdict.txt ├── rasa_nlu ├── __init__.py ├── classifiers │ ├── __init__.py │ ├── embedding_intent_classifier.py │ ├── keyword_intent_classifier.py │ ├── mitie_intent_classifier.py │ └── sklearn_intent_classifier.py ├── components.py ├── config.py ├── convert.py ├── data_router.py ├── emulators │ ├── __init__.py │ ├── dialogflow.py │ ├── luis.py │ └── wit.py ├── evaluate.py ├── extractors │ ├── __init__.py │ ├── crf_entity_extractor.py │ ├── duckling_extractor.py │ ├── duckling_http_extractor.py │ ├── entity_synonyms.py │ ├── mitie_entity_extractor.py │ └── spacy_entity_extractor.py ├── featurizers │ ├── __init__.py │ ├── 
count_vectors_featurizer.py │ ├── mitie_featurizer.py │ ├── ngram_featurizer.py │ ├── regex_featurizer.py │ └── spacy_featurizer.py ├── model.py ├── persistor.py ├── project.py ├── registry.py ├── run.py ├── schemas │ └── nlu_model.yml ├── server.py ├── tokenizers │ ├── __init__.py │ ├── jieba_tokenizer.py │ ├── mitie_tokenizer.py │ ├── spacy_tokenizer.py │ ├── whitespace_tokenizer.py │ └── yaha_tokenizer.py ├── train.py ├── training_data │ ├── __init__.py │ ├── formats │ │ ├── __init__.py │ │ ├── dialogflow.py │ │ ├── luis.py │ │ ├── markdown.py │ │ ├── rasa.py │ │ ├── readerwriter.py │ │ └── wit.py │ ├── loading.py │ ├── message.py │ ├── training_data.py │ └── util.py ├── utils │ ├── __init__.py │ ├── mitie_utils.py │ └── spacy_utils.py └── version.py ├── requirements.txt ├── sample_configs ├── config_crf.yml ├── config_defaults.yml ├── config_embedding.yml ├── config_jieba_mitie.yml ├── config_jieba_mitie_sklearn.json ├── config_jieba_mitie_sklearn.yml ├── config_jieba_mitie_sklearn_plus_dict_path.yml ├── config_mitie.yml ├── config_mitie_sklearn.yml ├── config_spacy.yml ├── config_spacy_duckling.yml ├── config_train_server_json.yml ├── config_train_server_md.yml └── config_yaha_mitie_sklearn.json ├── setup.cfg ├── setup.py ├── test_models ├── test_model_mitie │ └── model_20170628-002704 │ │ ├── entity_extractor.dat │ │ ├── entity_synonyms.json │ │ ├── intent_classifier.dat │ │ ├── metadata.json │ │ └── training_data.json ├── test_model_mitie_sklearn │ └── model_20170628-002712 │ │ ├── entity_extractor.dat │ │ ├── entity_synonyms.json │ │ ├── intent_classifier.pkl │ │ ├── metadata.json │ │ └── training_data.json └── test_model_spacy_sklearn │ └── model_20170628-002705 │ ├── crf_model.pkl │ ├── entity_synonyms.json │ ├── intent_classifier.pkl │ ├── metadata.json │ └── training_data.json └── tests ├── __init__.py ├── base ├── __init__.py ├── test_components.py ├── test_config.py ├── test_data_router.py ├── test_emulators.py ├── test_evaluation.py ├── test_extractors.py ├── test_featurizers.py ├── test_interpreter.py ├── test_multitenancy.py ├── test_persistor.py ├── test_project.py ├── test_server.py ├── test_synonyms.py ├── test_tokenizers.py ├── test_training_data.py └── test_utils.py ├── conftest.py ├── training ├── __init__.py └── test_train.py └── utilities.py /.coveragerc: -------------------------------------------------------------------------------- 1 | [report] 2 | exclude_lines = 3 | pragma: no cover 4 | def __repr__ 5 | raise NotImplementedError 6 | if __name__ == .__main__.: 7 | def create_argument_parser 8 | if typing.TYPE_CHECKING -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | docker* 2 | docs 3 | .git* 4 | **/*.pyc 5 | **/__pycache__ 6 | 7 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | TIMES=2 -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | * -text -------------------------------------------------------------------------------- /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | # These are supported funding model platforms 2 | 3 | github: crownpku # Replace with up to 4 GitHub Sponsors-enabled usernames e.g., [user1, user2] 4 | 
patreon: # Replace with a single Patreon username 5 | open_collective: # Replace with a single Open Collective username 6 | ko_fi: # Replace with a single Ko-fi username 7 | tidelift: # Replace with a single Tidelift platform-name/package-name e.g., npm/babel 8 | community_bridge: # Replace with a single Community Bridge project-name e.g., cloud-foundry 9 | liberapay: # Replace with a single Liberapay username 10 | issuehunt: # Replace with a single IssueHunt username 11 | otechie: # Replace with a single Otechie username 12 | custom: #https://www.paypal.me/crownpku 13 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | **Rasa NLU version**: 4 | 5 | **Operating system** (windows, osx, ...): 6 | 7 | **Content of model configuration file**: 8 | ```yml 9 | 10 | ``` 11 | 12 | **Issue**: 13 | 14 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | **Proposed changes**: 2 | - ... 3 | 4 | **Status (please check what you already did)**: 5 | - [ ] made PR ready for code review 6 | - [ ] added some tests for the functionality 7 | - [ ] updated the documentation 8 | - [ ] updated the changelog 9 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *pyc 3 | dist/ 4 | data/* 5 | !data/examples 6 | !data/test 7 | !data/README.md 8 | docs/_build 9 | server/ 10 | scala/ 11 | mongodb/ 12 | .cache/ 13 | build/ 14 | *.egg-info/ 15 | jnk/ 16 | logs/ 17 | tmp/ 18 | profile.* 19 | *.sqlite 20 | lastmile_ai/learn/plots/ 21 | *npy 22 | *# 23 | /config.json 24 | *log.json 25 | .coverage 26 | .coveralls.yml 27 | .idea/ 28 | *.iml 29 | out/ 30 | .vscode/ 31 | tmp_training_data.json 32 | .DS_Store 33 | models/ 34 | projects/ 35 | test_models/model_* 36 | .ipynb_checkpoints/ 37 | models/ 38 | nohup.out 39 | test_projects/test_project_* 40 | rasa_nlu/tmbo_test.py 41 | .mypy_cache/ 42 | *.tar.gz 43 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: python 2 | sudo: required 3 | group: deprecated-2017Q2 4 | services: 5 | - docker 6 | cache: 7 | directories: 8 | - $HOME/.cache/pip 9 | - /tmp/cached/ 10 | python: 11 | - '2.7' 12 | - '3.5' 13 | - '3.6' 14 | env: 15 | # needed to fix issues with boto during testing: 16 | # https://github.com/travis-ci/travis-ci/issues/7940 17 | global: BOTO_CONFIG=/dev/null 18 | install: 19 | - pip install git+https://github.com/tmbo/MITIE.git 20 | - pip install -r alt_requirements/requirements_dev.txt 21 | - pip install -e . 22 | - pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz 23 | --no-cache-dir > jnk 24 | - python -m spacy link en_core_web_md en 25 | - pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz 26 | --no-cache-dir > jnk 27 | - python -m spacy link de_core_news_sm de 28 | - if [[ ! 
-f /tmp/cached/total_word_feature_extractor.dat ]]; then wget --quiet -P /tmp/cached/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat; 29 | fi 30 | - mv /tmp/cached/total_word_feature_extractor.dat data/total_word_feature_extractor.dat 31 | - pip list 32 | before_script: 33 | - mkdir $HOME/tmp 34 | - export TMPDIR=$HOME/tmp 35 | script: 36 | - py.test --pep8 -m pep8 37 | - py.test tests/base --cov rasa_nlu -v --cov-append 38 | - py.test tests/training --cov rasa_nlu -v --cov-append 39 | after_success: 40 | - coveralls 41 | jobs: 42 | include: 43 | - stage: docs 44 | if: fork = false 45 | install: 46 | - pip install sphinx==1.5.2 sphinx-autobuild==0.7.1 sphinxcontrib-versioning==2.2.1 sphinxcontrib-programoutput==0.11 47 | nbsphinx==0.2.18 48 | - pip install -e git://github.com/RasaHQ/sphinx_rtd_theme.git#egg=sphinx_rtd_theme 49 | - pip install -e . 50 | script: 51 | - eval "$(ssh-agent -s)"; touch docs/key; chmod 0600 docs/key 52 | - openssl aes-256-cbc -K $encrypted_4a8a3f4b9c17_key -iv $encrypted_4a8a3f4b9c17_iv -in docs/key.enc -out docs/key -d && ssh-add docs/key 53 | - git config --global user.email "builds@travis-ci.com" 54 | - git config --global user.name "Travis CI" 55 | - git remote set-url --push origin "git@github.com:$TRAVIS_REPO_SLUG" 56 | - export ${!TRAVIS*} 57 | - sphinx-versioning push docs docs . 58 | - stage: deploy 59 | install: skip 60 | script: skip 61 | deploy: 62 | provider: pypi 63 | user: amn41 64 | # server: https://test.pypi.org/legacy/ 65 | distributions: "sdist bdist_wheel" 66 | on: 67 | branch: master 68 | tags: true 69 | password: 70 | secure: K3JhIpxBBfu8SC8voAsIvgU9pdND9PayQi8Ep4Whg+RPKgnLWMzbFe2FfSTyxuEIkJGx4S6h0qORGz4ro6b/tCy72ruEYxLrx3vt8uNtWdYXSRnW+Knqk4QKn2q+WehmfSxhkvu2PQ3LACGWN13Nnc4OdlY9u843d0dSjD9INlAs/+m6X3Me0zdACmwd0V0l4U2hNMjJyvOPznrQj4HrMIGWfuags4NLySVkpnYMMzz5lnamUZKUUfyChAKTUPXuoO8s9U0Zxj2duOy+2yu9hcJwomFwBLiWR6nKZmEtzYrfgHFDkRtNyuJtmQn3pR4BzbMV5L6Td7DAey3fRYss8JxVZ+3mwjsRzbbMDRpqqI8b7L0KBFnWfS5qOecB6T9hT2SVQuGHqj4Y/CAHqzscBhiOlhKev65JXIc1JIJACKWaHVYASKeU24zprlcalkRsqXmUv/rvSgP1UQSEsE726hxr0gs/gyJVRSmg7dxm/BrFTVa4Pucpy0QW3ABfc7miaz9LuNzsY+7OBxmsOhPDZpQVez9TNr4agdh6enRTK2cg0zDqjSfzjXBPwXRlcsR67u1JZPqjN0cpT44xKUvuwzDFgcZjK74tDx9A7cV6yS325cKIz8KQ08saBIyrbDtbv/i9ry1Dvkxj+k3t+i7kyuzjzMdhj2yDF9WTAGTdOhY= 71 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 
6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at tom@rasa.ai or alan@rasa.ai. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE README.rst requirements.txt 2 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean test lint 2 | 3 | TEST_PATH=./ 4 | 5 | help: 6 | @echo " clean" 7 | @echo " Remove python artifacts and build artifacts." 
8 | @echo " lint" 9 | @echo " Check style with flake8." 10 | @echo " test" 11 | @echo " Run py.test" 12 | @echo " check-readme" 13 | @echo " Check if the readme can be converted from md to rst for pypi" 14 | 15 | clean: 16 | find . -name '*.pyc' -exec rm -f {} + 17 | find . -name '*.pyo' -exec rm -f {} + 18 | find . -name '*~' -exec rm -f {} + 19 | rm -rf build/ 20 | rm -rf dist/ 21 | rm -rf *.egg-info 22 | rm -rf docs/_build 23 | 24 | lint: 25 | py.test --pep8 -m pep8 26 | 27 | test: clean 28 | py.test tests --verbose --pep8 --color=yes $(TEST_PATH) 29 | 30 | livedocs: 31 | cd docs && make livehtml 32 | 33 | check-readme: 34 | # if this runs through we can be sure the readme is properly shown on pypi 35 | python setup.py check --restructuredtext --strict 36 | -------------------------------------------------------------------------------- /alt_requirements/conda-requirements.txt: -------------------------------------------------------------------------------- 1 | scipy==1.10.0 2 | scikit-learn==0.19.1 3 | -------------------------------------------------------------------------------- /alt_requirements/requirements_bare.txt: -------------------------------------------------------------------------------- 1 | gevent==23.9.1 2 | klein==17.10.0 3 | hyperlink==17.3.1 4 | boto3==1.5.20 5 | typing==3.6.2 6 | future==0.18.3 7 | six==1.11.0 8 | jsonschema==2.6.0 9 | matplotlib==2.1.0 10 | requests==2.31.0 11 | tqdm==4.19.5 12 | numpy==1.22.0 13 | simplejson==3.13.2 14 | cloudpickle==0.5.2 15 | msgpack-python==0.5.4 16 | packaging==17.1 17 | pyyaml==5.4 18 | coloredlogs==9.0 19 | -------------------------------------------------------------------------------- /alt_requirements/requirements_dev.txt: -------------------------------------------------------------------------------- 1 | 2 | # mitie 3 | git+https://github.com/mit-nlp/MITIE.git#egg=mitie 4 | # spacy 5 | spacy==1.8.2 6 | # sklearn 7 | scikit-learn==0.18.1 8 | scipy==1.10.0 9 | matplotlib==1.5.3 10 | # duckling 11 | duckling==1.7.1 12 | # sklearn_crfsuite 13 | sklearn-crfsuite==0.3.5 14 | # jieba 15 | jieba==0.38 16 | # cloudpickle 17 | cloudpickle==0.2.2 18 | 19 | -r requirements_full.txt 20 | 21 | 22 | # test 23 | python-coveralls==2.9.1 24 | pytest-pep8==1.0.6 25 | pytest-services==1.2.1 26 | pytest-cov==2.5.1 27 | pytest-twisted==1.6 28 | pytest==3.3.2 29 | treq==22.1.0 30 | moto==1.2.0 31 | mock==2.0.0 32 | # other 33 | google-cloud-storage==1.7.0 34 | azure-storage-blob==1.0.0 35 | 36 | # docs 37 | sphinx==1.5.2 38 | sphinx-autobuild==0.7.1 39 | sphinxcontrib-versioning==2.2.1 40 | sphinxcontrib-programoutput==0.11 41 | nbsphinx==0.2.18 42 | -e git://github.com/RasaHQ/sphinx_rtd_theme.git#egg=sphinx_rtd_theme 43 | -------------------------------------------------------------------------------- /alt_requirements/requirements_full.txt: -------------------------------------------------------------------------------- 1 | # Minimum Instal Requirements 2 | -r requirements_bare.txt 3 | 4 | # Spacy Requirements 5 | -r requirements_spacy_sklearn.txt 6 | 7 | # Tensorflow Requirements 8 | -r requirements_tensorflow_sklearn.txt 9 | 10 | # MITIE Requirements 11 | -r requirements_mitie.txt 12 | 13 | duckling==1.8.0 14 | Jpype1==0.6.2 15 | jieba==0.39 16 | -------------------------------------------------------------------------------- /alt_requirements/requirements_mitie.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | 
git+https://github.com/mit-nlp/MITIE.git#egg=mitie 5 | -------------------------------------------------------------------------------- /alt_requirements/requirements_spacy_sklearn.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | spacy==2.0.5 5 | scikit-learn==0.19.1 6 | scipy==1.10.0 7 | sklearn-crfsuite==0.3.6 -------------------------------------------------------------------------------- /alt_requirements/requirements_tensorflow_sklearn.txt: -------------------------------------------------------------------------------- 1 | # Minimum Install Requirements 2 | -r requirements_bare.txt 3 | 4 | scikit-learn==0.19.1 5 | tensorflow==2.11.1 -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu", 3 | "description": "a service for interpreting natural language", 4 | "image": "heroku/python", 5 | "repository": "https://github.com/RasaHQ/rasa_nlu", 6 | "keywords": ["python" ], 7 | "addons": [], 8 | "scripts": { 9 | "postdeploy": "echo '{}' > config.json;mkdir logs;python -m spacy download en" 10 | }, 11 | "buildpacks": [ 12 | { 13 | "url": "https://github.com/kennethreitz/conda-buildpack.git" 14 | } 15 | ], 16 | "env" : { 17 | "RASA_TOKEN": { 18 | "generator": "secret", 19 | "description": "token for validating requests" 20 | }, 21 | "RASA_PIPELINE": { 22 | "description": "which pipeline to use", 23 | "value": "spacy_sklearn" 24 | }, 25 | "RASA_PATH": { 26 | "description": "where to save projects and their models", 27 | "value": "/app/data" 28 | }, 29 | "RASA_LOGDIR": { 30 | "description": "where to save logs", 31 | "value": "/app/logs" 32 | }, 33 | "RASA_MITIE_FILE": { 34 | "description": "file containing mitie feature extractor", 35 | "value": "${RASA_PATH}/total_word_feature_extractor.dat" 36 | }, 37 | "RASA_SERVER_MODEL_DIRS": { 38 | "description": "dir containing model which should be used to process requests", 39 | "required": false 40 | }, 41 | "AWS_SECRET_ACCESS_KEY": { 42 | "description": "secret key for S3 access", 43 | "required": false 44 | }, 45 | "AWS_ACCESS_KEY_ID": { 46 | "description": "key id for S3 access", 47 | "required": false 48 | }, 49 | "BUCKET_NAME": { 50 | "description": "name of s3 bucket", 51 | "value": "rasanlu" 52 | }, 53 | "AWS_REGION": { 54 | "description": "aws region of S3 bucket", 55 | "value": "eu-west-1" 56 | } 57 | } 58 | } -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | timeout: "20m" 2 | steps: 3 | - name: 'gcr.io/cloud-builders/docker' 4 | id: 'docker-build' 5 | args: ['build', '--file', './docker/Dockerfile_full', '-t', '$_IMAGE_REPO:$TAG_NAME', '.'] 6 | images: [ '$_IMAGE_REPO:$TAG_NAME' ] 7 | -------------------------------------------------------------------------------- /data/README.md: -------------------------------------------------------------------------------- 1 | These are some example training data files for a simple bot in the restaurant domain. 2 | They are in the format of the services rasa NLU can emulate, e.g. when you download an export 3 | of your app from one of these services it should look like one of these files. 
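
For orientation, here is a minimal loading sketch (not one of the repository's files): it assumes the `load_data` helper exported from the `rasa_nlu/training_data/` package shown in the tree above, and the `training_examples` attribute of the resulting `TrainingData` object; the file path is one of the example files listed below.

```python
# Hypothetical usage sketch -- not a file from this repository.
# Assumes the `load_data` helper exposed by rasa_nlu/training_data/
# (see rasa_nlu/training_data/loading.py in the tree above).
from rasa_nlu.training_data import load_data

# demo-rasa.json is one of the example training data files described below.
training_data = load_data("data/examples/rasa/demo-rasa.json")

# `training_examples` is assumed to hold the labelled example messages.
print(len(training_data.training_examples))
```

The same loader should accept the other formats listed here, since the `rasa_nlu/training_data/formats/` package in the tree contains readers for the Dialogflow, LUIS, wit, and Markdown variants.
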
4 | 5 | 6 | [examples/rasa](examples/rasa): examples in the native rasa NLU format 7 | 8 | [examples/luis](examples/luis): in LUIS format 9 | 10 | [examples/wit](examples/wit): in wit format 11 | 12 | [examples/api](examples/api): this is a dir and in Dialogflow format 13 | -------------------------------------------------------------------------------- /data/examples/dialogflow/agent.json: -------------------------------------------------------------------------------- 1 | { 2 | "description": "", 3 | "language": "en", 4 | "googleAssistant": { 5 | "googleAssistantCompatible": false, 6 | "project": "rasanlu-development", 7 | "welcomeIntentSignInRequired": false, 8 | "startIntents": [], 9 | "systemIntents": [], 10 | "endIntentIds": [], 11 | "oAuthLinking": { 12 | "required": false, 13 | "grantType": "AUTH_CODE_GRANT" 14 | }, 15 | "voiceType": "MALE_1", 16 | "capabilities": [], 17 | "protocolVersion": "V2" 18 | }, 19 | "defaultTimezone": "Asia/Hong_Kong", 20 | "webhook": { 21 | "available": false, 22 | "useForDomains": false, 23 | "cloudFunctionsEnabled": false, 24 | "cloudFunctionsInitialized": false 25 | }, 26 | "isPrivate": true, 27 | "customClassifierMode": "use.after", 28 | "mlMinConfidence": 0.3, 29 | "supportedLanguages": [ 30 | "es" 31 | ] 32 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "11c77228-4a02-4db8-a398-b286fe8098d2", 3 | "name": "cuisine", 4 | "isOverridable": true, 5 | "isEnum": false, 6 | "automatedExpansion": false 7 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine_entries_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "mexican", 4 | "synonyms": [ 5 | "mexican", 6 | "mexico" 7 | ] 8 | }, 9 | { 10 | "value": "chinese", 11 | "synonyms": [ 12 | "chinese", 13 | "china" 14 | ] 15 | }, 16 | { 17 | "value": "indian", 18 | "synonyms": [ 19 | "indian", 20 | "india" 21 | ] 22 | } 23 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/cuisine_entries_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "mexicano", 4 | "synonyms": [ 5 | "mexicano", 6 | "mexicana", 7 | "méxico" 8 | ] 9 | }, 10 | { 11 | "value": "chino", 12 | "synonyms": [ 13 | "chino", 14 | "china", 15 | "chinos" 16 | ] 17 | }, 18 | { 19 | "value": "indio", 20 | "synonyms": [ 21 | "indio", 22 | "india" 23 | ] 24 | } 25 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "8ee88034-01d3-49d4-bb58-531a705b963b", 3 | "name": "location", 4 | "isOverridable": true, 5 | "isEnum": false, 6 | "automatedExpansion": false 7 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location_entries_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "centre", 4 | "synonyms": [ 5 | "centre" 6 | ] 7 | }, 8 | { 9 | "value": "west", 10 | "synonyms": [ 11 | "west" 12 | ] 13 | }, 14 | { 15 | "value": "central", 16 | "synonyms": [ 17 | "central" 18 | ] 19 | }, 20 | { 21 | "value": 
"north", 22 | "synonyms": [ 23 | "north" 24 | ] 25 | } 26 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/entities/location_entries_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "value": "centro", 4 | "synonyms": [ 5 | "centro", 6 | "centrar" 7 | ] 8 | }, 9 | { 10 | "value": "oeste", 11 | "synonyms": [ 12 | "oeste", 13 | "occidente" 14 | ] 15 | }, 16 | { 17 | "value": "central", 18 | "synonyms": [ 19 | "central", 20 | "céntrico" 21 | ] 22 | }, 23 | { 24 | "value": "norte", 25 | "synonyms": [ 26 | "norte" 27 | ] 28 | } 29 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/Default Fallback Intent.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "27b800fb-3b69-4723-932d-ca53eb849138", 3 | "name": "Default Fallback Intent", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "action": "input.unknown", 10 | "affectedContexts": [], 11 | "parameters": [], 12 | "messages": [ 13 | { 14 | "type": 0, 15 | "lang": "es", 16 | "speech": [ 17 | "Ups, no he entendido a que te refieres.", 18 | "¿Podrías repetirlo, por favor?", 19 | "¿Disculpa?", 20 | "¿Decías?", 21 | "¿Cómo?" 22 | ] 23 | }, 24 | { 25 | "type": 0, 26 | "lang": "en", 27 | "speech": [ 28 | "I didn\u0027t get that. Can you say it again?", 29 | "I missed what you said. Say it again?", 30 | "Sorry, could you say that again?", 31 | "Sorry, can you say that again?", 32 | "Can you say that again?", 33 | "Sorry, I didn\u0027t get that.", 34 | "Sorry, what was that?", 35 | "One more time?", 36 | "What was that?", 37 | "Say that again?", 38 | "I didn\u0027t get that.", 39 | "I missed that." 
40 | ] 41 | } 42 | ], 43 | "defaultResponsePlatforms": {}, 44 | "speech": [] 45 | } 46 | ], 47 | "priority": 500000, 48 | "webhookUsed": false, 49 | "webhookForSlotFilling": false, 50 | "lastUpdate": 1507539905, 51 | "fallbackIntent": true, 52 | "events": [] 53 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "c2e82a05-3980-4f74-b0d5-7ee0e1297284", 3 | "name": "affirm", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "affectedContexts": [], 10 | "parameters": [], 11 | "messages": [ 12 | { 13 | "type": 0, 14 | "lang": "es", 15 | "speech": "Me alegro de ayudarte, compañero :)" 16 | }, 17 | { 18 | "type": 0, 19 | "lang": "en", 20 | "speech": "Glad I help you, mate :)" 21 | } 22 | ], 23 | "defaultResponsePlatforms": {}, 24 | "speech": [] 25 | } 26 | ], 27 | "priority": 500000, 28 | "webhookUsed": false, 29 | "webhookForSlotFilling": false, 30 | "lastUpdate": 1507540481, 31 | "fallbackIntent": false, 32 | "events": [] 33 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "74dc9ae2-335c-448e-8e02-f37225051102", 4 | "data": [ 5 | { 6 | "text": "yes", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "425eacad-7c28-471a-8a4b-58f5079ec1c6", 16 | "data": [ 17 | { 18 | "text": "yep", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "034520c5-bc84-4f09-bd74-625d10fa6499", 28 | "data": [ 29 | { 30 | "text": "yeah", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "ee97d4c8-1d44-4f8c-a657-33a2c4c1c869", 40 | "data": [ 41 | { 42 | "text": "indeed", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "6d5b05c4-5cd6-43b5-af52-764b3b1259e7", 52 | "data": [ 53 | { 54 | "text": "that\u0027s right", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | }, 62 | { 63 | "id": "0ef60526-405a-40ec-955f-3961600ae7dd", 64 | "data": [ 65 | { 66 | "text": "ok", 67 | "userDefined": false 68 | } 69 | ], 70 | "isTemplate": false, 71 | "count": 0, 72 | "updated": 0 73 | }, 74 | { 75 | "id": "b15de01a-db24-4516-bab0-88eda8de1c16", 76 | "data": [ 77 | { 78 | "text": "great", 79 | "userDefined": false 80 | } 81 | ], 82 | "isTemplate": false, 83 | "count": 0, 84 | "updated": 0 85 | } 86 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/affirm_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "9dc52d2a-dbf2-44e7-bc55-0eee5a7a8a14", 4 | "data": [ 5 | { 6 | "text": "sí", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540481 13 | }, 14 | { 15 | "id": "9dg52d2a-dbf2-44e7-bc55-0eee5a6a8a14", 16 | "data": [ 17 | { 18 | "text": "si", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540481 25 | }, 26 | { 27 | "id": 
"9dg52d2a-dbf2-44e7-bc55-0eee5a6a8a24", 28 | "data": [ 29 | { 30 | "text": "Sí", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540481 37 | }, 38 | { 39 | "id": "7a11df78-3b06-48c3-9aa4-9f779c23fb0b", 40 | "data": [ 41 | { 42 | "text": "de verdad", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540481 49 | }, 50 | { 51 | "id": "e6238a3e-3dcd-4932-9034-1a05f037d4e3", 52 | "data": [ 53 | { 54 | "text": "está bien", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540481 61 | }, 62 | { 63 | "id": "64cc393a-f9c9-4521-9052-543f99fcd97e", 64 | "data": [ 65 | { 66 | "text": "muy bien", 67 | "userDefined": false 68 | } 69 | ], 70 | "isTemplate": false, 71 | "count": 0, 72 | "updated": 1507540481 73 | }, 74 | { 75 | "id": "0f4a932a-c929-47d4-8cb2-5095882f40a0", 76 | "data": [ 77 | { 78 | "text": "estupendo", 79 | "userDefined": false 80 | } 81 | ], 82 | "isTemplate": false, 83 | "count": 0, 84 | "updated": 1507540481 85 | } 86 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "a90df8dd-f5bd-45dd-b8cf-12cc4b5cb800", 3 | "name": "goodbye", 4 | "auto": true, 5 | "contexts": [], 6 | "responses": [ 7 | { 8 | "resetContexts": false, 9 | "affectedContexts": [], 10 | "parameters": [], 11 | "messages": [ 12 | { 13 | "type": 0, 14 | "lang": "es", 15 | "speech": "¡Nos vemos! Disfrutar" 16 | }, 17 | { 18 | "type": 0, 19 | "lang": "en", 20 | "speech": "See ya! Enjoy" 21 | } 22 | ], 23 | "defaultResponsePlatforms": {}, 24 | "speech": [] 25 | } 26 | ], 27 | "priority": 500000, 28 | "webhookUsed": false, 29 | "webhookForSlotFilling": false, 30 | "lastUpdate": 1507540635, 31 | "fallbackIntent": false, 32 | "events": [] 33 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "1f094fbb-199a-40cd-af8f-3978fcebc027", 4 | "data": [ 5 | { 6 | "text": "bye", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "b25f004f-425e-4ff3-b4db-36fa46772fc9", 16 | "data": [ 17 | { 18 | "text": "goodbye", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "f4e435f4-c88a-4998-b2c0-ef94565327ae", 28 | "data": [ 29 | { 30 | "text": "good bye", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "328a011a-08ba-4bd0-beea-6ab08859dd89", 40 | "data": [ 41 | { 42 | "text": "stop", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "b8d9e693-6383-4be9-a98f-38a60890fa7a", 52 | "data": [ 53 | { 54 | "text": "end", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/goodbye_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "dde37345-fa41-4310-9a7f-74f74ca7a925", 4 | 
"data": [ 5 | { 6 | "text": "a usted adiós", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540635 13 | }, 14 | { 15 | "id": "86811b69-cee1-4ae0-8944-ace8eb4badc2", 16 | "data": [ 17 | { 18 | "text": "despedida", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540635 25 | }, 26 | { 27 | "id": "78bbd720-b50b-4cb9-9dce-eee0fef5aa74", 28 | "data": [ 29 | { 30 | "text": "adiós", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540635 37 | }, 38 | { 39 | "id": "74597c31-b649-49cc-bd3b-9a4ed3487070", 40 | "data": [ 41 | { 42 | "text": "suspender", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540635 49 | }, 50 | { 51 | "id": "7b42db30-d815-46fd-a7f5-79b9180c7c6b", 52 | "data": [ 53 | { 54 | "text": "fin", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540635 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "3e7ea801-9d08-479c-ada2-27ce467ca326", 3 | "name": "hi", 4 | "auto": true, 5 | "contexts": [], 6 | "userSays": [ 7 | { 8 | "id": "6a29da7f-3926-4b68-aa6d-dbbfe1c6b79d", 9 | "data": [ 10 | { 11 | "text": "hi" 12 | } 13 | ], 14 | "isTemplate": false, 15 | "count": 0 16 | }, 17 | { 18 | "id": "a83ef752-c2fe-4d45-abd4-35bc381f6d96", 19 | "data": [ 20 | { 21 | "text": "hello" 22 | } 23 | ], 24 | "isTemplate": false, 25 | "count": 0 26 | }, 27 | { 28 | "id": "91c4267d-5e30-4d45-9ce3-d0b21836c372", 29 | "data": [ 30 | { 31 | "text": "hey there" 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0 36 | }, 37 | { 38 | "id": "4b41d389-8bf7-4976-97ea-3b39a3e883ab", 39 | "data": [ 40 | { 41 | "text": "howdy" 42 | } 43 | ], 44 | "isTemplate": false, 45 | "count": 0 46 | }, 47 | { 48 | "id": "d819385a-a9c8-4dd0-9787-9a47c53156f3", 49 | "data": [ 50 | { 51 | "text": "hey" 52 | } 53 | ], 54 | "isTemplate": false, 55 | "count": 0 56 | } 57 | ], 58 | "responses": [ 59 | { 60 | "resetContexts": false, 61 | "action": "greet", 62 | "affectedContexts": [], 63 | "parameters": [], 64 | "messages": [ 65 | { 66 | "type": 0, 67 | "speech": "hey! how can I help you?" 
68 | } 69 | ] 70 | } 71 | ], 72 | "priority": 500000, 73 | "webhookUsed": false, 74 | "fallbackIntent": false 75 | } -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "462fb0f5-d97a-4a95-96ab-91f49f289676", 4 | "data": [ 5 | { 6 | "text": "hey", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "651c6730-61e8-467b-a174-aca4e0ed66af", 16 | "data": [ 17 | { 18 | "text": "howdy", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 0 25 | }, 26 | { 27 | "id": "bab1998d-d54f-4a5f-9ed2-c7e8b24f37fc", 28 | "data": [ 29 | { 30 | "text": "hey there", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 0 37 | }, 38 | { 39 | "id": "9dfc509d-4f0e-4723-af4a-10ceef2fbf91", 40 | "data": [ 41 | { 42 | "text": "hello", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 0 49 | }, 50 | { 51 | "id": "a2271775-488e-4223-9ff4-458cfe4d2ba8", 52 | "data": [ 53 | { 54 | "text": "hi", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 0 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/hi_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "40c60a12-1079-4e2c-a7c3-3498ab00de30", 4 | "data": [ 5 | { 6 | "text": "hello", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507540781 13 | }, 14 | { 15 | "id": "96019dea-5810-4ce0-9f69-16c2ce012603", 16 | "data": [ 17 | { 18 | "text": "Hola amigo", 19 | "userDefined": false 20 | } 21 | ], 22 | "isTemplate": false, 23 | "count": 0, 24 | "updated": 1507540781 25 | }, 26 | { 27 | "id": "298a54d1-c80c-4542-b41f-2cb549db814c", 28 | "data": [ 29 | { 30 | "text": "Bueno", 31 | "userDefined": false 32 | } 33 | ], 34 | "isTemplate": false, 35 | "count": 0, 36 | "updated": 1507540781 37 | }, 38 | { 39 | "id": "0f490459-85df-4865-abd0-70f87f62890e", 40 | "data": [ 41 | { 42 | "text": "Caramba", 43 | "userDefined": false 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 0, 48 | "updated": 1507540781 49 | }, 50 | { 51 | "id": "1bf90b9b-fe34-42ae-96ea-21beaa3fca4f", 52 | "data": [ 53 | { 54 | "text": "Hola", 55 | "userDefined": false 56 | } 57 | ], 58 | "isTemplate": false, 59 | "count": 0, 60 | "updated": 1507540781 61 | } 62 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": "fd566317-b15d-4685-9158-63028b6fb5bf", 3 | "name": "inform", 4 | "auto": true, 5 | "contexts": [], 6 | "userSays": [ 7 | { 8 | "id": "d5562a99-f039-4bc7-a1bd-53d6cc811206", 9 | "data": [ 10 | { 11 | "text": "central", 12 | "alias": "location", 13 | "meta": "@location", 14 | "userDefined": false 15 | }, 16 | { 17 | "text": " " 18 | }, 19 | { 20 | "text": "indian", 21 | "alias": "cuisine", 22 | "meta": "@cuisine", 23 | "userDefined": true 24 | }, 25 | { 26 | "text": " restaurant" 27 | } 28 | ], 29 | "isTemplate": false, 30 | "count": 0 31 | }, 32 | { 33 | "id": 
"5e043d29-a4ce-4642-a191-e287d397b02f", 34 | "data": [ 35 | { 36 | "text": "anywhere in the " 37 | }, 38 | { 39 | "text": "west", 40 | "alias": "location", 41 | "meta": "@location", 42 | "userDefined": false 43 | } 44 | ], 45 | "isTemplate": false, 46 | "count": 0 47 | }, 48 | { 49 | "id": "540cbb16-4c62-4bc7-b3f2-8f1107e65471", 50 | "data": [ 51 | { 52 | "text": "search for restaurants" 53 | } 54 | ], 55 | "isTemplate": false, 56 | "count": 0 57 | }, 58 | { 59 | "id": "cb096268-a8e5-49d5-8771-60bb1a511151", 60 | "data": [ 61 | { 62 | "text": "i am looking for an " 63 | }, 64 | { 65 | "text": "indian", 66 | "alias": "cuisine", 67 | "meta": "@cuisine", 68 | "userDefined": true 69 | }, 70 | { 71 | "text": " spot" 72 | } 73 | ], 74 | "isTemplate": false, 75 | "count": 0 76 | }, 77 | { 78 | "id": "2cf1b035-62f1-40cd-867c-af92cbbaf5c7", 79 | "data": [ 80 | { 81 | "text": "show me " 82 | }, 83 | { 84 | "text": "chinese", 85 | "alias": "cuisine", 86 | "meta": "@cuisine", 87 | "userDefined": true 88 | }, 89 | { 90 | "text": " restaurants" 91 | } 92 | ], 93 | "isTemplate": false, 94 | "count": 0 95 | }, 96 | { 97 | "id": "51f618be-e5a9-4007-a0d5-d6f12475a9b1", 98 | "data": [ 99 | { 100 | "text": "i\u0027m looking for a place in the " 101 | }, 102 | { 103 | "text": "north", 104 | "alias": "location", 105 | "meta": "@location", 106 | "userDefined": true 107 | }, 108 | { 109 | "text": " of " 110 | } 111 | ], 112 | "isTemplate": false, 113 | "count": 0 114 | }, 115 | { 116 | "id": "20a723ce-1efc-4c03-84ec-ee17c3219329", 117 | "data": [ 118 | { 119 | "text": "i\u0027m looking for a place to eat" 120 | } 121 | ], 122 | "isTemplate": false, 123 | "count": 0 124 | } 125 | ], 126 | "responses": [ 127 | { 128 | "resetContexts": false, 129 | "affectedContexts": [], 130 | "parameters": [ 131 | { 132 | "dataType": "@location", 133 | "name": "location", 134 | "value": "$location", 135 | "isList": true 136 | }, 137 | { 138 | "dataType": "@cuisine", 139 | "name": "cuisine", 140 | "value": "$cuisine", 141 | "isList": false 142 | } 143 | ], 144 | "messages": [ 145 | { 146 | "type": 0, 147 | "speech": "Here is a great spot I am sure you\u0027ll like, pal!" 
148 | } 149 | ] 150 | } 151 | ], 152 | "priority": 500000, 153 | "webhookUsed": false, 154 | "fallbackIntent": false 155 | } 156 | -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform_usersays_en.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "e623ff79-8f24-40bf-a6ed-5a885d9af6c8", 4 | "data": [ 5 | { 6 | "text": "i\u0027m looking for a place to eat", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 0 13 | }, 14 | { 15 | "id": "a3e1901a-6c84-402c-a5a4-e2ec05307aa9", 16 | "data": [ 17 | { 18 | "text": "i\u0027m looking for a place in the ", 19 | "userDefined": false 20 | }, 21 | { 22 | "text": "north", 23 | "alias": "location", 24 | "meta": "@location", 25 | "userDefined": false 26 | }, 27 | { 28 | "text": " of", 29 | "userDefined": false 30 | } 31 | ], 32 | "isTemplate": false, 33 | "count": 0, 34 | "updated": 0 35 | }, 36 | { 37 | "id": "fbf1e1b3-da86-4bbf-98b4-85e09f14c7b2", 38 | "data": [ 39 | { 40 | "text": "show me ", 41 | "userDefined": false 42 | }, 43 | { 44 | "text": "chinese", 45 | "alias": "cuisine", 46 | "meta": "@cuisine", 47 | "userDefined": false 48 | }, 49 | { 50 | "text": " restaurants", 51 | "userDefined": false 52 | } 53 | ], 54 | "isTemplate": false, 55 | "count": 0, 56 | "updated": 0 57 | }, 58 | { 59 | "id": "860619cb-6c78-41b9-882d-d105a51e4377", 60 | "data": [ 61 | { 62 | "text": "i am looking for an ", 63 | "userDefined": false 64 | }, 65 | { 66 | "text": "indian", 67 | "alias": "cuisine", 68 | "meta": "@cuisine", 69 | "userDefined": false 70 | }, 71 | { 72 | "text": " spot", 73 | "userDefined": false 74 | } 75 | ], 76 | "isTemplate": false, 77 | "count": 0, 78 | "updated": 0 79 | }, 80 | { 81 | "id": "a8b9fa75-19db-49ef-963b-50d316a14aa2", 82 | "data": [ 83 | { 84 | "text": "search for restaurants", 85 | "userDefined": false 86 | } 87 | ], 88 | "isTemplate": false, 89 | "count": 0, 90 | "updated": 0 91 | }, 92 | { 93 | "id": "c91a0223-e109-4d32-aab0-4008fb0a9f35", 94 | "data": [ 95 | { 96 | "text": "anywhere in the ", 97 | "userDefined": false 98 | }, 99 | { 100 | "text": "west", 101 | "alias": "location", 102 | "meta": "@location", 103 | "userDefined": false 104 | } 105 | ], 106 | "isTemplate": false, 107 | "count": 0, 108 | "updated": 0 109 | }, 110 | { 111 | "id": "34c28215-f492-44d6-88a9-779ff59cb301", 112 | "data": [ 113 | { 114 | "text": "central", 115 | "alias": "location", 116 | "meta": "@location", 117 | "userDefined": false 118 | }, 119 | { 120 | "text": " ", 121 | "userDefined": false 122 | }, 123 | { 124 | "text": "indian", 125 | "alias": "cuisine", 126 | "meta": "@cuisine", 127 | "userDefined": false 128 | }, 129 | { 130 | "text": " restaurant", 131 | "userDefined": false 132 | } 133 | ], 134 | "isTemplate": false, 135 | "count": 0, 136 | "updated": 0 137 | } 138 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/intents/inform_usersays_es.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "id": "b3ecd39d-4eec-435d-a0fe-6cfd29412ad7", 4 | "data": [ 5 | { 6 | "text": "estoy buscando un lugar para comer", 7 | "userDefined": false 8 | } 9 | ], 10 | "isTemplate": false, 11 | "count": 0, 12 | "updated": 1507541018 13 | }, 14 | { 15 | "id": "5a25e429-a00a-4418-a48b-7d019eca3ac6", 16 | "data": [ 17 | { 18 | "text": "busco un lugar en el ", 19 | "userDefined": false 20 | }, 
21 | { 22 | "text": "norte", 23 | "alias": "location", 24 | "meta": "@location", 25 | "userDefined": false 26 | } 27 | ], 28 | "isTemplate": false, 29 | "count": 0, 30 | "updated": 1507541018 31 | }, 32 | { 33 | "id": "8b6fcd21-8e95-46ff-a841-54c59c650571", 34 | "data": [ 35 | { 36 | "text": "muéstrame los restaurantes ", 37 | "userDefined": false 38 | }, 39 | { 40 | "text": "chinos", 41 | "alias": "cuisine", 42 | "meta": "@cuisine", 43 | "userDefined": true 44 | } 45 | ], 46 | "isTemplate": false, 47 | "count": 1, 48 | "updated": 1507541018 49 | }, 50 | { 51 | "id": "86992625-d848-47de-8220-f9a7d5ddf63b", 52 | "data": [ 53 | { 54 | "text": "estoy buscando un lugar ", 55 | "userDefined": false 56 | }, 57 | { 58 | "text": "indio", 59 | "alias": "cuisine", 60 | "meta": "@cuisine", 61 | "userDefined": false 62 | } 63 | ], 64 | "isTemplate": false, 65 | "count": 0, 66 | "updated": 1507541018 67 | }, 68 | { 69 | "id": "2c5ce034-f89b-4570-a7eb-7bee714902df", 70 | "data": [ 71 | { 72 | "text": "buscar restaurantes", 73 | "userDefined": false 74 | } 75 | ], 76 | "isTemplate": false, 77 | "count": 0, 78 | "updated": 1507540897 79 | }, 80 | { 81 | "id": "93ce3697-b53f-4e96-bea1-9de96b098ec4", 82 | "data": [ 83 | { 84 | "text": "en cualquier parte del ", 85 | "userDefined": false 86 | }, 87 | { 88 | "text": "oeste", 89 | "alias": "location", 90 | "meta": "@location", 91 | "userDefined": false 92 | } 93 | ], 94 | "isTemplate": false, 95 | "count": 0, 96 | "updated": 1507540897 97 | }, 98 | { 99 | "id": "6190aae9-a23f-4d8a-9a50-84a9d971d3f0", 100 | "data": [ 101 | { 102 | "text": "restaurante ", 103 | "userDefined": false 104 | }, 105 | { 106 | "text": "central", 107 | "alias": "location", 108 | "meta": "@location", 109 | "userDefined": false 110 | }, 111 | { 112 | "text": " de la ", 113 | "userDefined": false 114 | }, 115 | { 116 | "text": "India", 117 | "alias": "cuisine", 118 | "meta": "@cuisine", 119 | "userDefined": false 120 | } 121 | ], 122 | "isTemplate": false, 123 | "count": 0, 124 | "updated": 1507540897 125 | } 126 | ] -------------------------------------------------------------------------------- /data/examples/dialogflow/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "1.0.0" 3 | } -------------------------------------------------------------------------------- /data/examples/rasa/demo-rasa.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | 14 | ## intent:goodbye 15 | - bye 16 | - goodbye 17 | - good bye 18 | - stop 19 | - end 20 | - farewell 21 | - Bye bye 22 | - have a good one 23 | 24 | ## intent:greet 25 | - hey 26 | - howdy 27 | - hey there 28 | - hello 29 | - hi 30 | - good morning 31 | - good evening 32 | - dear sir 33 | 34 | ## intent:restaurant_search 35 | - i'm looking for a place to eat 36 | - I want to grab lunch 37 | - I am searching for a dinner spot 38 | - i'm looking for a place in the [north](location) of town 39 | - show me [chinese](cuisine) restaurants 40 | - show me [chines](cuisine:chinese) restaurants in the [north](location) 41 | - show me a [mexican](cuisine) place in the [centre](location) 42 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 43 | - search for restaurants 44 | - anywhere in the [west](location) 45 | - anywhere near [18328](location) 
46 | - I am looking for [asian fusion](cuisine) food 47 | - I am looking a restaurant in [29432](location) 48 | - I am looking for [mexican indian fusion](cuisine) 49 | - [central](location) [indian](cuisine) restaurant 50 | 51 | ## synonym:chinese 52 | + Chines 53 | * Chinese 54 | 55 | ## synonym:vegetarian 56 | - vegg 57 | - veggie 58 | 59 | ## regex:zipcode 60 | - [0-9]{5} 61 | 62 | ## regex:greet 63 | - hey[^\s]* -------------------------------------------------------------------------------- /data/examples/wit/demo-flights.json: -------------------------------------------------------------------------------- 1 | { 2 | "data" : [ 3 | { 4 | "text" : "i want to go from berlin to tokyo tomorrow", 5 | "entities" : [ 6 | { 7 | "entity" : "location", 8 | "value" : "\"berlin\"", 9 | "role" : "from", 10 | "start" : 18, 11 | "end" : 24 12 | }, 13 | { 14 | "entity" : "intent", 15 | "value" : "\"flight_booking\"", 16 | "start" : 0, 17 | "end" : 42 18 | }, 19 | { 20 | "entity" : "location", 21 | "value" : "\"tokyo\"", 22 | "role" : "to", 23 | "start" : 28, 24 | "end" : 33 25 | }, 26 | { 27 | "entity" : "datetime", 28 | "value" : "\"2016-05-29T00:00:00.000-07:00\"", 29 | "start" : 34, 30 | "end" : 42 31 | } 32 | ] 33 | }, 34 | { 35 | "text" : "i'm looking for a flight from london to amsterdam next monday", 36 | "entities" : [ 37 | { 38 | "entity" : "location", 39 | "value" : "\"london\"", 40 | "role" : "from", 41 | "start" : 30, 42 | "end" : 36 43 | }, 44 | { 45 | "entity" : "location", 46 | "value" : "\"amsterdam\"", 47 | "role" : "to", 48 | "start" : 40, 49 | "end" : 49 50 | }, 51 | { 52 | "entity" : "datetime", 53 | "value" : "\"2016-05-30T00:00:00.000-07:00\"", 54 | "start" : 50, 55 | "end" : 61 56 | } 57 | ] 58 | }, 59 | { 60 | "text" : "i want to fly to berlin", 61 | "entities" : [ 62 | { 63 | "entity" : "location", 64 | "value" : "\"berlin\"", 65 | "role" : "from", 66 | "start" : 17, 67 | "end" : 23 68 | } 69 | ] 70 | }, 71 | { 72 | "text" : "i want to fly from london", 73 | "entities" : [ 74 | { 75 | "entity" : "location", 76 | "value" : "\"london\"", 77 | "role" : "from", 78 | "start" : 19, 79 | "end" : 25 80 | } 81 | ] 82 | } 83 | ] 84 | } -------------------------------------------------------------------------------- /data/test/demo-rasa-small.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "hey", 6 | "intent": "greet", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "dear sir", 11 | "intent": "greet", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "i'm looking for a place to eat", 16 | "intent": "restaurant_search", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "i'm looking for a place in the north of town", 21 | "intent": "restaurant_search", 22 | "entities": [ 23 | { 24 | "start": 31, 25 | "end": 36, 26 | "value": "north", 27 | "entity": "location" 28 | } 29 | ] 30 | }, 31 | { 32 | "text": "show me a mexican place in the centre", 33 | "intent": "restaurant_search", 34 | "entities": [ 35 | { 36 | "start": 31, 37 | "end": 37, 38 | "value": "centre", 39 | "entity": "location" 40 | }, 41 | { 42 | "start": 10, 43 | "end": 17, 44 | "value": "mexican", 45 | "entity": "cuisine" 46 | } 47 | ] 48 | } 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /data/test/dialogflow_en_converted_to_rasa.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "entity_synonyms": 
[ 4 | { 5 | "value": "mexican", 6 | "synonyms": ["mexico"] 7 | }, 8 | { 9 | "value": "chinese", 10 | "synonyms": ["china"] 11 | }, 12 | { 13 | "value": "indian", 14 | "synonyms": ["india"] 15 | } 16 | ], 17 | "common_examples": [ 18 | { 19 | "text": "central indian restaurant", 20 | "intent": "inform", 21 | "entities": [ 22 | { 23 | "start": 0, 24 | "end": 7, 25 | "value": "central", 26 | "entity": "location" 27 | }, 28 | { 29 | "start": 8, 30 | "end": 14, 31 | "value": "indian", 32 | "entity": "cuisine" 33 | } 34 | ] 35 | }, 36 | { 37 | "text": "anywhere in the west", 38 | "intent": "inform", 39 | "entities": [ 40 | { 41 | "start": 16, 42 | "end": 20, 43 | "value": "west", 44 | "entity": "location" 45 | } 46 | ] 47 | }, 48 | { 49 | "text": "i am looking for an indian spot", 50 | "intent": "inform", 51 | "entities": [ 52 | { 53 | "start": 20, 54 | "end": 26, 55 | "value": "indian", 56 | "entity": "cuisine" 57 | } 58 | ] 59 | }, 60 | { 61 | "text": "show me chinese restaurants", 62 | "intent": "inform", 63 | "entities": [ 64 | { 65 | "start": 8, 66 | "end": 15, 67 | "value": "chinese", 68 | "entity": "cuisine" 69 | } 70 | ] 71 | }, 72 | { 73 | "text": "i'm looking for a place in the north of ", 74 | "intent": "inform", 75 | "entities": [ 76 | { 77 | "start": 31, 78 | "end": 36, 79 | "value": "north", 80 | "entity": "location" 81 | } 82 | ] 83 | }, 84 | { 85 | "text": "great", 86 | "intent": "affirm", 87 | "entities": [] 88 | }, 89 | { 90 | "text": "ok", 91 | "intent": "affirm", 92 | "entities": [] 93 | }, 94 | { 95 | "text": "that's right", 96 | "intent": "affirm", 97 | "entities": [] 98 | }, 99 | { 100 | "text": "indeed", 101 | "intent": "affirm", 102 | "entities": [] 103 | }, 104 | { 105 | "text": "yeah", 106 | "intent": "affirm", 107 | "entities": [] 108 | }, 109 | { 110 | "text": "yep", 111 | "intent": "affirm", 112 | "entities": [] 113 | }, 114 | { 115 | "text": "yes", 116 | "intent": "affirm", 117 | "entities": [] 118 | }, 119 | { 120 | "text": "end", 121 | "intent": "goodbye", 122 | "entities": [] 123 | }, 124 | { 125 | "text": "stop", 126 | "intent": "goodbye", 127 | "entities": [] 128 | }, 129 | { 130 | "text": "good bye", 131 | "intent": "goodbye", 132 | "entities": [] 133 | }, 134 | { 135 | "text": "goodbye", 136 | "intent": "goodbye", 137 | "entities": [] 138 | }, 139 | { 140 | "text": "bye", 141 | "intent": "goodbye", 142 | "entities": [] 143 | }, 144 | { 145 | "text": "hi", 146 | "intent": "hi", 147 | "entities": [] 148 | }, 149 | { 150 | "text": "hello", 151 | "intent": "hi", 152 | "entities": [] 153 | }, 154 | { 155 | "text": "hey there", 156 | "intent": "hi", 157 | "entities": [] 158 | }, 159 | { 160 | "text": "howdy", 161 | "intent": "hi", 162 | "entities": [] 163 | }, 164 | { 165 | "text": "hey", 166 | "intent": "hi", 167 | "entities": [] 168 | }, 169 | { 170 | "text": "search for restaurants", 171 | "intent": "inform", 172 | "entities": [] 173 | }, 174 | { 175 | "text": "i'm looking for a place to eat", 176 | "intent": "inform", 177 | "entities": [] 178 | } 179 | ] 180 | } 181 | } -------------------------------------------------------------------------------- /data/test/json_converted_to_md.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | 14 | ## intent:goodbye 15 | - bye 16 | - goodbye 17 | - good bye 18 | - stop 19 | - end 20 | - farewell 21 
| - Bye bye 22 | - have a good one 23 | 24 | ## intent:greet 25 | - hey 26 | - howdy 27 | - hey there 28 | - hello 29 | - hi 30 | - good morning 31 | - good evening 32 | - dear sir 33 | 34 | ## intent:restaurant_search 35 | - i'm looking for a place to eat 36 | - I want to grab lunch 37 | - I am searching for a dinner spot 38 | - i'm looking for a place in the [north](location) of town 39 | - show me [chinese](cuisine) restaurants 40 | - show me [chines](cuisine:chinese) restaurants 41 | - show me a [mexican](cuisine) place in the [centre](location) 42 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 43 | - search for restaurants 44 | - anywhere in the [west](location) 45 | - anywhere near [18328](location) 46 | - I am looking for [asian fusion](cuisine) food 47 | - I am looking a restaurant in [29432](location) 48 | - I am looking for [mexican indian fusion](cuisine) 49 | - [central](location) [indian](cuisine) restaurant 50 | 51 | ## synonym:chinese 52 | - Chines 53 | - chines 54 | - Chinese 55 | 56 | ## synonym:vegetarian 57 | - vegg 58 | - veggie 59 | -------------------------------------------------------------------------------- /data/test/markdown_single_sections/regex_only.md: -------------------------------------------------------------------------------- 1 | ## regex:greet 2 | - hey[^\s]* -------------------------------------------------------------------------------- /data/test/markdown_single_sections/synonyms_only.md: -------------------------------------------------------------------------------- 1 | ## synonym:chinese 2 | - Chines 3 | - Chinese -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-affirm.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "yes", 6 | "intent": "affirm", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "yep", 11 | "intent": "affirm", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "yeah", 16 | "intent": "affirm", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "indeed", 21 | "intent": "affirm", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "that's right", 26 | "intent": "affirm", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "ok", 31 | "intent": "affirm", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "great", 36 | "intent": "affirm", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "right, thank you", 41 | "intent": "affirm", 42 | "entities": [] 43 | }, 44 | { 45 | "text": "correct", 46 | "intent": "affirm", 47 | "entities": [] 48 | }, 49 | { 50 | "text": "great choice", 51 | "intent": "affirm", 52 | "entities": [] 53 | }, 54 | { 55 | "text": "sounds really good", 56 | "intent": "affirm", 57 | "entities": [] 58 | } 59 | ] 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-goodbye.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "bye", 6 | "intent": "goodbye", 7 | "entities": [] 8 | }, 9 | { 10 | "text": "goodbye", 11 | "intent": "goodbye", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "good bye", 16 | "intent": "goodbye", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "stop", 21 | "intent": "goodbye", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "end", 26 | "intent": "goodbye", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "farewell", 31 | 
"intent": "goodbye", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "Bye bye", 36 | "intent": "goodbye", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "have a good one", 41 | "intent": "goodbye", 42 | "entities": [] 43 | } 44 | ] 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /data/test/multiple_files_json/demo-rasa-greet.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "regex_features": [ 4 | { 5 | "name": "zipcode", 6 | "pattern": "[0-9]{5}" 7 | }], 8 | "common_examples": [ 9 | { 10 | "text": "hey", 11 | "intent": "greet", 12 | "entities": [] 13 | }, 14 | { 15 | "text": "howdy", 16 | "intent": "greet", 17 | "entities": [] 18 | }, 19 | { 20 | "text": "hey there", 21 | "intent": "greet", 22 | "entities": [] 23 | }, 24 | { 25 | "text": "hello", 26 | "intent": "greet", 27 | "entities": [] 28 | }, 29 | { 30 | "text": "hi", 31 | "intent": "greet", 32 | "entities": [] 33 | }, 34 | { 35 | "text": "good morning", 36 | "intent": "greet", 37 | "entities": [] 38 | }, 39 | { 40 | "text": "good evening", 41 | "intent": "greet", 42 | "entities": [] 43 | }, 44 | { 45 | "text": "dear sir", 46 | "intent": "greet", 47 | "entities": [] 48 | } 49 | ] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-affirm.md: -------------------------------------------------------------------------------- 1 | ## intent:affirm 2 | - yes 3 | - yep 4 | - yeah 5 | - indeed 6 | - that's right 7 | - ok 8 | - great 9 | - right, thank you 10 | - correct 11 | - great choice 12 | - sounds really good 13 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-goodbye.md: -------------------------------------------------------------------------------- 1 | ## intent:goodbye 2 | - bye 3 | - goodbye 4 | - good bye 5 | - stop 6 | - end 7 | - farewell 8 | - Bye bye 9 | - have a good one 10 | -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-greet.md: -------------------------------------------------------------------------------- 1 | ## intent:greet 2 | - hey 3 | - howdy 4 | - hey there 5 | - hello 6 | - hi 7 | - good morning 8 | - good evening 9 | - dear sir 10 | 11 | ## regex:greet 12 | - hey[^\s]* -------------------------------------------------------------------------------- /data/test/multiple_files_markdown/demo-rasa-restaurant_search.md: -------------------------------------------------------------------------------- 1 | ## intent:restaurant_search 2 | - i'm looking for a place to eat 3 | - I want to grab lunch 4 | - I am searching for a dinner spot 5 | - i'm looking for a place in the [north](location) of town 6 | - show me [chinese](cuisine) restaurants 7 | - show me [chines](cuisine:chinese) restaurants in the [north](location) 8 | - show me a [mexican](cuisine) place in the [centre](location) 9 | - i am looking for an [indian](cuisine) spot called olaolaolaolaolaola 10 | - search for restaurants 11 | - anywhere in the [west](location) 12 | - anywhere near [18328](location) 13 | - I am looking for [asian fusion](cuisine) food 14 | - I am looking a restaurant in [29432](location) 15 | - I am looking for [mexican indian fusion](cuisine) 16 | - [central](location) [indian](cuisine) restaurant 17 | 18 | ## synonym:chinese 19 | - Chines 20 | - Chinese 21 | 22 | ## 
synonym:vegetarian 23 | - vegg 24 | - veggie 25 | 26 | ## regex:zipcode 27 | - [0-9]{5} 28 | -------------------------------------------------------------------------------- /data/test/wit_converted_to_rasa.json: -------------------------------------------------------------------------------- 1 | { 2 | "rasa_nlu_data": { 3 | "common_examples": [ 4 | { 5 | "text": "i want to go from berlin to tokyo tomorrow", 6 | "intent": "flight_booking", 7 | "entities": [ 8 | { 9 | "start": 18, 10 | "role": "from", 11 | "end": 24, 12 | "value": "berlin", 13 | "entity": "location" 14 | }, 15 | { 16 | "start": 28, 17 | "role": "to", 18 | "end": 33, 19 | "value": "tokyo", 20 | "entity": "location" 21 | }, 22 | { 23 | "start": 34, 24 | "end": 42, 25 | "value": "2016-05-29T00:00:00.000-07:00", 26 | "entity": "datetime" 27 | } 28 | ] 29 | }, 30 | { 31 | "text": "i'm looking for a flight from london to amsterdam next monday", 32 | "entities": [ 33 | { 34 | "start": 30, 35 | "role": "from", 36 | "end": 36, 37 | "value": "london", 38 | "entity": "location" 39 | }, 40 | { 41 | "start": 40, 42 | "role": "to", 43 | "end": 49, 44 | "value": "amsterdam", 45 | "entity": "location" 46 | }, 47 | { 48 | "start": 50, 49 | "end": 61, 50 | "value": "2016-05-30T00:00:00.000-07:00", 51 | "entity": "datetime" 52 | } 53 | ] 54 | }, 55 | { 56 | "text": "i want to fly to berlin", 57 | "entities": [ 58 | { 59 | "start": 17, 60 | "role": "from", 61 | "end": 23, 62 | "value": "berlin", 63 | "entity": "location" 64 | } 65 | ] 66 | }, 67 | { 68 | "text": "i want to fly from london", 69 | "entities": [ 70 | { 71 | "start": 19, 72 | "role": "from", 73 | "end": 25, 74 | "value": "london", 75 | "entity": "location" 76 | } 77 | ] 78 | } 79 | ] 80 | } 81 | } -------------------------------------------------------------------------------- /docker/Dockerfile_bare: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | COPY . ${RASA_NLU_HOME} 18 | 19 | RUN pip install -r alt_requirements/requirements_bare.txt 20 | 21 | RUN pip install -e . 
22 | 23 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 24 | 25 | EXPOSE 5000 26 | 27 | ENTRYPOINT ["./entrypoint.sh"] 28 | CMD ["help"] 29 | -------------------------------------------------------------------------------- /docker/Dockerfile_full: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | # use bash always 18 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 19 | 20 | 21 | COPY . ${RASA_NLU_HOME} 22 | 23 | ## install java stuff 24 | 25 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 26 | 27 | RUN apt-get update && apt-get install -y --no-install-recommends \ 28 | bzip2 \ 29 | unzip \ 30 | xz-utils \ 31 | && rm -rf /var/lib/apt/lists/* 32 | 33 | # Default to UTF-8 file.encoding 34 | ENV LANG C.UTF-8 35 | 36 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 37 | # based on whether the JDK or only the JRE is installed 38 | RUN { \ 39 | echo '#!/bin/sh'; \ 40 | echo 'set -e'; \ 41 | echo; \ 42 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 43 | } > /usr/local/bin/docker-java-home \ 44 | && chmod +x /usr/local/bin/docker-java-home 45 | 46 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 47 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 48 | ENV JAVA_HOME /docker-java-home 49 | 50 | ENV JAVA_VERSION 8u141 51 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 52 | 53 | # see https://bugs.debian.org/775775 54 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 55 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 56 | 57 | RUN set -ex; \ 58 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 59 | if [ ! -d /usr/share/man/man1 ]; then \ 60 | mkdir -p /usr/share/man/man1; \ 61 | fi; \ 62 | apt-get update; \ 63 | apt-get install -y -t jessie-backports\ 64 | openjdk-8-jdk \ 65 | ca-certificates-java \ 66 | ; \ 67 | rm -rf /var/lib/apt/lists/*; \ 68 | # verify that "docker-java-home" returns what we expect 69 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 70 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 71 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 72 | # ... and verify that it actually worked for one of the alternatives we care about 73 | update-alternatives --query java | grep -q 'Status: manual' 74 | 75 | # see CA_CERTIFICATES_JAVA_VERSION notes above 76 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 77 | 78 | ## done java 79 | 80 | 81 | RUN pip install -r alt_requirements/requirements_full.txt 82 | 83 | RUN pip install -e . 
84 | 85 | RUN apt-get update -qq \ 86 | && apt-get install -y --no-install-recommends wget \ 87 | && wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat \ 88 | && apt-get remove -y wget \ 89 | && apt-get autoremove -y 90 | 91 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 92 | && python -m spacy link en_core_web_md en \ 93 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 94 | && python -m spacy link de_core_news_sm de 95 | 96 | COPY sample_configs/config_spacy_duckling.yml ${RASA_NLU_HOME}/config.yml 97 | 98 | #VOLUME ["/app/projects", "/app/logs", "/app/data"] 99 | 100 | EXPOSE 5000 101 | 102 | ENTRYPOINT ["./entrypoint.sh"] 103 | CMD ["start", "-c", "config.yml", "--path", "/app/projects"] 104 | -------------------------------------------------------------------------------- /docker/Dockerfile_mitie: -------------------------------------------------------------------------------- 1 | FROM python:2.7-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python2.7/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | 16 | # use bash always 17 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 18 | 19 | ## install java stuff 20 | 21 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 22 | 23 | RUN apt-get update && apt-get install -y --no-install-recommends \ 24 | bzip2 \ 25 | unzip \ 26 | xz-utils \ 27 | && rm -rf /var/lib/apt/lists/* 28 | 29 | # Default to UTF-8 file.encoding 30 | ENV LANG C.UTF-8 31 | 32 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 33 | # based on whether the JDK or only the JRE is installed 34 | RUN { \ 35 | echo '#!/bin/sh'; \ 36 | echo 'set -e'; \ 37 | echo; \ 38 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 39 | } > /usr/local/bin/docker-java-home \ 40 | && chmod +x /usr/local/bin/docker-java-home 41 | 42 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 43 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 44 | ENV JAVA_HOME /docker-java-home 45 | 46 | ENV JAVA_VERSION 8u141 47 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 48 | 49 | # see https://bugs.debian.org/775775 50 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 51 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 52 | 53 | RUN set -ex; \ 54 | \ 55 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 56 | if [ ! 
-d /usr/share/man/man1 ]; then \ 57 | mkdir -p /usr/share/man/man1; \ 58 | fi; \ 59 | \ 60 | apt-get update; \ 61 | apt-get install -y -t jessie-backports\ 62 | openjdk-8-jdk \ 63 | ca-certificates-java \ 64 | ; \ 65 | rm -rf /var/lib/apt/lists/*; \ 66 | \ 67 | # verify that "docker-java-home" returns what we expect 68 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 69 | \ 70 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 71 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 72 | # ... and verify that it actually worked for one of the alternatives we care about 73 | update-alternatives --query java | grep -q 'Status: manual' 74 | 75 | # see CA_CERTIFICATES_JAVA_VERSION notes above 76 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 77 | 78 | ## done java 79 | 80 | 81 | WORKDIR ${RASA_NLU_HOME} 82 | 83 | COPY . ${RASA_NLU_HOME} 84 | 85 | RUN pip install -r alt_requirements/requirements_mitie.txt 86 | 87 | RUN pip install -e . 88 | 89 | RUN apt-get update -qq \ 90 | && apt-get install -y --no-install-recommends wget \ 91 | && wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat \ 92 | && apt-get remove -y wget \ 93 | && apt-get autoremove -y 94 | 95 | COPY sample_configs/config_mitie.json ${RASA_NLU_HOME}/config.json 96 | 97 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 98 | 99 | EXPOSE 5000 100 | 101 | ENTRYPOINT ["./entrypoint.sh"] 102 | CMD ["start", "-c", "config.json"] 103 | -------------------------------------------------------------------------------- /docker/Dockerfile_spacy_sklearn: -------------------------------------------------------------------------------- 1 | FROM python:3.6-slim 2 | 3 | ENV RASA_NLU_DOCKER="YES" \ 4 | RASA_NLU_HOME=/app \ 5 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 6 | 7 | # Run updates, install basics and cleanup 8 | # - build-essential: Compile specific dependencies 9 | # - git-core: Checkout git repos 10 | RUN apt-get update -qq \ 11 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl \ 12 | && apt-get clean \ 13 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 14 | 15 | WORKDIR ${RASA_NLU_HOME} 16 | 17 | COPY . 
${RASA_NLU_HOME} 18 | 19 | 20 | # use bash always 21 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 22 | 23 | ## install java stuff 24 | 25 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 26 | 27 | RUN apt-get update && apt-get install -y --no-install-recommends \ 28 | bzip2 \ 29 | unzip \ 30 | xz-utils \ 31 | && rm -rf /var/lib/apt/lists/* 32 | 33 | # Default to UTF-8 file.encoding 34 | ENV LANG C.UTF-8 35 | 36 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 37 | # based on whether the JDK or only the JRE is installed 38 | RUN { \ 39 | echo '#!/bin/sh'; \ 40 | echo 'set -e'; \ 41 | echo; \ 42 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 43 | } > /usr/local/bin/docker-java-home \ 44 | && chmod +x /usr/local/bin/docker-java-home 45 | 46 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 47 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 48 | ENV JAVA_HOME /docker-java-home 49 | 50 | ENV JAVA_VERSION 8u141 51 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 52 | 53 | # see https://bugs.debian.org/775775 54 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 55 | ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 56 | 57 | RUN set -ex; \ 58 | \ 59 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 60 | if [ ! -d /usr/share/man/man1 ]; then \ 61 | mkdir -p /usr/share/man/man1; \ 62 | fi; \ 63 | \ 64 | apt-get update; \ 65 | apt-get install -y -t jessie-backports\ 66 | openjdk-8-jdk \ 67 | ca-certificates-java \ 68 | ; \ 69 | rm -rf /var/lib/apt/lists/*; \ 70 | \ 71 | # verify that "docker-java-home" returns what we expect 72 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 73 | \ 74 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 75 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 76 | # ... and verify that it actually worked for one of the alternatives we care about 77 | update-alternatives --query java | grep -q 'Status: manual' 78 | 79 | # see CA_CERTIFICATES_JAVA_VERSION notes above 80 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 81 | 82 | ## done java 83 | 84 | RUN pip install -r alt_requirements/requirements_spacy_sklearn.txt 85 | 86 | RUN pip install -e . 
87 | 88 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 89 | && python -m spacy link en_core_web_md en \ 90 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 91 | && python -m spacy link de_core_news_sm de 92 | 93 | COPY sample_configs/config_spacy.yml ${RASA_NLU_HOME}/config.yml 94 | 95 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 96 | 97 | EXPOSE 5000 98 | 99 | ENTRYPOINT ["./entrypoint.sh"] 100 | CMD ["start", "-c", "config.yml", "--path", "/app/projects"] 101 | -------------------------------------------------------------------------------- /docker/Dockerfile_test: -------------------------------------------------------------------------------- 1 | # Dockerfile to build a whole instance of rasa and run the rasa `pytest` 2 | # (created to test the changes needed for docker automation) 3 | # 4 | # (so far) only used manually, via: 5 | # `docker build -f docker/Dockerfile_test .` (from project root) 6 | # `docker run -it [id-output-from-above]` 7 | 8 | FROM python:3.6-slim 9 | 10 | ENV RASA_NLU_DOCKER="YES" \ 11 | RASA_NLU_HOME=/app \ 12 | RASA_NLU_PYTHON_PACKAGES=/usr/local/lib/python3.6/dist-packages 13 | 14 | # Run updates, install basics and cleanup 15 | # - build-essential: Compile specific dependencies 16 | # - git-core: Checkout git repos 17 | RUN apt-get update -qq \ 18 | && apt-get install -y --no-install-recommends build-essential git-core openssl libssl-dev libffi6 libffi-dev curl vim\ 19 | && apt-get clean \ 20 | && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* 21 | 22 | RUN apt-get update -qq \ 23 | && apt-get install -y --no-install-recommends wget 24 | 25 | RUN pip install https://github.com/explosion/spacy-models/releases/download/en_core_web_md-2.0.0/en_core_web_md-2.0.0.tar.gz --no-cache-dir > /dev/null \ 26 | && python -m spacy link en_core_web_md en \ 27 | && pip install https://github.com/explosion/spacy-models/releases/download/de_core_news_sm-2.0.0/de_core_news_sm-2.0.0.tar.gz --no-cache-dir > /dev/null \ 28 | && python -m spacy link de_core_news_sm de 29 | 30 | 31 | WORKDIR ${RASA_NLU_HOME} 32 | 33 | # use bash always 34 | RUN rm /bin/sh && ln -s /bin/bash /bin/sh 35 | 36 | ## install java stuff 37 | 38 | RUN echo "deb http://http.debian.net/debian jessie-backports main" > /etc/apt/sources.list.d/jessie-backports.list 39 | 40 | RUN apt-get update && apt-get install -y --no-install-recommends \ 41 | bzip2 \ 42 | unzip \ 43 | xz-utils \ 44 | && rm -rf /var/lib/apt/lists/* 45 | 46 | # Default to UTF-8 file.encoding 47 | ENV LANG C.UTF-8 48 | 49 | # add a simple script that can auto-detect the appropriate JAVA_HOME value 50 | # based on whether the JDK or only the JRE is installed 51 | RUN { \ 52 | echo '#!/bin/sh'; \ 53 | echo 'set -e'; \ 54 | echo; \ 55 | echo 'dirname "$(dirname "$(readlink -f "$(which javac || which java)")")"'; \ 56 | } > /usr/local/bin/docker-java-home \ 57 | && chmod +x /usr/local/bin/docker-java-home 58 | 59 | # do some fancy footwork to create a JAVA_HOME that's cross-architecture-safe 60 | RUN ln -svT "/usr/lib/jvm/java-8-openjdk-$(dpkg --print-architecture)" /docker-java-home 61 | ENV JAVA_HOME /docker-java-home 62 | 63 | ENV JAVA_VERSION 8u141 64 | ENV JAVA_DEBIAN_VERSION 8u141-b15-1~deb9u1 65 | 66 | # see https://bugs.debian.org/775775 67 | # and https://github.com/docker-library/java/issues/19#issuecomment-70546872 68 | 
ENV CA_CERTIFICATES_JAVA_VERSION 20170531+nmu1 69 | 70 | RUN set -ex; \ 71 | \ 72 | # deal with slim variants not having man page directories (which causes "update-alternatives" to fail) 73 | if [ ! -d /usr/share/man/man1 ]; then \ 74 | mkdir -p /usr/share/man/man1; \ 75 | fi; \ 76 | \ 77 | apt-get update; \ 78 | apt-get install -y -t jessie-backports\ 79 | openjdk-8-jdk \ 80 | ca-certificates-java \ 81 | ; \ 82 | rm -rf /var/lib/apt/lists/*; \ 83 | \ 84 | # verify that "docker-java-home" returns what we expect 85 | [ "$(readlink -f "$JAVA_HOME")" = "$(docker-java-home)" ]; \ 86 | \ 87 | # update-alternatives so that future installs of other OpenJDK versions don't change /usr/bin/java 88 | update-alternatives --get-selections | awk -v home="$(readlink -f "$JAVA_HOME")" 'index($3, home) == 1 { $2 = "manual"; print | "update-alternatives --set-selections" }'; \ 89 | # ... and verify that it actually worked for one of the alternatives we care about 90 | update-alternatives --query java | grep -q 'Status: manual' 91 | 92 | # see CA_CERTIFICATES_JAVA_VERSION notes above 93 | RUN /var/lib/dpkg/info/ca-certificates-java.postinst configure 94 | 95 | ## done java 96 | 97 | 98 | COPY . ${RASA_NLU_HOME} 99 | 100 | RUN wget -P data/ https://s3-eu-west-1.amazonaws.com/mitie/total_word_feature_extractor.dat 101 | 102 | RUN pip install -r alt_requirements/requirements_dev.txt 103 | 104 | RUN pip install -e . 105 | 106 | RUN sed -i -e 's/backend : tkagg/backend : PDF/' /usr/local/lib/python3.6/site-packages/matplotlib/mpl-data/matplotlibrc 107 | 108 | VOLUME ["/app/projects", "/app/logs", "/app/data"] 109 | 110 | EXPOSE 5000 111 | 112 | ENTRYPOINT ["/usr/local/bin/py.test"] 113 | CMD ["_pytest", "--cov", "rasa_nlu", "--pep8", "-v"] 114 | 115 | -------------------------------------------------------------------------------- /docker/docker-cloud.yml: -------------------------------------------------------------------------------- 1 | rasanlu: 2 | image: rasa/rasa_nlu:latest-spacy 3 | ports: 4 | - "5000:5000" 5 | volumes: 6 | - "/rasa-app-data/projects:/app/projects" 7 | - "/rasa-app-data/logs:/app/logs" 8 | -------------------------------------------------------------------------------- /docs/_static/css/custom.css: -------------------------------------------------------------------------------- 1 | .rating-container { 2 | background-color: rgba(0, 0, 16, 0.8); 3 | padding: 1rem; 4 | text-align: center; 5 | margin-bottom: 2rem; 6 | } 7 | 8 | .rating-text p { 9 | color: white; 10 | font-size: 1.3rem; 11 | padding: 0.3rem; 12 | } 13 | 14 | .submit { 15 | text-align: center; 16 | } 17 | 18 | svg { 19 | fill: rgb(242, 242, 242); 20 | height: 3.6rem; 21 | width: 3.6rem; 22 | margin: 0.2rem; 23 | } 24 | 25 | label { 26 | color: white; 27 | text-align: center; 28 | display: inline-block; 29 | margin: 0 1rem 0 1rem; 30 | } 31 | 32 | #radios label { 33 | position: relative; 34 | } 35 | 36 | input[type="radio"] { 37 | position: absolute; 38 | opacity: 0; 39 | } 40 | 41 | input[type="radio"] + svg { 42 | -webkit-transition: all 0.2s; 43 | transition: all 0.2s; 44 | } 45 | 46 | input + svg { 47 | cursor: pointer; 48 | } 49 | 50 | input[class="great"]:hover + svg, 51 | input[class="great"]:checked + svg, 52 | input[class="great"]:focus + svg { 53 | fill: rgb(0, 204, 79); 54 | } 55 | 56 | input[class="didnt-work"]:hover + svg, 57 | input[class="didnt-work"]:checked + svg, 58 | input[class="didnt-work"]:focus + svg { 59 | fill: rgb(255, 0, 0); 60 | } 61 | 62 | input[class="didnt-finish"]:hover + svg, 63 | 
input[class="didnt-finish"]:checked + svg, 64 | input[class="didnt-finish"]:focus + svg { 65 | fill: rgb(255, 255, 0); 66 | } 67 | 68 | .wy-table-responsive table td, 69 | .wy-table-responsive table th { 70 | white-space: normal !important; 71 | } 72 | 73 | /* 74 | fixes wrong line height in code snippets. (line height between code lines 75 | and shown line numbers is different so that the line numbers and the code 76 | lines don't align without that fix) 77 | */ 78 | .rst-content .highlight > pre { 79 | line-height: 18px; 80 | } 81 | 82 | /* 83 | fixes overlapping "Choose Version" bar in the navigation hiding content items 84 | adds enough padding so the version chooser doesn't overlap with any 85 | navigation items 86 | */ 87 | .wy-nav-side { 88 | padding-bottom: 3.2em; 89 | } 90 | -------------------------------------------------------------------------------- /docs/_static/images/component_lifecycle.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/_static/images/component_lifecycle.png -------------------------------------------------------------------------------- /docs/_static/images/rasa_nlu_intent_gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/_static/images/rasa_nlu_intent_gui.png -------------------------------------------------------------------------------- /docs/_templates/layout.html: -------------------------------------------------------------------------------- 1 | {% extends "!layout.html" %} 2 | 3 | {% set css_files = css_files + ['_static/css/custom.css'] %} 4 | 5 | {% block footer %} 6 | {{ super() }} 7 | 8 | 9 | 16 | 17 | {% endblock %} 18 | -------------------------------------------------------------------------------- /docs/changelog.rst: -------------------------------------------------------------------------------- 1 | .. include:: ../CHANGELOG.rst -------------------------------------------------------------------------------- /docs/closeloop.rst: -------------------------------------------------------------------------------- 1 | .. _section_closeloop: 2 | 3 | Improving your models from feedback 4 | =================================== 5 | 6 | When the rasa_nlu server is running, it keeps track of all the 7 | predictions it's made and saves these to a log file. 8 | By default log files are placed in ``logs/``. The files in this 9 | directory contain one json object per line. 10 | You can fix any incorrect predictions and add them to your 11 | training set to improve your parser. 12 | After adding these to your training data, but before 13 | retraining your model, it is strongly recommended that you use the 14 | visualizer to spot any errors, see 15 | :ref:`Visualizing training data `. 16 | -------------------------------------------------------------------------------- /docs/community.rst: -------------------------------------------------------------------------------- 1 | .. _section_community: 2 | 3 | Community Contributions 4 | ======================= 5 | 6 | .. note:: 7 | This is an (incomplete) list of external resources created by the Rasa community. 8 | We list them here because they can help you learn about the Rasa Stack, but they are 9 | not officially endorsed by Rasa and we cannot promise that they will be kept up-to-date 10 | as the project evolves. 
11 | 12 | 13 | Community Written Documentation 14 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 15 | 16 | 17 | - A three part tutorial on using Rasa NLU in combination with Node-RED to create a basic chat bot and integrate it with Slack and Twilio. 18 | - `Part 1 `_ - Installation, Education, and Model Training 19 | - `Part 2 `_ - Back end fulfillment using Node-RED 20 | - `Part 3 `_ - A Complete Chatbot on Slack and Twilio 21 | - Documentation on using Rasa NLU with Docker 22 | - `Using Rasa NLU with Docker `_ - The easiest way to get started working with Rasa 23 | - `Failing Gracefully with Rasa NLU `_ 24 | 25 | Community Open Source Tools/Software 26 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 27 | 28 | Below is a list of tools and applications written around or for Rasa NLU using a permissive license. 29 | 30 | - Postgres backed UI for interacting with Rasa NLU 31 | - `Rasa UI `_ 32 | 33 | - A tool for generating training examples from a list of entities 34 | - `Chatito `_ 35 | 36 | - A custom API and UI on top of Rasa NLU for ease of use 37 | - `Articulate `_ 38 | 39 | Video Tutorials 40 | ^^^^^^^^^^^^^^^ 41 | 42 | - Talk about the Rasa Stack at `PyData `_ 43 | -------------------------------------------------------------------------------- /docs/config.rst: -------------------------------------------------------------------------------- 1 | .. _section_configuration: 2 | 3 | Configuration 4 | ============= 5 | 6 | You can provide options to Rasa NLU through: 7 | 8 | - a yaml-formatted config file 9 | - environment variables 10 | - command line arguments 11 | 12 | Environment variables override options in your config file, 13 | and command line args will override any options specified elsewhere. 14 | Environment variables are capitalised and prefixed with ``RASA_``, 15 | so the option ``pipeline`` is specified with the ``RASA_PIPELINE`` env var. 16 | 17 | Default 18 | ------- 19 | Here is an example model configuration: 20 | 21 | .. literalinclude:: ../sample_configs/config_crf.yml 22 | :language: yaml 23 | 24 | As you can see, there are a couple of top-level configuration keys, like 25 | ``language`` and ``pipeline`` - but most of the configuration is component 26 | specific. 27 | 28 | Explanations for the configuration keys of the different components are part 29 | of the :ref:`section_pipeline`. 30 | 31 | Options 32 | ------- 33 | A short explanation and examples for each configuration value. 34 | 35 | pipeline 36 | ~~~~~~~~ 37 | 38 | :Type: ``str`` or ``[dict]`` 39 | :Examples: 40 | using a pipeline template (predefined set of components with default 41 | parameters): 42 | 43 | .. code-block:: yaml 44 | 45 | pipeline: "spacy_sklearn" 46 | 47 | or alternatively specifying the components and parameters: 48 | 49 | .. code-block:: yaml 50 | 51 | pipeline: 52 | - name: "nlp_spacy" 53 | model: "en" # parameter of the spacy component 54 | - name: "ner_synonyms" 55 | 56 | :Description: 57 | The pipeline used for training. It can either be a template 58 | (passing a string) or a list of components (array) with their 59 | configuration values. For all available templates, 60 | see :ref:`section_pipeline`. The component-specific parameters 61 | are listed there as well. 62 | 63 | language 64 | ~~~~~~~~ 65 | 66 | :Type: ``str`` 67 | :Examples: 68 | 69 | .. code-block:: yaml 70 | 71 | language: "en" 72 | 73 | :Description: 74 | Language the model is trained in. Underlying word vectors 75 | will be loaded by using this language. There is more info 76 | about available languages in :ref:`section_languages`. 
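As a minimal sketch of how such a configuration is consumed (the calls mirror the Python API described in :ref:`section_python`, and it assumes the dependencies of the ``spacy_sklearn`` pipeline are installed), you can load one of the sample config files shipped in ``sample_configs/`` and train a model directly from Python:

.. code-block:: python

    from rasa_nlu import config
    from rasa_nlu.model import Trainer
    from rasa_nlu.training_data import load_data

    # the yaml file supplies the ``language`` and ``pipeline`` keys described above
    cfg = config.load("sample_configs/config_spacy.yml")

    # train the configured pipeline on the demo training data shipped with the repo
    training_data = load_data("data/examples/rasa/demo-rasa.json")
    trainer = Trainer(cfg)
    trainer.train(training_data)

    # returns the directory the trained model was persisted to
    model_directory = trainer.persist("./projects/default/")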
77 | -------------------------------------------------------------------------------- /docs/context.rst: -------------------------------------------------------------------------------- 1 | .. _section_context: 2 | 3 | Context-aware Dialogue 4 | ====================== 5 | 6 | Rasa NLU allows you to turn natural language into structured data, 7 | but this might not be enough if you want to build a bot that handles what 8 | has been said in context and adjusts the flow of the conversation 9 | accordingly. Rasa's open-source solution to handle contextual dialogue is 10 | `Rasa Core `_, but there are other tools 11 | out there such as `Dialogflow `_ (not open-sourced). 12 | 13 | Rasa Core uses machine learning to predict the evolution of a conversation, 14 | and does away with the need for tedious and poorly-scaling ``if/else`` logic. 15 | It also allows you to implement custom actions in response to the 16 | user message, such as saying something back, modifying a database, calling an 17 | API or handing over to a human. It is by design the natural companion of 18 | Rasa NLU if you want to build conversational bots. -------------------------------------------------------------------------------- /docs/contribute.rst: -------------------------------------------------------------------------------- 1 | Contributing 2 | ============ 3 | 4 | Contributions are very much encouraged! Please create an issue before doing any work to avoid disappointment. 5 | 6 | We created a tag that should get you started quickly if you are searching for 7 | `interesting topics to get started `_. 8 | 9 | 10 | Python Conventions 11 | ^^^^^^^^^^^^^^^^^^ 12 | 13 | Python code should follow the pep-8 spec. 14 | 15 | Python 2 and 3 Cross Compatibility 16 | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ 17 | 18 | To ensure cross compatibility between Python 2 and 3 we prioritize Python 3 conventions. 19 | Keep in mind that: 20 | 21 | - all string literals are unicode strings 22 | - division generates floating point numbers. Use ``//`` for truncated division 23 | - some built-ins, e.g. ``map`` and ``filter`` return iterators in Python 3. If you want to make use of them import the Python 3 version of them from ``builtins``. Otherwise use list comprehensions, which work uniformly across versions 24 | - use ``io.open`` instead of the builtin ``open`` when working with files 25 | - The following imports from ``__future__`` are mandatory in every python file: ``unicode_literals``, ``print_function``, ``division``, and ``absolute_import`` 26 | 27 | Please refer to this `cheat sheet `_ to learn how to write different constructs compatible with Python 2 and 3. 28 | 29 | Code of conduct 30 | ^^^^^^^^^^^^^^^ 31 | 32 | Rasa NLU adheres to the `Contributor Covenant Code of Conduct `_. 33 | By participating, you are expected to uphold this code. 34 | 35 | Documentation 36 | ^^^^^^^^^^^^^ 37 | Everything should be properly documented. To locally test the documentation you need to install 38 | 39 | .. code-block:: bash 40 | 41 | brew install sphinx 42 | pip install sphinx_rtd_theme 43 | 44 | After that, you can compile and view the documentation using: 45 | 46 | .. code-block:: bash 47 | 48 | cd docs 49 | make html 50 | cd _build/html 51 | python -m SimpleHTTPServer 8000 . 52 | # python 3: python -m http.server 53 | 54 | The documentation will be running on http://localhost:8000/. 55 | 56 | Code snippets that are part of the documentation can be tested using 57 | 58 | .. 
code-block:: bash 59 | 60 | make doctest 61 | -------------------------------------------------------------------------------- /docs/faq.rst: -------------------------------------------------------------------------------- 1 | .. _section_faq: 2 | 3 | Frequently Asked Questions 4 | ========================== 5 | 6 | How many training examples do I need? 7 | ------------------------------------- 8 | Unfortunately, there is no cookie-cutter answer to this question. It depends on your intents and your entities. 9 | 10 | If you have intents that are easily confusable, you will need more training data. Accordingly, as you add more 11 | intents, you also want to add more training examples for each intent. If you quickly write 20-30 unique expressions for 12 | each intent, you should be good for the beginning. 13 | 14 | The same holds true for entities. The number of training examples you will need depends on how closely related your different entity types are and how clearly 15 | entities are distinguishable from non-entities in your use case. 16 | 17 | To assess your model's performance, :ref:`run the server and manually test some messages ` 18 | , or use the :ref:`evaluation script `. 19 | 20 | 21 | 22 | Does it run with python 3? 23 | -------------------------- 24 | Yes, it does. Rasa NLU supports python 2.7 as well as python 3.5 and 3.6. If there are any issues with a specific python version, feel free to create an issue or directly provide a fix. 25 | 26 | Which languages are supported? 27 | ------------------------------ 28 | There is a list containing all officially supported languages :ref:`here `. Nevertheless, there are 29 | others working on adding more languages; feel free to have a look at the `github issues `_ 30 | section or the `gitter chat `_. 31 | 32 | .. _section_faq_version: 33 | 34 | Which version of Rasa NLU am I running? 35 | --------------------------------------- 36 | To find out which Rasa version you are running, you can execute 37 | 38 | .. code-block:: bash 39 | 40 | python -c "import rasa_nlu; print(rasa_nlu.__version__);" 41 | 42 | If you are using a virtual environment to run your python code, make sure you are using the correct python to execute the above code. 43 | 44 | Why am I getting an ``UndefinedMetricWarning``? 45 | ----------------------------------------------- 46 | The complete warning is: ``UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 in labels with no predicted samples.`` 47 | The warning is a result of a lack of training data. During training, the dataset is split multiple times; if there are too few training samples for any of the intents, some of the splits may not contain any examples for that intent. 48 | 49 | Hence, the solution is to add more training samples. As this is only a warning, training will still succeed, but the resulting model's predictions might be weak on the intents where you are lacking training data. 50 | 51 | 52 | I have an issue, can you help me? 53 | --------------------------------- 54 | We'd love to help you. If you are unsure whether your issue is related to your setup, you should state your problem in the `gitter chat `_. 55 | If you found an issue with the framework, please file a report on `github issues `_ 56 | including all the information needed to reproduce the problem. 57 | 58 | .. 
toctree:: 59 | :maxdepth: 1 60 | -------------------------------------------------------------------------------- /docs/index.rst: -------------------------------------------------------------------------------- 1 | 2 | Language Understanding with Rasa NLU 3 | ==================================== 4 | 5 | .. note:: 6 | This is the documentation for version |release| of Rasa NLU. Make sure you select 7 | the appropriate version of the documentation for your local installation! 8 | 9 | 10 | Rasa NLU is an open-source tool for intent classification and entity extraction. For example, taking a sentence like 11 | 12 | .. code-block:: console 13 | 14 | "I am looking for a Mexican restaurant in the center of town" 15 | 16 | and returning structured data like 17 | 18 | .. code-block:: json 19 | 20 | { 21 | "intent": "search_restaurant", 22 | "entities": { 23 | "cuisine" : "Mexican", 24 | "location" : "center" 25 | } 26 | } 27 | 28 | 29 | The intended audience is mainly people developing bots. 30 | You can use Rasa as a drop-in replacement for `wit `_ , `LUIS `_ , or `Dialogflow `_; the only change in your code is to send requests to ``localhost`` instead (see :ref:`section_migration` for details). 31 | 32 | Why might you use Rasa instead of one of those services? 33 | 34 | - you don't have to hand over your data to FB/MSFT/GOOG 35 | - you don't have to make an ``https`` call every time. 36 | - you can tune models to work well on your particular use case. 37 | 38 | These points are laid out in more detail in a `blog post `_ . 39 | 40 | 41 | The quickest quickstart in the west 42 | ----------------------------------- 43 | 44 | 45 | .. code-block:: console 46 | 47 | $ python setup.py install 48 | $ python -m rasa_nlu.server -e wit & 49 | $ curl 'http://localhost:5000/parse?q=hello' 50 | [{"_text": "hello", "confidence": 1.0, "entities": {}, "intent": "greet"}] 51 | 52 | 53 | There you go! You just parsed some text. Next step, do the :ref:`section_tutorial`. 54 | 55 | .. note:: This demo uses a very limited ML model. To apply Rasa NLU to your use case, you need to train your own model! Follow the tutorial to learn how to apply rasa_nlu to your data. 56 | 57 | About 58 | ----- 59 | 60 | You can think of Rasa NLU as a set of high-level APIs for building your own language parser using existing NLP and ML libraries. 61 | The setup process is designed to be as simple as possible. If you're currently using wit, LUIS, or Dialogflow, you just: 62 | 63 | 1. download your app data from wit or LUIS and feed it into Rasa NLU 64 | 2. run Rasa NLU on your machine and switch the URL of your wit/LUIS/Dialogflow API calls to ``localhost:5000/parse``. 65 | 66 | Rasa NLU is written in Python, but you can use it from any language through :ref:`section_http`. 67 | If your project *is* written in Python you can simply import the relevant classes. 68 | 69 | Rasa is a set of tools for building more advanced bots, developed by `Rasa 70 | `_. This is the natural language understanding module. To 71 | build conversational chatbots, you can interface Rasa NLU with libraries 72 | that steer the flow of the conversation - more on this in :ref:`section_context`. 73 | 74 | .. toctree:: 75 | :maxdepth: 1 76 | :caption: Getting Started 77 | 78 | installation 79 | tutorial 80 | 81 | .. 
toctree:: 82 | :maxdepth: 1 83 | :caption: User Documentation 84 | 85 | config 86 | migrating 87 | dataformat 88 | http 89 | python 90 | entities 91 | closeloop 92 | persist 93 | languages 94 | pipeline 95 | evaluation 96 | context 97 | faq 98 | migrations 99 | license 100 | 101 | .. toctree:: 102 | :maxdepth: 1 103 | :caption: Resources 104 | 105 | community 106 | 107 | .. toctree:: 108 | :maxdepth: 1 109 | :caption: Developer Documentation 110 | 111 | contribute 112 | changelog 113 | -------------------------------------------------------------------------------- /docs/key.enc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/docs/key.enc -------------------------------------------------------------------------------- /docs/languages.rst: -------------------------------------------------------------------------------- 1 | .. _section_languages: 2 | 3 | Language Support 4 | ================ 5 | 6 | Rasa NLU supports a number of different languages. Exactly which ones depends on 7 | the backend you are using, and the features you require. 8 | 9 | Firstly, the ``tensorflow_embedding`` pipeline in principle supports any language, 10 | but only performs intent classification. 11 | In addition, with the spaCy backend you can now load fastText vectors, which are available 12 | for `hundreds of languages `_. 13 | 14 | For both intent and entity recognition, the following languages and backend combinations 15 | are tested and available: 16 | 17 | ============= ============================== 18 | backend supported languages 19 | ============= ============================== 20 | spacy-sklearn english (``en``), 21 | german (``de``), 22 | spanish (``es``), 23 | portuguese (``pt``), 24 | italian (``it``), 25 | dutch (``nl``), 26 | french (``fr``) 27 | MITIE english (``en``) 28 | Jieba-MITIE chinese (``zh``) :ref:`* ` 29 | ============= ============================== 30 | 31 | These languages can be set as part of the :ref:`section_configuration`. 32 | 33 | Adding a new language 34 | --------------------- 35 | We want to make the process of adding new languages as simple as possible to increase the number of 36 | supported languages. Nevertheless, to use a language you either need a trained word representation or 37 | you need to train that representation on your own using a large corpus of text data in that language. 38 | 39 | These are the steps necessary to add a new language: 40 | 41 | spacy-sklearn 42 | ^^^^^^^^^^^^^ 43 | 44 | spaCy already provides a really good documentation page about `Adding languages `_. 45 | This will help you train a tokenizer and vocabulary for a new language in spaCy. 46 | 47 | As described in the documentation, you need to register your language using ``set_lang_class()``, which will 48 | allow Rasa NLU to load and use your new language by passing in your language identifier as the ``language`` :ref:`section_configuration` option. 49 | 50 | MITIE 51 | ^^^^^ 52 | 53 | 1. Get a ~clean language corpus (a Wikipedia dump works) as a set of text files 54 | 2. Build and run `MITIE Wordrep Tool`_ on your corpus. This can take several hours/days depending on your dataset and your workstation. You'll need something like 128GB of RAM for wordrep to run - yes, that's a lot; try to extend your swap. 55 | 3. Set the path of your new ``total_word_feature_extractor.dat`` as the value of the *mitie_file* parameter in ``config_mitie.json`` 56 | 57 | .. 
_jieba: 58 | 59 | Jieba-MITIE 60 | ^^^^^^^^^^^ 61 | 62 | Some notes about using the Jieba tokenizer together with MITIE on Chinese 63 | language data: To use it, you need a proper MITIE feature extractor, e.g. 64 | ``data/total_word_feature_extractor_zh.dat``. It should be trained 65 | from a Chinese corpus using the MITIE wordrep tools 66 | (takes 2-3 days for training). 67 | 68 | For training, please build the 69 | `MITIE Wordrep Tool`_. 70 | Note that the Chinese corpus should be tokenized before feeding 71 | it into the tool for training. A close-domain corpus that best matches 72 | your use case works best. 73 | 74 | Detailed instructions on how to train the model yourself, as well as a model 75 | trained from a Chinese Wikipedia Dump and Baidu Baike, can be found in `crownpku `_ 's 76 | `blogpost `_. 77 | 78 | .. _`MITIE Wordrep Tool`: https://github.com/mit-nlp/MITIE/tree/master/tools/wordrep -------------------------------------------------------------------------------- /docs/license.rst: -------------------------------------------------------------------------------- 1 | 2 | License 3 | ======= 4 | 5 | 6 | .. literalinclude:: ../LICENSE.txt -------------------------------------------------------------------------------- /docs/migrating.rst: -------------------------------------------------------------------------------- 1 | .. _section_migration: 2 | 3 | Migrating an existing app 4 | ========================= 5 | 6 | Rasa NLU is designed to make migrating from wit/LUIS/Dialogflow as simple as possible. 7 | The TLDR instructions for migrating are: 8 | 9 | - download an export of your app data from wit/LUIS/Dialogflow 10 | - follow the :ref:`tutorial`, using your downloaded data instead of ``demo-rasa.json`` 11 | 12 | 13 | Banana Peels 14 | ------------ 15 | 16 | Just some specific things to watch out for with each of the services you might want to migrate from. 17 | 18 | wit.ai 19 | ^^^^^^ 20 | 21 | Wit used to handle ``intents`` natively. 22 | Now they are somewhat obfuscated. 23 | To create an ``intent`` in wit you have to create an ``entity`` which spans the entire text. 24 | The file you want from your download is called ``expressions.json``. 25 | 26 | LUIS.ai 27 | ^^^^^^^ 28 | 29 | Nothing special here. Downloading the data and importing it into Rasa NLU should work without issues. 30 | 31 | Dialogflow 32 | ^^^^^^^^^^ 33 | 34 | Dialogflow exports generate multiple files rather than just one. 35 | Put them all in a directory (see ``data/examples/dialogflow`` in the repo) 36 | and pass that path to the trainer. 37 | 38 | 39 | 40 | Emulation 41 | --------- 42 | 43 | To make Rasa NLU easy to try out with existing projects, 44 | the server can `emulate` wit, LUIS, or Dialogflow. 45 | In native mode, a request / response looks like this: 46 | 47 | .. code-block:: console 48 | 49 | $ curl -XPOST localhost:5000/parse -d '{"q":"I am looking for Chinese food"}' | python -mjson.tool 50 | { 51 | "text": "I am looking for Chinese food", 52 | "intent": "restaurant_search", 53 | "confidence": 0.4794813722432127, 54 | "entities": [ 55 | { 56 | "start": 17, 57 | "end": 24, 58 | "value": "chinese", 59 | "entity": "cuisine" 60 | } 61 | ] 62 | } 63 | 64 | 65 | If we run in ``wit`` mode 66 | (e.g. ``python -m rasa_nlu.server --emulate wit --path projects``), 67 | 68 | we instead have to make a GET request 69 | 70 | .. 
code-block:: console 71 | 72 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 73 | [ 74 | { 75 | "_text": "hello", 76 | "confidence": 0.4794813722432127, 77 | "entities": {}, 78 | "intent": "greet" 79 | } 80 | ] 81 | 82 | similarly for LUIS, but with a slightly different response format 83 | 84 | 85 | .. code-block:: console 86 | 87 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 88 | { 89 | "entities": [], 90 | "query": "hello", 91 | "topScoringIntent": { 92 | "intent": "inform", 93 | "score": 0.4794813722432127 94 | } 95 | } 96 | 97 | and finally for Dialogflow 98 | 99 | .. code-block:: console 100 | 101 | $ curl 'localhost:5000/parse?q=hello' | python -mjson.tool 102 | { 103 | "id": "ffd7ede3-b62f-11e6-b292-98fe944ee8c2", 104 | "result": { 105 | "action": null, 106 | "actionIncomplete": null, 107 | "contexts": [], 108 | "fulfillment": {}, 109 | "metadata": { 110 | "intentId": "ffdbd6f3-b62f-11e6-8504-98fe944ee8c2", 111 | "intentName": "greet", 112 | "webhookUsed": "false" 113 | }, 114 | "parameters": {}, 115 | "resolvedQuery": "hello", 116 | "score": null, 117 | "source": "agent" 118 | }, 119 | "sessionId": "ffdbd814-b62f-11e6-93b2-98fe944ee8c2", 120 | "status": { 121 | "code": 200, 122 | "errorType": "success" 123 | }, 124 | "timestamp": "2016-11-29T12:33:15.369411" 125 | } 126 | -------------------------------------------------------------------------------- /docs/persist.rst: -------------------------------------------------------------------------------- 1 | .. _section_persistence: 2 | 3 | Model Persistence 4 | ================= 5 | 6 | 7 | Rasa NLU supports using `S3 `_ and 8 | `GCS `_ to save your models. 9 | 10 | * Amazon S3 Storage 11 | S3 is supported using the ``boto3`` module which you can 12 | install with ``pip install boto3``. 13 | 14 | Start the Rasa NLU server with ``storage`` option set to 15 | ``aws``. Get your S3 credentials and set the following 16 | environment variables: 17 | 18 | - ``AWS_SECRET_ACCESS_KEY`` 19 | - ``AWS_ACCESS_KEY_ID`` 20 | - ``AWS_DEFAULT_REGION`` 21 | - ``BUCKET_NAME`` 22 | - ``AWS_ENDPOINT_URL`` 23 | 24 | If there is no bucket with the name ``BUCKET_NAME`` Rasa will create it. 25 | 26 | * Google Cloud Storage 27 | GCS is supported using the ``google-cloud-storage`` package 28 | which you can install with ``pip install google-cloud-storage`` 29 | 30 | Start the Rasa NLU server with ``storage`` option set to ``gcs``. 31 | 32 | When running on google app engine and compute engine, the auth 33 | credentials are already set up. For running locally or elsewhere, 34 | checkout their 35 | `client repo `_ 36 | for details on setting up authentication. It involves creating 37 | a service account key file from google cloud console, 38 | and setting the ``GOOGLE_APPLICATION_CREDENTIALS`` environment 39 | variable to the path of that key file. 40 | 41 | * Azure Storage 42 | Azure is supported using the ``azure-storage-blob`` package 43 | which you can install with ``pip install azure-storage-blob`` 44 | 45 | Start the Rasa NLU server with ``storage`` option set to ``azure``. 46 | 47 | The following environment variables must be set: 48 | 49 | - ``AZURE_CONTAINER`` 50 | - ``AZURE_ACCOUNT_NAME`` 51 | - ``AZURE_ACCOUNT_KEY`` 52 | 53 | If there is no container with the name ``AZURE_CONTAINER`` Rasa will create it. 54 | 55 | Models are gzipped before saving to cloud. 
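As a minimal sketch of the Amazon S3 setup described above, the required environment variables can also be set from Python before starting the server with the ``storage`` option set to ``aws``; every value below is a placeholder you would replace with your own credentials:

.. code-block:: python

    import os

    # credentials and bucket for the S3 persistor -- placeholder values only
    os.environ["AWS_ACCESS_KEY_ID"] = "<your-access-key-id>"
    os.environ["AWS_SECRET_ACCESS_KEY"] = "<your-secret-access-key>"
    os.environ["AWS_DEFAULT_REGION"] = "<your-region>"
    os.environ["BUCKET_NAME"] = "<your-bucket>"  # created by Rasa if it does not exist
    os.environ["AWS_ENDPOINT_URL"] = "<your-s3-endpoint-url>"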
56 | -------------------------------------------------------------------------------- /docs/poll.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | How was the tutorial? Click to Vote 4 | 5 | 6 | 7 | 8 | 9 | Great! 10 | 11 | 12 | 13 | 14 | 15 | Didn't Work 16 | 17 | 18 | 19 | 20 | 21 | Didn't Finish 22 | 23 | 24 | 25 | -------------------------------------------------------------------------------- /docs/python.rst: -------------------------------------------------------------------------------- 1 | .. _section_python: 2 | 3 | Using Rasa NLU from python 4 | ========================== 5 | Apart from running Rasa NLU as a HTTP server you can use it directly in your python program. 6 | Rasa NLU supports both Python 2 and 3. 7 | 8 | Training Time 9 | ------------- 10 | For creating your models, you can follow the same instructions as non-python users. 11 | Or, you can train directly in python with a script like the following (using spacy): 12 | 13 | .. testcode:: 14 | 15 | from rasa_nlu.training_data import load_data 16 | from rasa_nlu.config import RasaNLUModelConfig 17 | from rasa_nlu.model import Trainer 18 | from rasa_nlu import config 19 | 20 | training_data = load_data('data/examples/rasa/demo-rasa.json') 21 | trainer = Trainer(config.load("sample_configs/config_spacy.yml")) 22 | trainer.train(training_data) 23 | model_directory = trainer.persist('./projects/default/') # Returns the directory the model is stored in 24 | 25 | Prediction Time 26 | --------------- 27 | 28 | You can call Rasa NLU directly from your python script. To do so, you need to load the metadata of 29 | your model and instantiate an interpreter. The ``metadata.json`` in your model dir contains the 30 | necessary info to recover your model: 31 | 32 | .. testcode:: 33 | 34 | from rasa_nlu.model import Metadata, Interpreter 35 | 36 | # where `model_directory points to the folder the model is persisted in 37 | interpreter = Interpreter.load(model_directory) 38 | 39 | You can then use the loaded interpreter to parse text: 40 | 41 | .. testcode:: 42 | 43 | interpreter.parse(u"The text I want to understand") 44 | 45 | which returns the same ``dict`` as the HTTP api would (without emulation). 46 | 47 | If multiple models are created, it is reasonable to share components between the different models. E.g. 48 | the ``'nlp_spacy'`` component, which is used by every pipeline that wants to have access to the spacy word vectors, 49 | can be cached to avoid storing the large word vectors more than once in main memory. To use the caching, 50 | a ``ComponentBuilder`` should be passed when loading and training models. 51 | 52 | Here is a short example on how to create a component builder, that can be reused to train and run multiple models, to train a model: 53 | 54 | .. testcode:: 55 | 56 | from rasa_nlu.training_data import load_data 57 | from rasa_nlu import config 58 | from rasa_nlu.components import ComponentBuilder 59 | from rasa_nlu.model import Trainer 60 | 61 | builder = ComponentBuilder(use_cache=True) # will cache components between pipelines (where possible) 62 | 63 | training_data = load_data('data/examples/rasa/demo-rasa.json') 64 | trainer = Trainer(config.load("sample_configs/config_spacy.yml"), builder) 65 | trainer.train(training_data) 66 | model_directory = trainer.persist('./projects/default/') # Returns the directory the model is stored in 67 | 68 | The same builder can be used to load a model (can be a totally different one). 
The builder only caches components that are safe to be shared between models. Here is a short example on how to use the builder when loading models: 69 | 70 | .. testcode:: 71 | 72 | from rasa_nlu.model import Metadata, Interpreter 73 | from rasa_nlu import config 74 | 75 | # For simplicity we will load the same model twice, usually you would want to use the metadata of 76 | # different models 77 | 78 | interpreter = Interpreter.load(model_directory, builder) # to use the builder, pass it as an arg when loading the model 79 | # the clone will share resources with the first model, as long as the same builder is passed! 80 | interpreter_clone = Interpreter.load(model_directory, builder) 81 | 82 | 83 | -------------------------------------------------------------------------------- /entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | function print_help { 6 | echo "Available options:" 7 | echo " start commands (rasa cmd line arguments) - Start RasaNLU server" 8 | echo " download {mitie, spacy en, spacy de} - Download packages for mitie or spacy (english or german)" 9 | echo " start -h - Print RasaNLU help" 10 | echo " help - Print this help" 11 | echo " run - Run an arbitrary command inside the container" 12 | } 13 | 14 | function download_package { 15 | case $1 in 16 | mitie) 17 | echo "Downloading mitie model..." 18 | wget https://github.com/mit-nlp/MITIE/releases/download/v0.4/MITIE-models-v0.2.tar.bz2 19 | tar jxf MITIE-models-v0.2.tar.bz2 20 | ;; 21 | spacy) 22 | case $2 in 23 | en|de) 24 | echo "Downloading spacy.$2 model..." 25 | python -m spacy download "$2" 26 | echo "Done." 27 | ;; 28 | *) 29 | echo "Error. Rasa_nlu supports only english and german models for the time being" 30 | print_help 31 | exit 1 32 | ;; 33 | esac 34 | ;; 35 | *) 36 | echo "Error: invalid package specified." 
37 | echo 38 | print_help 39 | ;; 40 | esac 41 | } 42 | 43 | case ${1} in 44 | start) 45 | exec python -m rasa_nlu.server "${@:2}" 46 | ;; 47 | run) 48 | exec "${@:2}" 49 | ;; 50 | download) 51 | download_package ${@:2} 52 | ;; 53 | *) 54 | print_help 55 | ;; 56 | esac 57 | 58 | 59 | -------------------------------------------------------------------------------- /heroku/Procfile: -------------------------------------------------------------------------------- 1 | web: python setup.py install --force; python -m rasa_nlu.server -P $PORT -------------------------------------------------------------------------------- /heroku/runtime.txt: -------------------------------------------------------------------------------- 1 | python-2.7.11 2 | -------------------------------------------------------------------------------- /jieba_userdict/jieba_userdict.txt: -------------------------------------------------------------------------------- 1 | 创新办 3 i 2 | 云计算 5 3 | 凱特琳 nz 4 | 台中 -------------------------------------------------------------------------------- /rasa_nlu/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | import rasa_nlu.version 9 | 10 | logging.getLogger(__name__).addHandler(logging.NullHandler()) 11 | 12 | __version__ = rasa_nlu.version.__version__ 13 | -------------------------------------------------------------------------------- /rasa_nlu/classifiers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | # How many intents are at max put into the output intent 7 | # ranking, everything else will be cut off 8 | INTENT_RANKING_LENGTH = 10 9 | -------------------------------------------------------------------------------- /rasa_nlu/classifiers/keyword_intent_classifier.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import map 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Text 9 | 10 | from rasa_nlu.components import Component 11 | from rasa_nlu.training_data import Message 12 | 13 | 14 | class KeywordIntentClassifier(Component): 15 | 16 | name = "intent_classifier_keyword" 17 | 18 | provides = ["intent"] 19 | 20 | his = ["hello", "hi", "hey"] 21 | 22 | byes = ["bye", "goodbye"] 23 | 24 | def process(self, message, **kwargs): 25 | # type: (Message, **Any) -> None 26 | 27 | intent = {"name": self.parse(message.text), "confidence": 1.0} 28 | message.set("intent", intent, 29 | add_to_output=True) 30 | 31 | def parse(self, text): 32 | # type: (Text) -> Text 33 | 34 | _text = text.lower() 35 | 36 | def is_present(x): 37 | return x in _text 38 | 39 | if any(map(is_present, self.his)): 40 | return "greet" 41 | elif any(map(is_present, self.byes)): 42 | return "goodbye" 43 | else: 44 | return None 45 | -------------------------------------------------------------------------------- /rasa_nlu/convert.py: -------------------------------------------------------------------------------- 1 | from 
__future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import argparse 7 | 8 | from rasa_nlu import training_data 9 | from rasa_nlu.utils import write_to_file 10 | 11 | 12 | def create_argument_parser(): 13 | parser = argparse.ArgumentParser( 14 | description='Convert training data formats into one another') 15 | 16 | parser.add_argument('-d', '--data_file', 17 | required=True, 18 | help='file or dir containing training data') 19 | 20 | parser.add_argument('-o', '--out_file', 21 | required=True, 22 | help='file where to save training data in rasa format') 23 | 24 | parser.add_argument('-l', '--language', 25 | default='en', 26 | help='language of the data') 27 | 28 | parser.add_argument('-f', '--format', 29 | required=True, 30 | choices=['json', 'md'], 31 | help="Output format the training data should be " 32 | "converted into.") 33 | return parser 34 | 35 | 36 | def convert_training_data(data_file, out_file, output_format, language): 37 | td = training_data.load_data(data_file, language) 38 | 39 | if output_format == 'md': 40 | output = td.as_markdown() 41 | else: 42 | output = td.as_json(indent=2) 43 | 44 | write_to_file(out_file, output) 45 | 46 | 47 | if __name__ == "__main__": 48 | parser = create_argument_parser() 49 | args = parser.parse_args() 50 | 51 | convert_training_data(args.data_file, 52 | args.out_file, 53 | args.format, 54 | args.language) 55 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import object 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Optional 9 | from typing import Text 10 | 11 | 12 | class NoEmulator(object): 13 | def __init__(self): 14 | # type: () -> None 15 | 16 | self.name = None # type: Optional[Text] 17 | 18 | def normalise_request_json(self, data): 19 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 20 | 21 | _data = {} 22 | _data["text"] = data["q"][0] if type(data["q"]) == list else data["q"] 23 | 24 | if not data.get("project"): 25 | _data["project"] = "default" 26 | elif type(data["project"]) == list: 27 | _data["project"] = data["project"][0] 28 | else: 29 | _data["project"] = data["project"] 30 | 31 | if data.get("model"): 32 | _data["model"] = data["model"][0] if type(data["model"]) == list else data["model"] 33 | 34 | _data['time'] = data["time"] if "time" in data else None 35 | return _data 36 | 37 | def normalise_response_json(self, data): 38 | # type: (Dict[Text, Any]) -> Any 39 | """Transform data to target format.""" 40 | 41 | return data 42 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/dialogflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import str 6 | import uuid 7 | from datetime import datetime 8 | 9 | from typing import Any 10 | from typing import Dict 11 | from typing import Text 12 | from typing import List 13 | 14 | from rasa_nlu.emulators import NoEmulator 15 | 16 | 17 | class 
DialogflowEmulator(NoEmulator): 18 | def __init__(self): 19 | # type: () -> None 20 | 21 | super(DialogflowEmulator, self).__init__() 22 | self.name = 'api' 23 | 24 | def normalise_response_json(self, data): 25 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 26 | """Transform data to Dialogflow format.""" 27 | 28 | # populate entities dict 29 | entities = { 30 | entity_type: [] 31 | for entity_type in set([x["entity"] for x in data["entities"]])} # type: Dict[Text, List[Text]] 32 | 33 | for entity in data["entities"]: 34 | entities[entity["entity"]].append(entity["value"]) 35 | 36 | return { 37 | "id": str(uuid.uuid1()), 38 | "timestamp": datetime.now().isoformat(), 39 | "result": { 40 | "source": "agent", 41 | "resolvedQuery": data["text"], 42 | "action": None, 43 | "actionIncomplete": None, 44 | "parameters": entities, 45 | "contexts": [], 46 | "metadata": { 47 | "intentId": str(uuid.uuid1()), 48 | "webhookUsed": "false", 49 | "intentName": data["intent"] 50 | }, 51 | "fulfillment": {}, 52 | "score": None, 53 | }, 54 | "status": { 55 | "code": 200, 56 | "errorType": "success" 57 | }, 58 | "sessionId": str(uuid.uuid1()) 59 | } 60 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/luis.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import Text 9 | 10 | from rasa_nlu.emulators import NoEmulator 11 | 12 | 13 | class LUISEmulator(NoEmulator): 14 | def __init__(self): 15 | # type: () -> None 16 | 17 | super(LUISEmulator, self).__init__() 18 | self.name = 'luis' 19 | 20 | def _top_intent(self, data): 21 | if data.get("intent"): 22 | return { 23 | "intent": data["intent"]["name"], 24 | "score": data["intent"]["confidence"] 25 | } 26 | else: 27 | return None 28 | 29 | def _ranking(self, data): 30 | if data.get("intent_ranking"): 31 | return [{"intent": el["name"], "score": el["confidence"]} for el in data["intent_ranking"]] 32 | else: 33 | top = self._top_intent(data) 34 | return [top] if top else [] 35 | 36 | def normalise_response_json(self, data): 37 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 38 | """Transform data to luis.ai format.""" 39 | 40 | top_intent = self._top_intent(data) 41 | ranking = self._ranking(data) 42 | return { 43 | "query": data["text"], 44 | "topScoringIntent": top_intent, 45 | "intents": ranking, 46 | "entities": [ 47 | { 48 | "entity": e["value"], 49 | "type": e["entity"], 50 | "startIndex": None, 51 | "endIndex": None, 52 | "score": None 53 | } for e in data["entities"] 54 | ] if "entities" in data else [] 55 | } 56 | -------------------------------------------------------------------------------- /rasa_nlu/emulators/wit.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import List 9 | from typing import Text 10 | 11 | from rasa_nlu.emulators import NoEmulator 12 | 13 | 14 | class WitEmulator(NoEmulator): 15 | def __init__(self): 16 | # type: () -> None 17 | 18 | super(WitEmulator, self).__init__() 19 | self.name = "wit" 20 | 21 | def normalise_response_json(self, data): 22 
| # type: (Dict[Text, Any]) -> List[Dict[Text, Any]] 23 | """Transform data to wit.ai format.""" 24 | 25 | entities = {} 26 | for entity in data["entities"]: 27 | entities[entity["entity"]] = { 28 | "confidence": None, 29 | "type": "value", 30 | "value": entity["value"], 31 | "start": entity["start"], 32 | "end": entity["end"] 33 | } 34 | 35 | return [ 36 | { 37 | "_text": data["text"], 38 | "confidence": data["intent"]['confidence'], 39 | "intent": data["intent"]['name'], 40 | "entities": entities 41 | } 42 | ] 43 | -------------------------------------------------------------------------------- /rasa_nlu/extractors/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from typing import Any 7 | from typing import Dict 8 | from typing import List 9 | from typing import Text 10 | 11 | from rasa_nlu.components import Component 12 | from rasa_nlu.training_data import Message 13 | 14 | 15 | class EntityExtractor(Component): 16 | def add_extractor_name(self, entities): 17 | # type: (List[Dict[Text, Any]]) -> List[Dict[Text, Any]] 18 | for entity in entities: 19 | entity["extractor"] = self.name 20 | return entities 21 | 22 | def add_processor_name(self, entity): 23 | # type: (Dict[Text, Any]) -> Dict[Text, Any] 24 | if "processors" in entity: 25 | entity["processors"].append(self.name) 26 | else: 27 | entity["processors"] = [self.name] 28 | 29 | return entity 30 | 31 | @staticmethod 32 | def find_entity(ent, text, tokens): 33 | offsets = [token.offset for token in tokens] 34 | ends = [token.end for token in tokens] 35 | 36 | if ent["start"] not in offsets: 37 | message = ("Invalid entity {} in example '{}': " 38 | "entities must span whole tokens. " 39 | "Wrong entity start.".format(ent, text)) 40 | raise ValueError(message) 41 | 42 | if ent["end"] not in ends: 43 | message = ("Invalid entity {} in example '{}': " 44 | "entities must span whole tokens. " 45 | "Wrong entity end.".format(ent, text)) 46 | raise ValueError(message) 47 | 48 | start = offsets.index(ent["start"]) 49 | end = ends.index(ent["end"]) + 1 50 | return start, end 51 | 52 | def filter_trainable_entities(self, entity_examples): 53 | # type: (List[Message]) -> List[Message] 54 | """Filters out untrainable entity annotations. 55 | 56 | Creates a copy of entity_examples in which entities that have 57 | `extractor` set to something other than self.name (e.g. 
'ner_crf') 58 | are removed.""" 59 | 60 | filtered = [] 61 | for message in entity_examples: 62 | entities = [] 63 | for ent in message.get("entities", []): 64 | extractor = ent.get("extractor") 65 | if not extractor or extractor == self.name: 66 | entities.append(ent) 67 | data = message.data.copy() 68 | data['entities'] = entities 69 | filtered.append( 70 | Message(text=message.text, 71 | data=data, 72 | output_properties=message.output_properties, 73 | time=message.time)) 74 | 75 | return filtered 76 | -------------------------------------------------------------------------------- /rasa_nlu/extractors/spacy_entity_extractor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import typing 7 | from typing import Any 8 | from typing import Dict 9 | from typing import List 10 | from typing import Text 11 | 12 | from rasa_nlu.extractors import EntityExtractor 13 | from rasa_nlu.training_data import Message 14 | 15 | if typing.TYPE_CHECKING: 16 | from spacy.tokens.doc import Doc 17 | 18 | 19 | class SpacyEntityExtractor(EntityExtractor): 20 | name = "ner_spacy" 21 | 22 | provides = ["entities"] 23 | 24 | requires = ["spacy_nlp"] 25 | 26 | def process(self, message, **kwargs): 27 | # type: (Message, **Any) -> None 28 | 29 | # can't use the existing doc here (spacy_doc on the message) 30 | # because tokens are lower cased which is bad for NER 31 | spacy_nlp = kwargs.get("spacy_nlp", None) 32 | doc = spacy_nlp(message.text) 33 | extracted = self.add_extractor_name(self.extract_entities(doc)) 34 | message.set("entities", 35 | message.get("entities", []) + extracted, 36 | add_to_output=True) 37 | 38 | @staticmethod 39 | def extract_entities(doc): 40 | # type: (Doc) -> List[Dict[Text, Any]] 41 | 42 | entities = [ 43 | { 44 | "entity": ent.label_, 45 | "value": ent.text, 46 | "start": ent.start_char, 47 | "confidence": None, 48 | "end": ent.end_char 49 | } 50 | for ent in doc.ents] 51 | return entities 52 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | 8 | from rasa_nlu.components import Component 9 | 10 | 11 | class Featurizer(Component): 12 | 13 | @staticmethod 14 | def _combine_with_existing_text_features(message, 15 | additional_features): 16 | if message.get("text_features") is not None: 17 | return np.hstack((message.get("text_features"), 18 | additional_features)) 19 | else: 20 | return additional_features 21 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/mitie_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import typing 8 | from typing import Any 9 | from typing import List 10 | from typing import Text 11 | 12 | from rasa_nlu.config import RasaNLUModelConfig 13 | from rasa_nlu.featurizers import Featurizer 14 | from rasa_nlu.tokenizers import Token 
15 | from rasa_nlu.training_data import Message 16 | from rasa_nlu.training_data import TrainingData 17 | 18 | if typing.TYPE_CHECKING: 19 | import mitie 20 | from builtins import str 21 | 22 | 23 | class MitieFeaturizer(Featurizer): 24 | name = "intent_featurizer_mitie" 25 | 26 | provides = ["text_features"] 27 | 28 | requires = ["tokens", "mitie_feature_extractor"] 29 | 30 | @classmethod 31 | def required_packages(cls): 32 | # type: () -> List[Text] 33 | return ["mitie", "numpy"] 34 | 35 | def ndim(self, feature_extractor): 36 | # type: (mitie.total_word_feature_extractor) -> int 37 | 38 | return feature_extractor.num_dimensions 39 | 40 | def train(self, training_data, config, **kwargs): 41 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 42 | 43 | mitie_feature_extractor = self._mitie_feature_extractor(**kwargs) 44 | for example in training_data.intent_examples: 45 | features = self.features_for_tokens(example.get("tokens"), 46 | mitie_feature_extractor) 47 | example.set("text_features", 48 | self._combine_with_existing_text_features( 49 | example, features)) 50 | 51 | def process(self, message, **kwargs): 52 | # type: (Message, **Any) -> None 53 | 54 | mitie_feature_extractor = self._mitie_feature_extractor(**kwargs) 55 | features = self.features_for_tokens(message.get("tokens"), 56 | mitie_feature_extractor) 57 | message.set("text_features", 58 | self._combine_with_existing_text_features(message, 59 | features)) 60 | 61 | def _mitie_feature_extractor(self, **kwargs): 62 | mitie_feature_extractor = kwargs.get("mitie_feature_extractor") 63 | if not mitie_feature_extractor: 64 | raise Exception("Failed to train 'intent_featurizer_mitie'. " 65 | "Missing a proper MITIE feature extractor. " 66 | "Make sure this component is preceded by " 67 | "the 'nlp_mitie' component in the pipeline " 68 | "configuration.") 69 | return mitie_feature_extractor 70 | 71 | def features_for_tokens(self, tokens, feature_extractor): 72 | # type: (List[Token], mitie.total_word_feature_extractor) -> np.ndarray 73 | 74 | vec = np.zeros(self.ndim(feature_extractor)) 75 | for token in tokens: 76 | vec += feature_extractor.get_feature_vector(token.text) 77 | if tokens: 78 | return vec / len(tokens) 79 | else: 80 | return vec 81 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/regex_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | import os 8 | import re 9 | import warnings 10 | 11 | import typing 12 | from typing import Any, Dict, List, Optional, Text 13 | 14 | from rasa_nlu import utils 15 | from rasa_nlu.config import RasaNLUModelConfig 16 | from rasa_nlu.featurizers import Featurizer 17 | from rasa_nlu.training_data import Message 18 | from rasa_nlu.training_data import TrainingData 19 | 20 | import numpy as np 21 | 22 | logger = logging.getLogger(__name__) 23 | 24 | if typing.TYPE_CHECKING: 25 | from rasa_nlu.model import Metadata 26 | 27 | 28 | REGEX_FEATURIZER_FILE_NAME = "regex_featurizer.json" 29 | 30 | 31 | class RegexFeaturizer(Featurizer): 32 | name = "intent_entity_featurizer_regex" 33 | 34 | provides = ["text_features"] 35 | 36 | requires = ["tokens"] 37 | 38 | def __init__(self, component_config=None, known_patterns=None): 39 | super(RegexFeaturizer, self).__init__(component_config) 40 | 41 | 
self.known_patterns = known_patterns if known_patterns else [] 42 | 43 | def train(self, training_data, config, **kwargs): 44 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 45 | 46 | for example in training_data.regex_features: 47 | self.known_patterns.append(example) 48 | 49 | for example in training_data.training_examples: 50 | updated = self._text_features_with_regex(example) 51 | example.set("text_features", updated) 52 | 53 | def process(self, message, **kwargs): 54 | # type: (Message, **Any) -> None 55 | 56 | updated = self._text_features_with_regex(message) 57 | message.set("text_features", updated) 58 | 59 | def _text_features_with_regex(self, message): 60 | if self.known_patterns is not None: 61 | extras = self.features_for_patterns(message) 62 | return self._combine_with_existing_text_features(message, extras) 63 | else: 64 | return message.get("text_features") 65 | 66 | def features_for_patterns(self, message): 67 | """Checks which known patterns match the message. 68 | 69 | Given a sentence, returns a vector of {1,0} values indicating which 70 | regexes did match. Furthermore, if the 71 | message is tokenized, the function will mark the matching regex on 72 | the tokens that are part of the match.""" 73 | 74 | found = [] 75 | for i, exp in enumerate(self.known_patterns): 76 | match = re.search(exp["pattern"], message.text) 77 | if match is not None: 78 | for t in message.get("tokens", []): 79 | if t.offset < match.end() and t.end > match.start(): 80 | t.set("pattern", i) 81 | found.append(1.0) 82 | else: 83 | found.append(0.0) 84 | return np.array(found) 85 | 86 | @classmethod 87 | def load(cls, 88 | model_dir=None, # type: Optional[Text] 89 | model_metadata=None, # type: Optional[Metadata] 90 | cached_component=None, # type: Optional[RegexFeaturizer] 91 | **kwargs # type: **Any 92 | ): 93 | # type: (...) -> RegexFeaturizer 94 | 95 | meta = model_metadata.for_component(cls.name) 96 | file_name = meta.get("regex_file", REGEX_FEATURIZER_FILE_NAME) 97 | regex_file = os.path.join(model_dir, file_name) 98 | 99 | if os.path.exists(regex_file): 100 | known_patterns = utils.read_json_file(regex_file) 101 | return RegexFeaturizer(meta, known_patterns=known_patterns) 102 | else: 103 | return RegexFeaturizer(meta) 104 | 105 | def persist(self, model_dir): 106 | # type: (Text) -> Optional[Dict[Text, Any]] 107 | """Persist this model into the passed directory. 
108 | 109 | Return the metadata necessary to load the model again.""" 110 | 111 | if self.known_patterns: 112 | regex_file = os.path.join(model_dir, REGEX_FEATURIZER_FILE_NAME) 113 | utils.write_json_to_file(regex_file, self.known_patterns, indent=4) 114 | 115 | return {"regex_file": REGEX_FEATURIZER_FILE_NAME} 116 | -------------------------------------------------------------------------------- /rasa_nlu/featurizers/spacy_featurizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import numpy as np 7 | import typing 8 | from typing import Any 9 | 10 | from rasa_nlu.featurizers import Featurizer 11 | from rasa_nlu.training_data import Message 12 | from rasa_nlu.training_data import TrainingData 13 | 14 | if typing.TYPE_CHECKING: 15 | from spacy.language import Language 16 | from spacy.tokens import Doc 17 | 18 | 19 | def ndim(spacy_nlp): 20 | """Number of features used to represent a document / sentence.""" 21 | # type: Language -> int 22 | return spacy_nlp.vocab.vectors_length 23 | 24 | 25 | def features_for_doc(doc): 26 | """Feature vector for a single document / sentence.""" 27 | # type: Doc -> np.ndarray 28 | return doc.vector 29 | 30 | 31 | class SpacyFeaturizer(Featurizer): 32 | name = "intent_featurizer_spacy" 33 | 34 | provides = ["text_features"] 35 | 36 | requires = ["spacy_doc"] 37 | 38 | def train(self, training_data, config, **kwargs): 39 | # type: (TrainingData) -> None 40 | 41 | for example in training_data.intent_examples: 42 | self._set_spacy_features(example) 43 | 44 | def process(self, message, **kwargs): 45 | # type: (Message, **Any) -> None 46 | 47 | self._set_spacy_features(message) 48 | 49 | def _set_spacy_features(self, message): 50 | """Adds the spacy word vectors to the messages text features.""" 51 | 52 | fs = features_for_doc(message.get("spacy_doc")) 53 | features = self._combine_with_existing_text_features(message, fs) 54 | message.set("text_features", features) 55 | -------------------------------------------------------------------------------- /rasa_nlu/run.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import json 7 | import logging 8 | 9 | import six 10 | from builtins import input 11 | 12 | from rasa_nlu import utils 13 | from rasa_nlu.model import Interpreter 14 | 15 | logger = logging.getLogger(__name__) 16 | 17 | 18 | def create_argument_parser(): 19 | import argparse 20 | parser = argparse.ArgumentParser( 21 | description='run a Rasa NLU model locally on the command line ' 22 | 'for manual testing') 23 | 24 | parser.add_argument('-m', '--model', required=True, 25 | help="path to model") 26 | 27 | utils.add_logging_option_arguments(parser, default=logging.INFO) 28 | 29 | return parser 30 | 31 | 32 | def run_cmdline(model_path, component_builder=None): 33 | interpreter = Interpreter.load(model_path, component_builder) 34 | 35 | logger.info("NLU model loaded. 
Type a message and " 36 | "press enter to parse it.") 37 | while True: 38 | text = input().strip() 39 | if six.PY2: 40 | # in python 2 input doesn't return unicode values 41 | text = text.decode("utf-8") 42 | r = interpreter.parse(text) 43 | print(json.dumps(r, indent=2)) 44 | logger.info("Next message:") 45 | 46 | 47 | if __name__ == '__main__': 48 | cmdline_args = create_argument_parser().parse_args() 49 | 50 | utils.configure_colored_logging(cmdline_args.loglevel) 51 | 52 | run_cmdline(cmdline_args.model) 53 | -------------------------------------------------------------------------------- /rasa_nlu/schemas/nlu_model.yml: -------------------------------------------------------------------------------- 1 | allowempty: True 2 | mapping: 3 | language: 4 | type: !!python/str "str" 5 | pipeline: 6 | type: !!python/str "any" 7 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import object 6 | 7 | 8 | class Tokenizer(object): 9 | pass 10 | 11 | 12 | class Token(object): 13 | def __init__(self, text, offset, data=None): 14 | self.offset = offset 15 | self.text = text 16 | self.end = offset + len(text) 17 | self.data = data if data else {} 18 | 19 | def set(self, prop, info): 20 | self.data[prop] = info 21 | 22 | def get(self, prop, default=None): 23 | return self.data.get(prop, default) 24 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/mitie_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | from builtins import str 6 | import re 7 | 8 | from typing import Any 9 | from typing import Dict 10 | from typing import List 11 | from typing import Text 12 | from typing import Tuple 13 | 14 | from rasa_nlu.config import RasaNLUModelConfig 15 | from rasa_nlu.tokenizers import Token 16 | from rasa_nlu.tokenizers import Tokenizer 17 | from rasa_nlu.components import Component 18 | from rasa_nlu.training_data import Message 19 | from rasa_nlu.training_data import TrainingData 20 | 21 | 22 | class MitieTokenizer(Tokenizer, Component): 23 | name = "tokenizer_mitie" 24 | 25 | provides = ["tokens"] 26 | 27 | @classmethod 28 | def required_packages(cls): 29 | # type: () -> List[Text] 30 | return ["mitie"] 31 | 32 | def train(self, training_data, config, **kwargs): 33 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 34 | 35 | for example in training_data.training_examples: 36 | example.set("tokens", self.tokenize(example.text)) 37 | 38 | def process(self, message, **kwargs): 39 | # type: (Message, **Any) -> None 40 | 41 | message.set("tokens", self.tokenize(message.text)) 42 | 43 | def _token_from_offset(self, text, offset, encoded_sentence): 44 | return Token(text.decode('utf-8'), 45 | self._byte_to_char_offset(encoded_sentence, offset)) 46 | 47 | def tokenize(self, text): 48 | # type: (Text) -> List[Token] 49 | import mitie 50 | 51 | encoded_sentence = text.encode('utf-8') 52 | tokenized = mitie.tokenize_with_offsets(encoded_sentence) 53 | tokens = [self._token_from_offset(token, offset, encoded_sentence) 54 | 
for token, offset in tokenized] 55 | return tokens 56 | 57 | @staticmethod 58 | def _byte_to_char_offset(text, byte_offset): 59 | return len(text[:byte_offset].decode('utf-8')) 60 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/spacy_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import typing 7 | from typing import Any, List 8 | 9 | from rasa_nlu.components import Component 10 | from rasa_nlu.config import RasaNLUModelConfig 11 | from rasa_nlu.tokenizers import Tokenizer, Token 12 | from rasa_nlu.training_data import Message 13 | from rasa_nlu.training_data import TrainingData 14 | 15 | if typing.TYPE_CHECKING: 16 | from spacy.tokens.doc import Doc 17 | 18 | 19 | class SpacyTokenizer(Tokenizer, Component): 20 | name = "tokenizer_spacy" 21 | 22 | provides = ["tokens"] 23 | 24 | def train(self, training_data, config, **kwargs): 25 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 26 | 27 | for example in training_data.training_examples: 28 | example.set("tokens", self.tokenize(example.get("spacy_doc"))) 29 | 30 | def process(self, message, **kwargs): 31 | # type: (Message, **Any) -> None 32 | 33 | message.set("tokens", self.tokenize(message.get("spacy_doc"))) 34 | 35 | def tokenize(self, doc): 36 | # type: (Doc) -> List[Token] 37 | 38 | return [Token(t.text, t.idx) for t in doc] 39 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/whitespace_tokenizer.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from typing import Any, List, Text 7 | 8 | from rasa_nlu.components import Component 9 | from rasa_nlu.config import RasaNLUModelConfig 10 | from rasa_nlu.tokenizers import Tokenizer, Token 11 | from rasa_nlu.training_data import Message 12 | from rasa_nlu.training_data import TrainingData 13 | 14 | 15 | class WhitespaceTokenizer(Tokenizer, Component): 16 | name = "tokenizer_whitespace" 17 | 18 | provides = ["tokens"] 19 | 20 | def train(self, training_data, config, **kwargs): 21 | # type: (TrainingData, RasaNLUModelConfig, **Any) -> None 22 | 23 | for example in training_data.training_examples: 24 | example.set("tokens", self.tokenize(example.text)) 25 | 26 | def process(self, message, **kwargs): 27 | # type: (Message, **Any) -> None 28 | 29 | message.set("tokens", self.tokenize(message.text)) 30 | 31 | def tokenize(self, text): 32 | # type: (Text) -> List[Token] 33 | 34 | words = text.split() 35 | running_offset = 0 36 | tokens = [] 37 | for word in words: 38 | word_offset = text.index(word, running_offset) 39 | word_len = len(word) 40 | running_offset = word_offset + word_len 41 | tokens.append(Token(word, word_offset)) 42 | return tokens 43 | -------------------------------------------------------------------------------- /rasa_nlu/tokenizers/yaha_tokenizer.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Nov 23 14:54:35 2017 4 | 5 | @author: user 6 | """ 7 | 8 | from __future__ import unicode_literals 9 | from __future__ import print_function 10 | from 
__future__ import division 11 | from __future__ import absolute_import 12 | 13 | from typing import Any 14 | from typing import Dict 15 | from typing import List 16 | from typing import Text 17 | 18 | from rasa_nlu.config import RasaNLUConfig 19 | from rasa_nlu.tokenizers import Tokenizer, Token 20 | from rasa_nlu.components import Component 21 | from rasa_nlu.training_data import Message 22 | from rasa_nlu.training_data import TrainingData 23 | 24 | import sys 25 | from yaha import Cuttor 26 | 27 | reload(sys) 28 | sys.setdefaultencoding('utf-8') 29 | 30 | class YahaTokenizer(Tokenizer, Component): 31 | 32 | 33 | name = "tokenizer_yaha" 34 | 35 | provides = ["tokens"] 36 | 37 | cuttor = Cuttor() 38 | 39 | def __init__(self): 40 | pass 41 | 42 | 43 | @classmethod 44 | def required_packages(cls): 45 | # type: () -> List[Text] 46 | return ["yaha"] 47 | 48 | def train(self, training_data, config, **kwargs): 49 | # type: (TrainingData, RasaNLUConfig, **Any) -> None 50 | if config['language'] != 'zh': 51 | raise Exception("tokenizer_yaha is only used for Chinese. Check your configure json file.") 52 | 53 | for example in training_data.training_examples: 54 | example.set("tokens", self.tokenize(example.text)) 55 | 56 | def process(self, message, **kwargs): 57 | # type: (Message, **Any) -> None 58 | 59 | message.set("tokens", self.tokenize(message.text)) 60 | 61 | def tokenize(self, text): 62 | # type: (Text) -> List[Token] 63 | tokenized = self.cuttor.tokenize(text.decode('utf-8'), search=True) 64 | tokens = [Token(word, start) for (word, start, end) in tokenized] 65 | 66 | return tokens 67 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from rasa_nlu.training_data.message import Message 9 | from rasa_nlu.training_data.training_data import TrainingData 10 | from rasa_nlu.training_data.loading import load_data 11 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/__init__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from rasa_nlu.training_data.formats.dialogflow import DialogflowReader 7 | from rasa_nlu.training_data.formats.luis import LuisReader 8 | from rasa_nlu.training_data.formats.wit import WitReader 9 | from rasa_nlu.training_data.formats.markdown import MarkdownWriter, MarkdownReader 10 | from rasa_nlu.training_data.formats.rasa import RasaReader, RasaWriter 11 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/dialogflow.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | import os 8 | 9 | from rasa_nlu.training_data import Message, TrainingData 10 | from rasa_nlu.training_data.formats.readerwriter import TrainingDataReader 11 | from rasa_nlu import utils 12 | from 
rasa_nlu.training_data.util import transform_entity_synonyms 13 | 14 | logger = logging.getLogger(__name__) 15 | 16 | DIALOGFLOW_PACKAGE = "dialogflow_package" 17 | DIALOGFLOW_AGENT = "dialogflow_agent" 18 | DIALOGFLOW_INTENT = "dialogflow_intent" 19 | DIALOGFLOW_INTENT_EXAMPLES = "dialogflow_intent_examples" 20 | DIALOGFLOW_ENTITIES = "dialogflow_entities" 21 | DIALOGFLOW_ENTITY_ENTRIES = "dialogflow_entity_entries" 22 | 23 | 24 | class DialogflowReader(TrainingDataReader): 25 | def read(self, fn, **kwargs): 26 | # type: ([Text]) -> TrainingData 27 | """Loads training data stored in the Dialogflow data format.""" 28 | 29 | language = kwargs["language"] 30 | fformat = kwargs["fformat"] 31 | 32 | if fformat not in {DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES}: 33 | raise ValueError("fformat must be either {}, or {}".format(DIALOGFLOW_INTENT, DIALOGFLOW_ENTITIES)) 34 | 35 | root_js = utils.read_json_file(fn) 36 | examples_js = self._read_examples_js(fn, language, fformat) 37 | 38 | if not examples_js: 39 | logger.warning("No training examples found for dialogflow file {}!".format(fn)) 40 | return TrainingData() 41 | elif fformat == DIALOGFLOW_INTENT: 42 | return self._read_intent(root_js, examples_js) 43 | elif fformat == DIALOGFLOW_ENTITIES: 44 | return self._read_entities(examples_js) 45 | 46 | def _read_intent(self, intent_js, examples_js): 47 | """Reads the intent and examples from respective jsons.""" 48 | intent = intent_js.get("name") 49 | 50 | training_examples = [] 51 | for ex in examples_js: 52 | text, entities = self._join_text_chunks(ex['data']) 53 | training_examples.append(Message.build(text, intent, entities)) 54 | 55 | return TrainingData(training_examples) 56 | 57 | def _join_text_chunks(self, chunks): 58 | """Combines text chunks and extracts entities.""" 59 | utterance = "" 60 | entities = [] 61 | for chunk in chunks: 62 | entity = self._extract_entity(chunk, len(utterance)) 63 | if entity: 64 | entities.append(entity) 65 | utterance += chunk["text"] 66 | 67 | return utterance, entities 68 | 69 | def _extract_entity(self, chunk, current_offset): 70 | """Extract an entity from a chunk if present.""" 71 | entity = None 72 | if "meta" in chunk or "alias" in chunk: 73 | start = current_offset 74 | text = chunk['text'] 75 | end = start + len(text) 76 | entity_type = chunk.get("alias", chunk["meta"]) 77 | if entity_type != u'@sys.ignore': 78 | entity = utils.build_entity(start, end, text, entity_type) 79 | 80 | return entity 81 | 82 | def _read_entities(self, examples_js): 83 | entity_synonyms = transform_entity_synonyms(examples_js) 84 | return TrainingData([], entity_synonyms) 85 | 86 | def _read_examples_js(self, fn, language, fformat): 87 | """Infer and load the example file based on the root filename and root format.""" 88 | examples_type = "usersays" if fformat == DIALOGFLOW_INTENT else "entries" 89 | examples_fn_ending = "_{}_{}.json".format(examples_type, language) 90 | examples_fn = fn.replace(".json", examples_fn_ending) 91 | if os.path.isfile(examples_fn): 92 | return utils.read_json_file(examples_fn) 93 | else: 94 | return None 95 | 96 | def reads(self, s, **kwargs): 97 | raise NotImplementedError 98 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/luis.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | 
import logging 7 | 8 | from rasa_nlu.training_data import Message, TrainingData 9 | from rasa_nlu.training_data.formats.readerwriter import JsonTrainingDataReader 10 | 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | class LuisReader(JsonTrainingDataReader): 15 | 16 | def read_from_json(self, js, **kwargs): 17 | # type: (Text, Any) -> TrainingData 18 | """Loads training data stored in the LUIS.ai data format.""" 19 | 20 | training_examples = [] 21 | regex_features = [] 22 | 23 | # Simple check to ensure we support this luis data schema version 24 | if not js["luis_schema_version"].startswith("2"): 25 | raise Exception("Invalid luis data schema version {}, should be 2.x.x. " 26 | "Make sure to use the latest luis version " 27 | "(e.g. by downloading your data again)." 28 | "".format(js["luis_schema_version"])) 29 | 30 | for r in js.get("regex_features", []): 31 | if r.get("activated", False): 32 | regex_features.append({"name": r.get("name"), 33 | "pattern": r.get("pattern")}) 34 | 35 | for s in js["utterances"]: 36 | text = s.get("text") 37 | intent = s.get("intent") 38 | entities = [] 39 | for e in s.get("entities") or []: 40 | start, end = e["startPos"], e["endPos"] + 1 41 | val = text[start:end] 42 | entities.append({"entity": e["entity"], 43 | "value": val, 44 | "start": start, 45 | "end": end}) 46 | 47 | data = {"entities": entities} 48 | if intent: 49 | data["intent"] = intent 50 | training_examples.append(Message(text, data)) 51 | return TrainingData(training_examples, regex_features=regex_features) 52 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/readerwriter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import json 7 | from rasa_nlu import utils 8 | 9 | 10 | class TrainingDataReader(object): 11 | def read(self, filename, **kwargs): 12 | """Reads TrainingData from a file.""" 13 | return self.reads(utils.read_file(filename), **kwargs) 14 | 15 | def reads(self, s, **kwargs): 16 | """Reads TrainingData from a string.""" 17 | raise NotImplementedError 18 | 19 | 20 | class TrainingDataWriter(object): 21 | def dump(self, filename, training_data): 22 | """Writes a TrainingData object in markdown format to a file.""" 23 | s = self.dumps(training_data) 24 | utils.write_to_file(filename, s) 25 | 26 | def dumps(self, training_data): 27 | """Turns TrainingData into a string.""" 28 | raise NotImplementedError 29 | 30 | 31 | class JsonTrainingDataReader(TrainingDataReader): 32 | def reads(self, s, **kwargs): 33 | """Transforms string into json object and passes it on.""" 34 | js = json.loads(s) 35 | return self.read_from_json(js, **kwargs) 36 | 37 | def read_from_json(self, js, **kwargs): 38 | """Reads TrainingData from a json object.""" 39 | raise NotImplementedError 40 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/formats/wit.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | from rasa_nlu.training_data import Message, TrainingData 9 | from rasa_nlu.training_data.formats.readerwriter import JsonTrainingDataReader 10 | 11 | 
logger = logging.getLogger(__name__) 12 | 13 | 14 | class WitReader(JsonTrainingDataReader): 15 | 16 | def read_from_json(self, js, **kwargs): 17 | # type: (Text, Any) -> TrainingData 18 | """Loads training data stored in the WIT.ai data format.""" 19 | 20 | training_examples = [] 21 | 22 | for s in js["data"]: 23 | entities = s.get("entities") 24 | if entities is None: 25 | continue 26 | text = s.get("text") 27 | intents = [e["value"] for e in entities if e["entity"] == 'intent'] 28 | intent = intents[0].strip("\"") if intents else None 29 | 30 | entities = [e 31 | for e in entities 32 | if ("start" in e and "end" in e and 33 | e["entity"] != 'intent')] 34 | for e in entities: 35 | # for some reason wit adds additional quotes around entity values 36 | e["value"] = e["value"].strip("\"") 37 | 38 | data = {} 39 | if intent: 40 | data["intent"] = intent 41 | if entities is not None: 42 | data["entities"] = entities 43 | training_examples.append(Message(text, data)) 44 | return TrainingData(training_examples) 45 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/message.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | from rasa_nlu.utils import ordered 9 | 10 | 11 | class Message(object): 12 | def __init__(self, text, data=None, output_properties=None, time=None): 13 | self.text = text 14 | self.time = time 15 | self.data = data if data else {} 16 | 17 | if output_properties: 18 | self.output_properties = output_properties 19 | else: 20 | self.output_properties = set() 21 | 22 | def set(self, prop, info, add_to_output=False): 23 | self.data[prop] = info 24 | if add_to_output: 25 | self.output_properties.add(prop) 26 | 27 | def get(self, prop, default=None): 28 | return self.data.get(prop, default) 29 | 30 | def as_dict(self, only_output_properties=False): 31 | if only_output_properties: 32 | d = {key: value 33 | for key, value in self.data.items() 34 | if key in self.output_properties} 35 | else: 36 | d = self.data 37 | return dict(d, text=self.text) 38 | 39 | def __eq__(self, other): 40 | if not isinstance(other, Message): 41 | return False 42 | else: 43 | return ((other.text, ordered(other.data)) == 44 | (self.text, ordered(self.data))) 45 | 46 | def __hash__(self): 47 | return hash((self.text, str(ordered(self.data)))) 48 | 49 | @classmethod 50 | def build(cls, text, intent=None, entities=None): 51 | data = {} 52 | if intent: 53 | data["intent"] = intent 54 | if entities: 55 | data["entities"] = entities 56 | return cls(text, data) 57 | -------------------------------------------------------------------------------- /rasa_nlu/training_data/util.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from __future__ import absolute_import 4 | from __future__ import division 5 | from __future__ import print_function 6 | from __future__ import unicode_literals 7 | 8 | import logging 9 | 10 | logger = logging.getLogger(__name__) 11 | 12 | 13 | def transform_entity_synonyms(synonyms, known_synonyms=None): 14 | """Transforms the entity synonyms into a text->value dictionary""" 15 | entity_synonyms = known_synonyms if known_synonyms else {} 16 | for s in synonyms: 17 | if "value" in s and "synonyms" in s: 18 | for synonym in s["synonyms"]: 
19 | entity_synonyms[synonym] = s["value"] 20 | return entity_synonyms 21 | 22 | 23 | def check_duplicate_synonym(entity_synonyms, text, syn, context_str=""): 24 | if text in entity_synonyms and entity_synonyms[text] != syn: 25 | logger.warning("Found inconsistent entity synonyms while {0}, overwriting {1}->{2}" 26 | "with {1}->{2} during merge".format(context_str, text, entity_synonyms[text], syn)) 27 | -------------------------------------------------------------------------------- /rasa_nlu/utils/mitie_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | import typing 9 | from builtins import str 10 | from typing import Any 11 | from typing import Dict 12 | from typing import List 13 | from typing import Optional 14 | from typing import Text 15 | 16 | from rasa_nlu.components import Component 17 | from rasa_nlu.config import RasaNLUModelConfig 18 | from rasa_nlu.model import Metadata 19 | 20 | if typing.TYPE_CHECKING: 21 | import mitie 22 | 23 | 24 | class MitieNLP(Component): 25 | name = "nlp_mitie" 26 | 27 | provides = ["mitie_feature_extractor", "mitie_file"] 28 | 29 | defaults = { 30 | # name of the language model to load - this contains 31 | # the MITIE feature extractor 32 | "model": os.path.join("data", "total_word_feature_extractor.dat"), 33 | } 34 | 35 | def __init__(self, 36 | component_config=None, # type: Dict[Text, Any] 37 | extractor=None 38 | ): 39 | # type: (...) -> None 40 | """Construct a new language model from the MITIE framework.""" 41 | 42 | super(MitieNLP, self).__init__(component_config) 43 | 44 | self.extractor = extractor 45 | 46 | @classmethod 47 | def required_packages(cls): 48 | # type: () -> List[Text] 49 | return ["mitie"] 50 | 51 | @classmethod 52 | def create(cls, cfg): 53 | # type: (RasaNLUModelConfig) -> MitieNLP 54 | import mitie 55 | 56 | component_conf = cfg.for_component(cls.name, cls.defaults) 57 | model_file = component_conf.get("model") 58 | if not model_file: 59 | raise Exception("The MITIE component 'nlp_mitie' needs " 60 | "the configuration value for 'model'." 61 | "Please take a look at the " 62 | "documentation in the pipeline section " 63 | "to get more info about this " 64 | "parameter.") 65 | extractor = mitie.total_word_feature_extractor(model_file) 66 | cls.ensure_proper_language_model(extractor) 67 | 68 | return MitieNLP(component_conf, extractor) 69 | 70 | @classmethod 71 | def cache_key(cls, model_metadata): 72 | # type: (Metadata) -> Optional[Text] 73 | 74 | component_meta = model_metadata.for_component(cls.name) 75 | 76 | mitie_file = component_meta.get("model", None) 77 | if mitie_file is not None: 78 | return cls.name + "-" + str(os.path.abspath(mitie_file)) 79 | else: 80 | return None 81 | 82 | def provide_context(self): 83 | # type: () -> Dict[Text, Any] 84 | 85 | return {"mitie_feature_extractor": self.extractor, 86 | "mitie_file": self.component_config.get("model")} 87 | 88 | @staticmethod 89 | def ensure_proper_language_model(extractor): 90 | # type: (Optional[mitie.total_word_feature_extractor]) -> None 91 | 92 | if extractor is None: 93 | raise Exception("Failed to load MITIE feature extractor. 
" 94 | "Loading the model returned 'None'.") 95 | 96 | @classmethod 97 | def load(cls, 98 | model_dir=None, # type: Optional[Text] 99 | model_metadata=None, # type: Optional[Metadata] 100 | cached_component=None, # type: Optional[MitieNLP] 101 | **kwargs # type: **Any 102 | ): 103 | # type: (...) -> MitieNLP 104 | import mitie 105 | 106 | if cached_component: 107 | return cached_component 108 | 109 | component_meta = model_metadata.for_component(cls.name) 110 | mitie_file = component_meta.get("model") 111 | return cls(component_meta, 112 | mitie.total_word_feature_extractor(mitie_file)) 113 | 114 | def persist(self, model_dir): 115 | # type: (Text) -> Dict[Text, Any] 116 | 117 | return { 118 | "mitie_feature_extractor_fingerprint": self.extractor.fingerprint, 119 | "model": self.component_config.get("model") 120 | } 121 | -------------------------------------------------------------------------------- /rasa_nlu/version.py: -------------------------------------------------------------------------------- 1 | from __future__ import unicode_literals 2 | from __future__ import print_function 3 | from __future__ import division 4 | from __future__ import absolute_import 5 | 6 | __version__ = '0.12.2' 7 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | -r alt_requirements/requirements_bare.txt 2 | -------------------------------------------------------------------------------- /sample_configs/config_crf.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_spacy" 5 | model: "en" 6 | - name: "ner_spacy" 7 | - name: "ner_ngrams" 8 | max_number_of_ngrams: 7 9 | - name: "ner_duckling_http" 10 | url: "http://my_url" 11 | dimensions: 12 | - "NUMBER" 13 | - name: "ner_crf" 14 | BILOU_flag: true 15 | features: 16 | # features for word before token 17 | - ["low", "title", "upper", "pos", "pos2"] 18 | # features of token itself 19 | - ["bias", "low", "word3", "word2", "upper", "title", "digit", "pos", "pos2", "pattern"] 20 | # features for word after the token we want to tag 21 | - ["low", "title", "upper", "pos", "pos2"] 22 | max_iterations: 50 23 | L1_c: 1 24 | L2_c: 1e-3 25 | - name: "intent_classifier_sklearn" 26 | C: [1, 2, 5, 10, 20, 100] 27 | kernel: "linear" 28 | -------------------------------------------------------------------------------- /sample_configs/config_defaults.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: [] 4 | 5 | data: 6 | -------------------------------------------------------------------------------- /sample_configs/config_embedding.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "tensorflow_embedding" 4 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_classifier_mitie" 11 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu_test", 3 | "pipeline": ["nlp_mitie", 4 | "tokenizer_jieba", 5 | "ner_mitie", 6 | "ner_synonyms", 7 | "intent_entity_featurizer_regex", 8 | "intent_featurizer_mitie", 9 | "intent_classifier_sklearn"], 10 | "language": "zh", 11 | "mitie_file": "./data/total_word_feature_extractor_zh.dat", 12 | "path" : "./models", 13 | "data" : "./data/examples/rasa/demo-rasa_zh.json" 14 | } 15 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_featurizer_mitie" 11 | - name: "intent_classifier_sklearn" 12 | -------------------------------------------------------------------------------- /sample_configs/config_jieba_mitie_sklearn_plus_dict_path.yml: -------------------------------------------------------------------------------- 1 | language: "zh" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor_zh.dat" 6 | - name: "tokenizer_jieba" 7 | default_dict: "./default_dict.big" 8 | user_dicts: "./jieba_userdict" 9 | # you can put in file path or directory path as the "user_dicts" value 10 | # user_dicts: "./jieba_userdict/jieba_userdict.txt" 11 | - name: "ner_mitie" 12 | - name: "ner_synonyms" 13 | - name: "intent_entity_featurizer_regex" 14 | - name: "intent_featurizer_mitie" 15 | - name: "intent_classifier_sklearn" 16 | -------------------------------------------------------------------------------- /sample_configs/config_mitie.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor.dat" 6 | - name: "tokenizer_mitie" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_classifier_mitie" 11 | -------------------------------------------------------------------------------- /sample_configs/config_mitie_sklearn.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | - name: "nlp_mitie" 5 | model: "data/total_word_feature_extractor.dat" 6 | - name: "tokenizer_mitie" 7 | - name: "ner_mitie" 8 | - name: "ner_synonyms" 9 | - name: "intent_entity_featurizer_regex" 10 | - name: "intent_featurizer_mitie" 11 | - name: "intent_classifier_sklearn" 12 | -------------------------------------------------------------------------------- /sample_configs/config_spacy.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | -------------------------------------------------------------------------------- /sample_configs/config_spacy_duckling.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: 4 | # this is using the spacy sklearn pipeline, adding duckling 5 | # all components will use their default values 6 | - name: "nlp_spacy" 7 | - name: "tokenizer_spacy" 8 | - name: "intent_featurizer_spacy" 9 | - name: "ner_crf" 10 | - name: "ner_synonyms" 11 | - 
name: "intent_classifier_sklearn" 12 | - name: "ner_duckling" 13 | -------------------------------------------------------------------------------- /sample_configs/config_train_server_json.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | 5 | # data contains the same json, as described in the training data section 6 | data: { 7 | "rasa_nlu_data": { 8 | "common_examples": [ 9 | { 10 | "text": "hey", 11 | "intent": "greet", 12 | "entities": [] 13 | } 14 | ] 15 | } 16 | } -------------------------------------------------------------------------------- /sample_configs/config_train_server_md.yml: -------------------------------------------------------------------------------- 1 | language: "en" 2 | 3 | pipeline: "spacy_sklearn" 4 | 5 | # data contains the same md, as described in the training data section 6 | data: | 7 | ## intent:affirm 8 | - yes 9 | - yep 10 | 11 | ## intent:goodbye 12 | - bye 13 | - goodbye 14 | -------------------------------------------------------------------------------- /sample_configs/config_yaha_mitie_sklearn.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "rasa_nlu_test", 3 | "pipeline": ["nlp_mitie", 4 | "tokenizer_yaha", 5 | "ner_mitie", 6 | "ner_synonyms", 7 | "intent_entity_featurizer_regex", 8 | "intent_featurizer_mitie", 9 | "intent_classifier_sklearn"], 10 | "language": "zh", 11 | "mitie_file": "./data/total_word_feature_extractor_zh.dat", 12 | "path" : "./models", 13 | "data" : "./data/examples/rasa/demo-rasa_zh.json" 14 | } 15 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | # pytest PEP8 configuration 2 | [tool:pytest] 3 | pep8maxlinelength = 120 4 | pep8ignore = 5 | docs/conf.py ALL 6 | *.py W503 7 | *.py E126 8 | 9 | # ignoring W503: line break occurred before a binary operator 10 | # ignoring E126: continuation line over-indented for hanging indent 11 | 12 | [metadata] 13 | description-file = README.md 14 | license_file = LICENSE.txt 15 | 16 | [bdist_wheel] 17 | # this will create a universal wheel for all distributions and py2 & py3 18 | universal=1 19 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 4 | from setuptools import setup, find_packages 5 | 6 | here = os.path.abspath(os.path.dirname(__file__)) 7 | 8 | # Avoids IDE errors, but actual version is read from version.py 9 | __version__ = None 10 | exec(open('rasa_nlu/version.py').read()) 11 | 12 | # Get the long description from the README file 13 | with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: 14 | long_description = f.read() 15 | 16 | tests_requires = [ 17 | "pytest", 18 | "pytest-pep8", 19 | "pytest-services", 20 | "pytest-cov", 21 | "pytest-twisted<1.6", 22 | "treq" 23 | ] 24 | 25 | install_requires = [ 26 | "pathlib", 27 | "cloudpickle", 28 | "gevent", 29 | "klein", 30 | "boto3", 31 | "packaging", 32 | "typing", 33 | "future", 34 | "six", 35 | "tqdm", 36 | "requests", 37 | "jsonschema", 38 | "matplotlib", 39 | "numpy>=1.13", 40 | "simplejson", 41 | "pyyaml", 42 | "coloredlogs" 43 | ] 44 | 45 | extras_requires = { 46 | 'test': tests_requires, 47 | 'spacy': ["scikit-learn", 48 | "sklearn-crfsuite", 49 | "scipy", 50 | "spacy>2.0", 51 | ], 52 
| 'tensorflow': ["scikit-learn", 53 | "tensorflow", 54 | ], 55 | 'mitie': ["mitie"], 56 | 'jieba': ["jieba"], 57 | #'yaha': ["yaha"], 58 | } 59 | 60 | setup( 61 | name='rasa-nlu', 62 | packages=find_packages(exclude=['contrib', 'docs', 'tests']), 63 | classifiers=[ 64 | "Development Status :: 4 - Beta", 65 | "Intended Audience :: Developers", 66 | "License :: OSI Approved :: Apache Software License", 67 | # supported python versions 68 | "Programming Language :: Python", 69 | "Programming Language :: Python :: 2.7", 70 | "Programming Language :: Python :: 3.5", 71 | "Programming Language :: Python :: 3.6", 72 | "Topic :: Software Development :: Libraries", 73 | ], 74 | version=__version__, 75 | install_requires=install_requires, 76 | tests_require=tests_requires, 77 | extras_require=extras_requires, 78 | include_package_data=True, 79 | description="Rasa NLU a natural language parser for bots", 80 | long_description=long_description, 81 | long_description_content_type="text/markdown", 82 | author='Rasa Technologies GmbH', 83 | author_email='hi@rasa.com', 84 | maintainer="Tom Bocklisch", 85 | maintainer_email="tom@rasa.com", 86 | license='Apache 2.0', 87 | url="https://rasa.com", 88 | keywords="nlp machine-learning machine-learning-library bot bots " 89 | "botkit rasa conversational-agents conversational-ai chatbot" 90 | "chatbot-framework bot-framework", 91 | download_url="https://github.com/RasaHQ/rasa_nlu/archive/{}.tar.gz" 92 | "".format(__version__), 93 | project_urls={ 94 | 'Bug Reports': 'https://github.com/rasahq/rasa_nlu/issues', 95 | 'Source': 'https://github.com/rasahq/rasa_nlu', 96 | }, 97 | ) 98 | 99 | print("\nWelcome to Rasa NLU!") 100 | print("If any questions please visit documentation " 101 | "page https://nlu.rasa.com") 102 | print("or join community chat on https://gitter.im/RasaHQ/rasa_nlu") 103 | -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/entity_extractor.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie/model_20170628-002704/entity_extractor.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/intent_classifier.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie/model_20170628-002704/intent_classifier.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie/model_20170628-002704/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_synonyms": "entity_synonyms.json", 3 | "mitie_file": "data/total_word_feature_extractor.dat", 4 | "pipeline": [ 5 | "nlp_mitie", 6 | "tokenizer_mitie", 7 | "ner_mitie", 8 | "ner_synonyms", 9 | "intent_classifier_mitie" 10 | ], 11 | "trained_at": "20170628-002704", 12 | "training_data": "training_data.json", 13 | "rasa_nlu_version": "0.9.0a4", 14 | "intent_classifier_mitie": 
"intent_classifier.dat", 15 | "entity_extractor_mitie": "entity_extractor.dat", 16 | "mitie_feature_extractor_fingerprint": 10023965992282753551, 17 | "language": "en" 18 | } -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/entity_extractor.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie_sklearn/model_20170628-002712/entity_extractor.dat -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/intent_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_mitie_sklearn/model_20170628-002712/intent_classifier.pkl -------------------------------------------------------------------------------- /test_models/test_model_mitie_sklearn/model_20170628-002712/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_synonyms": "entity_synonyms.json", 3 | "mitie_file": "data/total_word_feature_extractor.dat", 4 | "pipeline": [ 5 | "nlp_mitie", 6 | "tokenizer_mitie", 7 | "ner_mitie", 8 | "ner_synonyms", 9 | "intent_featurizer_mitie", 10 | "intent_classifier_sklearn" 11 | ], 12 | "trained_at": "20170628-002712", 13 | "intent_classifier_sklearn": "intent_classifier.pkl", 14 | "rasa_nlu_version": "0.9.0a4", 15 | "training_data": "training_data.json", 16 | "entity_extractor_mitie": "entity_extractor.dat", 17 | "mitie_feature_extractor_fingerprint": 10023965992282753551, 18 | "language": "en" 19 | } -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/crf_model.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_spacy_sklearn/model_20170628-002705/crf_model.pkl -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/entity_synonyms.json: -------------------------------------------------------------------------------- 1 | {"chines": "chinese"} -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/intent_classifier.pkl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/test_models/test_model_spacy_sklearn/model_20170628-002705/intent_classifier.pkl -------------------------------------------------------------------------------- /test_models/test_model_spacy_sklearn/model_20170628-002705/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "entity_extractor_crf": { 3 | "version": 1, 4 | "crf_features": [ 5 | [ 6 | 
"low", 7 | "title", 8 | "upper", 9 | "pos", 10 | "pos2" 11 | ], 12 | [ 13 | "bias", 14 | "low", 15 | "word3", 16 | "word2", 17 | "upper", 18 | "title", 19 | "digit", 20 | "pos", 21 | "pos2" 22 | ], 23 | [ 24 | "low", 25 | "title", 26 | "upper", 27 | "pos", 28 | "pos2" 29 | ] 30 | ], 31 | "model_file": "crf_model.pkl", 32 | "BILOU_flag": true 33 | }, 34 | "entity_synonyms": "entity_synonyms.json", 35 | "trained_at": "20170628-002705", 36 | "pipeline": [ 37 | "nlp_spacy", 38 | "ner_crf", 39 | "ner_synonyms", 40 | "intent_featurizer_spacy", 41 | "intent_classifier_sklearn" 42 | ], 43 | "spacy_model_name": "en", 44 | "training_data": "training_data.json", 45 | "intent_classifier_sklearn": "intent_classifier.pkl", 46 | "rasa_nlu_version": "0.9.0a4", 47 | "language": "en" 48 | } -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/__init__.py -------------------------------------------------------------------------------- /tests/base/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/base/__init__.py -------------------------------------------------------------------------------- /tests/base/test_components.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import pytest 7 | 8 | from rasa_nlu import registry 9 | from rasa_nlu.components import find_unavailable_packages 10 | from rasa_nlu.model import Metadata 11 | 12 | 13 | @pytest.mark.parametrize("component_class", registry.component_classes) 14 | def test_no_components_with_same_name(component_class): 15 | """The name of the components need to be unique as they will 16 | be referenced by name when defining processing pipelines.""" 17 | 18 | names = [cls.name for cls in registry.component_classes] 19 | assert names.count(component_class.name) == 1, \ 20 | "There is more than one component named {}".format(component_class.name) 21 | 22 | 23 | @pytest.mark.parametrize("pipeline_template", 24 | registry.registered_pipeline_templates) 25 | def test_all_components_in_model_templates_exist(pipeline_template): 26 | """We provide a couple of ready to use pipelines, this test ensures 27 | all components referenced by name in the 28 | pipeline definitions are available.""" 29 | 30 | components = registry.registered_pipeline_templates[pipeline_template] 31 | for component in components: 32 | assert component in registry.registered_components, \ 33 | "Model template contains unknown component." 34 | 35 | 36 | @pytest.mark.parametrize("component_class", registry.component_classes) 37 | def test_all_arguments_can_be_satisfied(component_class): 38 | """Check that `train` method parameters can be filled 39 | filled from the context. Similar to `pipeline_init` test.""" 40 | 41 | # All available context arguments that will ever be generated during train 42 | # it might still happen, that in a certain pipeline 43 | # configuration arguments can not be satisfied! 
44 | provided_properties = {provided 45 | for c in registry.component_classes 46 | for provided in c.provides} 47 | 48 | for req in component_class.requires: 49 | assert req in provided_properties, \ 50 | "No component provides required property." 51 | 52 | 53 | def test_find_unavailable_packages(): 54 | unavailable = find_unavailable_packages(["my_made_up_package_name", "io", 55 | "foo_bar", "foo_bar"]) 56 | assert unavailable == {"my_made_up_package_name", "foo_bar"} 57 | 58 | 59 | def test_builder_create_unknown(component_builder, default_config): 60 | with pytest.raises(Exception) as excinfo: 61 | component_builder.create_component("my_made_up_componment", 62 | default_config) 63 | assert "Unknown component name" in str(excinfo.value) 64 | 65 | 66 | def test_builder_create_by_module_path(component_builder, default_config): 67 | from rasa_nlu.featurizers.regex_featurizer import RegexFeaturizer 68 | 69 | path = "rasa_nlu.featurizers.regex_featurizer.RegexFeaturizer" 70 | component = component_builder.create_component(path, default_config) 71 | assert type(component) == RegexFeaturizer 72 | 73 | 74 | def test_builder_load_unknown(component_builder): 75 | with pytest.raises(Exception) as excinfo: 76 | component_builder.load_component("my_made_up_componment", "", 77 | Metadata({}, None)) 78 | assert "Unknown component name" in str(excinfo.value) 79 | -------------------------------------------------------------------------------- /tests/base/test_config.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import tempfile 7 | 8 | import pytest 9 | from typing import Text 10 | 11 | import rasa_nlu 12 | from rasa_nlu import config, utils 13 | from rasa_nlu.config import RasaNLUModelConfig, InvalidConfigError 14 | from rasa_nlu.registry import registered_pipeline_templates 15 | from tests.conftest import CONFIG_DEFAULTS_PATH 16 | from tests.utilities import write_file_config 17 | 18 | defaults = utils.read_yaml_file(CONFIG_DEFAULTS_PATH) 19 | 20 | 21 | def test_default_config(default_config): 22 | assert default_config.as_dict() == defaults 23 | 24 | 25 | def test_blank_config(): 26 | file_config = {} 27 | f = write_file_config(file_config) 28 | final_config = config.load(f.name) 29 | assert final_config.as_dict() == defaults 30 | 31 | 32 | def test_invalid_config_json(): 33 | file_config = """pipeline: [spacy_sklearn""" # invalid yaml 34 | with tempfile.NamedTemporaryFile("w+", suffix="_tmp_config_file.json") as f: 35 | f.write(file_config) 36 | f.flush() 37 | with pytest.raises(rasa_nlu.config.InvalidConfigError): 38 | config.load(f.name) 39 | 40 | 41 | def test_invalid_pipeline_template(): 42 | args = {"pipeline": "my_made_up_name"} 43 | f = write_file_config(args) 44 | with pytest.raises(InvalidConfigError) as execinfo: 45 | config.load(f.name) 46 | assert "unknown pipeline template" in str(execinfo.value) 47 | 48 | 49 | def test_pipeline_looksup_registry(): 50 | pipeline_template = list(registered_pipeline_templates)[0] 51 | args = {"pipeline": pipeline_template} 52 | f = write_file_config(args) 53 | final_config = config.load(f.name) 54 | components = [c.get("name") for c in final_config.pipeline] 55 | assert components == registered_pipeline_templates[pipeline_template] 56 | 57 | 58 | def test_default_config_file(): 59 | final_config = RasaNLUModelConfig() 60 | assert len(final_config) 
> 1 61 | 62 | 63 | def test_set_attr_on_component(default_config): 64 | cfg = config.load("sample_configs/config_spacy.yml") 65 | cfg.set_component_attr("intent_classifier_sklearn", C=324) 66 | 67 | expected = {"C": 324, "name": "intent_classifier_sklearn"} 68 | 69 | assert cfg.for_component("intent_classifier_sklearn") == expected 70 | assert cfg.for_component("tokenizer_spacy") == {"name": "tokenizer_spacy"} 71 | -------------------------------------------------------------------------------- /tests/base/test_data_router.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import mock 7 | 8 | from rasa_nlu import data_router 9 | from rasa_nlu import persistor 10 | 11 | 12 | def test_list_projects_in_cloud_method(): 13 | class UniqueValue(object): 14 | pass 15 | 16 | def mocked_get_persistor(*args, **kwargs): 17 | class MockedClass(object): 18 | def list_projects(self): 19 | return [UniqueValue()] 20 | 21 | def list_models(self, project): 22 | return [UniqueValue()] 23 | 24 | return MockedClass() 25 | 26 | def mocked_data_router_init(self, *args, **kwargs): 27 | self.config = None 28 | 29 | with mock.patch.object(persistor, 'get_persistor', 30 | mocked_get_persistor): 31 | return_value = data_router.DataRouter()._list_projects_in_cloud() 32 | assert isinstance(return_value[0], UniqueValue) 33 | -------------------------------------------------------------------------------- /tests/base/test_interpreter.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import rasa_nlu 7 | 8 | import pytest 9 | 10 | from rasa_nlu import registry, training_data 11 | from rasa_nlu.model import Interpreter 12 | from tests import utilities 13 | 14 | 15 | @utilities.slowtest 16 | @pytest.mark.parametrize("pipeline_template", 17 | list(registry.registered_pipeline_templates.keys())) 18 | def test_interpreter(pipeline_template, component_builder, tmpdir): 19 | test_data = "data/examples/rasa/demo-rasa.json" 20 | _conf = utilities.base_test_conf(pipeline_template) 21 | _conf["data"] = test_data 22 | td = training_data.load_data(test_data) 23 | interpreter = utilities.interpreter_for(component_builder, 24 | "data/examples/rasa/demo-rasa.json", 25 | tmpdir.strpath, 26 | _conf) 27 | 28 | texts = ["good bye", "i am looking for an indian spot"] 29 | 30 | for text in texts: 31 | result = interpreter.parse(text, time=None) 32 | assert result['text'] == text 33 | assert (not result['intent']['name'] 34 | or result['intent']['name'] in td.intents) 35 | assert result['intent']['confidence'] >= 0 36 | # Ensure the model doesn't detect entity types that are not present 37 | # Models on our test data set are not stable enough to 38 | # require the exact entities to be found 39 | for entity in result['entities']: 40 | assert entity['entity'] in td.entities 41 | 42 | 43 | @pytest.mark.parametrize("metadata", 44 | [{"rasa_nlu_version": "0.11.0"}, 45 | {"rasa_nlu_version": "0.10.2"}, 46 | {"rasa_nlu_version": "0.12.0a1"}]) 47 | def test_model_not_compatible(metadata): 48 | with pytest.raises(rasa_nlu.model.UnsupportedModelError): 49 | Interpreter.ensure_model_compatibility(metadata) 50 | 51 | 52 | 
@pytest.mark.parametrize("metadata", 53 | [{"rasa_nlu_version": "0.12.0"}, 54 | {"rasa_nlu_version": "0.12.2"}, 55 | {"rasa_nlu_version": "0.12.0a2"}]) 56 | def test_model_is_compatible(metadata): 57 | # should not raise an exception 58 | assert Interpreter.ensure_model_compatibility(metadata) is None 59 | -------------------------------------------------------------------------------- /tests/base/test_persistor.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import os 7 | 8 | import mock 9 | import pytest 10 | from moto import mock_s3 11 | 12 | from tests import utilities 13 | from rasa_nlu import persistor, train 14 | 15 | 16 | class Object(object): 17 | pass 18 | 19 | 20 | def test_if_persistor_class_has_list_projects_method(): 21 | with pytest.raises(NotImplementedError): 22 | persistor.Persistor().list_projects() 23 | 24 | 25 | @mock_s3 26 | def test_list_projects_method_in_AWSPersistor(component_builder, tmpdir): 27 | # artificially create a persisted model 28 | _config = utilities.base_test_conf("keyword") 29 | os.environ["BUCKET_NAME"] = 'rasa-test' 30 | os.environ["AWS_DEFAULT_REGION"] = 'us-east-1' 31 | 32 | (trained, _, persisted_path) = train.do_train( 33 | _config, 34 | data="data/test/demo-rasa-small.json", 35 | path=tmpdir.strpath, 36 | project='mytestproject', 37 | storage='aws', 38 | component_builder=component_builder) 39 | 40 | # We need to create the bucket since this is all in Moto's 'virtual' AWS 41 | # account 42 | awspersistor = persistor.AWSPersistor(os.environ["BUCKET_NAME"]) 43 | result = awspersistor.list_projects() 44 | 45 | assert result == ['mytestproject'] 46 | 47 | 48 | @mock_s3 49 | def test_list_projects_method_raise_exeception_in_AWSPersistor(): 50 | os.environ["AWS_DEFAULT_REGION"] = 'us-east-1' 51 | 52 | awspersistor = persistor.AWSPersistor("rasa-test") 53 | result = awspersistor.list_projects() 54 | 55 | assert result == [] 56 | 57 | 58 | def test_list_projects_method_in_GCSPersistor(): 59 | def mocked_init(self, *args, **kwargs): 60 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 61 | self.bucket = Object() 62 | 63 | def mocked_list_blobs(): 64 | filter_result = Object() 65 | filter_result.name = 'blob_name' 66 | return filter_result, 67 | 68 | self.bucket.list_blobs = mocked_list_blobs 69 | 70 | with mock.patch.object(persistor.GCSPersistor, "__init__", mocked_init): 71 | result = persistor.GCSPersistor("").list_projects() 72 | 73 | assert result == ['project'] 74 | 75 | 76 | def test_list_projects_method_raise_exeception_in_GCSPersistor(): 77 | def mocked_init(self, *args, **kwargs): 78 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 79 | self.bucket = Object() 80 | 81 | def mocked_list_blobs(): 82 | raise ValueError 83 | 84 | self.bucket.list_blobs = mocked_list_blobs 85 | 86 | with mock.patch.object(persistor.GCSPersistor, "__init__", mocked_init): 87 | result = persistor.GCSPersistor("").list_projects() 88 | 89 | assert result == [] 90 | 91 | 92 | def test_list_projects_method_in_AzurePersistor(): 93 | def mocked_init(self, *args, **kwargs): 94 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 95 | self.blob_client = Object() 96 | self.container_name = 'test' 97 | 98 | def mocked_list_blobs( 99 | container_name, 100 | prefix=None 101 
| ): 102 | filter_result = Object() 103 | filter_result.name = 'blob_name' 104 | return filter_result, 105 | 106 | self.blob_client.list_blobs = mocked_list_blobs 107 | 108 | with mock.patch.object(persistor.AzurePersistor, "__init__", mocked_init): 109 | result = persistor.AzurePersistor("").list_projects() 110 | 111 | assert result == ['project'] 112 | 113 | 114 | def test_list_projects_method_raise_exeception_in_AzurePersistor(): 115 | def mocked_init(self, *args, **kwargs): 116 | self._project_and_model_from_filename = lambda x: {'blob_name': ('project', )}[x] 117 | self.blob_client = Object() 118 | 119 | def mocked_list_blobs( 120 | container_name, 121 | prefix=None 122 | ): 123 | raise ValueError 124 | 125 | self.blob_client.list_blobs = mocked_list_blobs 126 | 127 | with mock.patch.object(persistor.AzurePersistor, "__init__", mocked_init): 128 | result = persistor.AzurePersistor("").list_projects() 129 | 130 | assert result == [] 131 | -------------------------------------------------------------------------------- /tests/base/test_project.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import mock 7 | 8 | from rasa_nlu.project import Project 9 | 10 | 11 | def test_dynamic_load_model_with_exists_model(): 12 | MODEL_NAME = 'model_name' 13 | 14 | def mocked_init(*args, **kwargs): 15 | return None 16 | 17 | with mock.patch.object(Project, "__init__", mocked_init): 18 | project = Project() 19 | 20 | project._models = (MODEL_NAME, ) 21 | 22 | result = project._dynamic_load_model(MODEL_NAME) 23 | 24 | assert result == MODEL_NAME 25 | 26 | 27 | def test_dynamic_load_model_with_refresh_exists_model(): 28 | MODEL_NAME = 'model_name' 29 | 30 | def mocked_init(*args, **kwargs): 31 | return None 32 | 33 | def mocked_search_for_models(self): 34 | self._models = (MODEL_NAME, ) 35 | 36 | with mock.patch.object(Project, "__init__", mocked_init): 37 | with mock.patch.object(Project, '_search_for_models', mocked_search_for_models): 38 | project = Project() 39 | 40 | project._models = () 41 | 42 | result = project._dynamic_load_model(MODEL_NAME) 43 | 44 | assert result == MODEL_NAME 45 | 46 | 47 | def test_dynamic_load_model_with_refresh_not_exists_model(): 48 | LATEST_MODEL_NAME = 'latest_model_name' 49 | 50 | def mocked_init(*args, **kwargs): 51 | return None 52 | 53 | def mocked_search_for_models(self): 54 | pass 55 | 56 | def mocked_latest_project_model(self): 57 | return LATEST_MODEL_NAME 58 | 59 | with mock.patch.object(Project, "__init__", mocked_init): 60 | with mock.patch.object(Project, "_search_for_models", mocked_search_for_models): 61 | with mock.patch.object(Project, "_latest_project_model", mocked_latest_project_model): 62 | project = Project() 63 | 64 | project._models = () 65 | 66 | result = project._dynamic_load_model('model_name') 67 | 68 | assert result == LATEST_MODEL_NAME 69 | 70 | 71 | def test_dynamic_load_model_with_model_is_none(): 72 | LATEST_MODEL_NAME = 'latest_model_name' 73 | 74 | def mocked_init(*args, **kwargs): 75 | return None 76 | 77 | def mocked_search_for_models(self): 78 | pass 79 | 80 | def mocked_latest_project_model(self): 81 | return LATEST_MODEL_NAME 82 | 83 | with mock.patch.object(Project, "__init__", mocked_init): 84 | with mock.patch.object(Project, "_search_for_models", mocked_search_for_models): 85 | with mock.patch.object(Project, 
"_latest_project_model", mocked_latest_project_model): 86 | project = Project() 87 | 88 | project._models = () 89 | 90 | result = project._dynamic_load_model(None) 91 | 92 | assert result == LATEST_MODEL_NAME 93 | -------------------------------------------------------------------------------- /tests/base/test_synonyms.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | from rasa_nlu.extractors.entity_synonyms import EntitySynonymMapper 7 | 8 | 9 | def test_entity_synonyms(): 10 | entities = [{ 11 | "entity": "test", 12 | "value": "chines", 13 | "start": 0, 14 | "end": 6 15 | }, { 16 | "entity": "test", 17 | "value": "chinese", 18 | "start": 0, 19 | "end": 6 20 | }, { 21 | "entity": "test", 22 | "value": "china", 23 | "start": 0, 24 | "end": 6 25 | }] 26 | ent_synonyms = {"chines": "chinese", "NYC": "New York City"} 27 | EntitySynonymMapper(synonyms=ent_synonyms).replace_synonyms(entities) 28 | assert len(entities) == 3 29 | assert entities[0]["value"] == "chinese" 30 | assert entities[1]["value"] == "chinese" 31 | assert entities[2]["value"] == "china" 32 | -------------------------------------------------------------------------------- /tests/base/test_tokenizers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from __future__ import absolute_import 3 | from __future__ import division 4 | from __future__ import print_function 5 | from __future__ import unicode_literals 6 | 7 | 8 | def test_whitespace(): 9 | from rasa_nlu.tokenizers.whitespace_tokenizer import WhitespaceTokenizer 10 | tk = WhitespaceTokenizer() 11 | 12 | assert [t.text for t in tk.tokenize("Forecast for lunch")] == \ 13 | ['Forecast', 'for', 'lunch'] 14 | 15 | assert [t.offset for t in tk.tokenize("Forecast for lunch")] == \ 16 | [0, 9, 13] 17 | 18 | assert [t.text for t in tk.tokenize("hey ńöñàśçií how're you?")] == \ 19 | ['hey', 'ńöñàśçií', 'how\'re', 'you?'] 20 | 21 | assert [t.offset for t in tk.tokenize("hey ńöñàśçií how're you?")] == \ 22 | [0, 4, 13, 20] 23 | 24 | 25 | def test_spacy(spacy_nlp): 26 | from rasa_nlu.tokenizers.spacy_tokenizer import SpacyTokenizer 27 | tk = SpacyTokenizer() 28 | 29 | text = "Forecast for lunch" 30 | assert [t.text for t in tk.tokenize(spacy_nlp(text))] == \ 31 | ['Forecast', 'for', 'lunch'] 32 | assert [t.offset for t in tk.tokenize(spacy_nlp(text))] == \ 33 | [0, 9, 13] 34 | 35 | text = "hey ńöñàśçií how're you?" 36 | assert [t.text for t in tk.tokenize(spacy_nlp(text))] == \ 37 | ['hey', 'ńöñàśçií', 'how', '\'re', 'you', '?'] 38 | assert [t.offset for t in tk.tokenize(spacy_nlp(text))] == \ 39 | [0, 4, 13, 16, 20, 23] 40 | 41 | 42 | def test_mitie(): 43 | from rasa_nlu.tokenizers.mitie_tokenizer import MitieTokenizer 44 | tk = MitieTokenizer() 45 | 46 | text = "Forecast for lunch" 47 | assert [t.text for t in tk.tokenize(text)] == \ 48 | ['Forecast', 'for', 'lunch'] 49 | assert [t.offset for t in tk.tokenize(text)] == \ 50 | [0, 9, 13] 51 | 52 | text = "hey ńöñàśçií how're you?" 
53 | assert [t.text for t in tk.tokenize(text)] == \ 54 | ['hey', 'ńöñàśçií', 'how', '\'re', 'you', '?'] 55 | assert [t.offset for t in tk.tokenize(text)] == \ 56 | [0, 4, 13, 16, 20, 23] 57 | 58 | 59 | def test_jieba(): 60 | from rasa_nlu.tokenizers.jieba_tokenizer import JiebaTokenizer 61 | tk = JiebaTokenizer() 62 | 63 | assert [t.text for t in tk.tokenize("我想去吃兰州拉面")] == \ 64 | ['我', '想', '去', '吃', '兰州', '拉面'] 65 | 66 | assert [t.offset for t in tk.tokenize("我想去吃兰州拉面")] == \ 67 | [0, 1, 2, 3, 4, 6] 68 | 69 | assert [t.text for t in tk.tokenize("Micheal你好吗?")] == \ 70 | ['Micheal', '你好', '吗', '?'] 71 | 72 | assert [t.offset for t in tk.tokenize("Micheal你好吗?")] == \ 73 | [0, 7, 9, 10] -------------------------------------------------------------------------------- /tests/base/test_utils.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import io 7 | import os 8 | import pickle 9 | import tempfile 10 | 11 | import pytest 12 | 13 | from rasa_nlu import utils 14 | from rasa_nlu.utils import ( 15 | relative_normpath, create_dir, is_url, ordered, is_model_dir, remove_model, 16 | write_json_to_file, write_to_file) 17 | 18 | 19 | @pytest.fixture 20 | def empty_model_dir(scope="function"): 21 | temp_path = tempfile.mkdtemp() 22 | yield temp_path 23 | if os.path.exists(temp_path): 24 | os.rmdir(temp_path) 25 | 26 | 27 | def test_relative_normpath(): 28 | test_file = "/my/test/path/file.txt" 29 | assert relative_normpath(test_file, "/my/test") == "path/file.txt" 30 | assert relative_normpath(None, "/my/test") is None 31 | 32 | 33 | def test_list_files_invalid_resource(): 34 | with pytest.raises(ValueError) as execinfo: 35 | utils.list_files(None) 36 | assert "must be a string type" in str(execinfo.value) 37 | 38 | 39 | def test_list_files_non_existing_dir(): 40 | with pytest.raises(ValueError) as execinfo: 41 | utils.list_files("my/made_up/path") 42 | assert "Could not locate the resource" in str(execinfo.value) 43 | 44 | 45 | def test_list_files_ignores_hidden_files(tmpdir): 46 | # create a hidden file 47 | open(os.path.join(tmpdir.strpath, ".hidden"), 'a').close() 48 | # create a normal file 49 | normal_file = os.path.join(tmpdir.strpath, "normal_file") 50 | open(normal_file, 'a').close() 51 | assert utils.list_files(tmpdir.strpath) == [normal_file] 52 | 53 | 54 | def test_creation_of_existing_dir(tmpdir): 55 | # makes sure there is no exception 56 | assert create_dir(tmpdir.strpath) is None 57 | 58 | 59 | def test_ordered(): 60 | target = {"a": [1, 3, 2], "c": "a", "b": 1} 61 | assert ordered(target) == [('a', [1, 2, 3]), ('b', 1), ('c', 'a')] 62 | 63 | 64 | @pytest.mark.parametrize(("model_dir", "expected"), 65 | [("test_models/test_model_mitie/model_20170628-002704", True), 66 | ("test_models/test_model_mitie_sklearn/model_20170628-002712", True), 67 | ("test_models/test_model_spacy_sklearn/model_20170628-002705", True), 68 | ("test_models/", False), 69 | ("test_models/nonexistent_for_sure_123", False)]) 70 | def test_is_model_dir(model_dir, expected): 71 | assert is_model_dir(model_dir) == expected 72 | 73 | 74 | def test_is_model_dir_empty(empty_model_dir): 75 | assert is_model_dir(empty_model_dir) 76 | 77 | 78 | def test_remove_model_empty(empty_model_dir): 79 | assert remove_model(empty_model_dir) 80 | 81 | 82 | def test_remove_model_with_files(empty_model_dir): 83 | metadata_file = 
"metadata.json" 84 | metadata_content = {"pipeline": "spacy_sklearn", "language": "en"} 85 | metadata_path = os.path.join(empty_model_dir, metadata_file) 86 | write_json_to_file(metadata_path, metadata_content) 87 | 88 | fake_obj = {"Fake", "model"} 89 | fake_obj_path = os.path.join(empty_model_dir, "component.pkl") 90 | with io.open(fake_obj_path, "wb") as f: 91 | pickle.dump(fake_obj, f) 92 | 93 | assert remove_model(empty_model_dir) 94 | 95 | 96 | def test_remove_model_invalid(empty_model_dir): 97 | test_file = "something.else" 98 | test_content = "Some other stuff" 99 | test_file_path = os.path.join(empty_model_dir, test_file) 100 | write_to_file(test_file_path, test_content) 101 | 102 | with pytest.raises(ValueError) as e: 103 | remove_model(empty_model_dir) 104 | 105 | os.remove(test_file_path) 106 | 107 | 108 | def test_is_url(): 109 | assert not is_url('./some/file/path') 110 | assert is_url('https://rasa.com/') 111 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import logging 7 | 8 | import pytest 9 | 10 | from rasa_nlu import data_router, config 11 | from rasa_nlu.components import ComponentBuilder 12 | 13 | logging.basicConfig(level="DEBUG") 14 | 15 | CONFIG_DEFAULTS_PATH = "sample_configs/config_defaults.yml" 16 | 17 | DEFAULT_DATA_PATH = "data/examples/rasa/demo-rasa.json" 18 | 19 | # see `rasa_nlu.data_router` for details. avoids deadlock in 20 | # `deferred_from_future` function during tests 21 | data_router.DEFERRED_RUN_IN_REACTOR_THREAD = False 22 | 23 | 24 | @pytest.fixture(scope="session") 25 | def component_builder(): 26 | return ComponentBuilder() 27 | 28 | 29 | @pytest.fixture(scope="session") 30 | def spacy_nlp(component_builder, default_config): 31 | return component_builder.create_component("nlp_spacy", default_config).nlp 32 | 33 | 34 | @pytest.fixture(scope="session") 35 | def mitie_feature_extractor(component_builder, default_config): 36 | return component_builder.create_component("nlp_mitie", default_config).extractor 37 | 38 | 39 | @pytest.fixture(scope="session") 40 | def default_config(): 41 | return config.load(CONFIG_DEFAULTS_PATH) 42 | -------------------------------------------------------------------------------- /tests/training/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/crownpku/Rasa_NLU_Chi/f995c06e5aee5b6f68ea877c1a271667357a1c68/tests/training/__init__.py -------------------------------------------------------------------------------- /tests/utilities.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import 2 | from __future__ import division 3 | from __future__ import print_function 4 | from __future__ import unicode_literals 5 | 6 | import tempfile 7 | 8 | import pytest 9 | import yaml 10 | from builtins import object 11 | 12 | from rasa_nlu.config import RasaNLUModelConfig 13 | from rasa_nlu.model import Interpreter 14 | from rasa_nlu.train import do_train 15 | 16 | slowtest = pytest.mark.slowtest 17 | 18 | 19 | def base_test_conf(pipeline_template): 20 | # 'response_log': temp_log_file_dir(), 21 | # 'port': 5022, 22 | # "path": tempfile.mkdtemp(), 23 | # "data": 
"./data/test/demo-rasa-small.json" 24 | 25 | return RasaNLUModelConfig({"pipeline": pipeline_template}) 26 | 27 | 28 | def write_file_config(file_config): 29 | with tempfile.NamedTemporaryFile("w+", 30 | suffix="_tmp_config_file.yml", 31 | delete=False) as f: 32 | f.write(yaml.safe_dump(file_config)) 33 | f.flush() 34 | return f 35 | 36 | 37 | def interpreter_for(component_builder, data, path, config): 38 | (trained, _, path) = do_train(config, data, path, 39 | component_builder=component_builder) 40 | interpreter = Interpreter.load(path, component_builder) 41 | return interpreter 42 | 43 | 44 | def temp_log_file_dir(): 45 | return tempfile.mkdtemp(suffix="_rasa_nlu_logs") 46 | 47 | 48 | class ResponseTest(object): 49 | def __init__(self, endpoint, expected_response, payload=None): 50 | self.endpoint = endpoint 51 | self.expected_response = expected_response 52 | self.payload = payload 53 | --------------------------------------------------------------------------------